diff --git a/pdfio-array.c b/pdfio-array.c index 44e35a9..088ba8c 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -584,9 +584,9 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file if (!_pdfioValueRead(pdf, tb, &value)) break; - PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array); - PDFIO_DEBUG_VALUE(&value); - PDFIO_DEBUG("\n"); +// PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array); +// PDFIO_DEBUG_VALUE(&value); +// PDFIO_DEBUG("\n"); append_value(array, &value); } diff --git a/pdfio-dict.c b/pdfio-dict.c index db69a44..b436e9f 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -457,7 +457,7 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file if (!_pdfioDictSetValue(dict, pdfioStringCreate(pdf, key + 1), &value)) break; - PDFIO_DEBUG("_pdfioDictRead: Set %s.\n", key); +// PDFIO_DEBUG("_pdfioDictRead: Set %s.\n", key); } // Dictionary is invalid - pdfioFileClose will free the memory, return NULL diff --git a/pdfio-token.c b/pdfio-token.c index a9e6214..35ce9d1 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -109,6 +109,7 @@ _pdfioTokenFlush(_pdfio_token_t *tb) // I - Token buffer/stack else { // Nothing left, reset pointers... + PDFIO_DEBUG("_pdfioTokenFlush: Resetting pointers.\n"); tb->bufptr = tb->bufend = tb->buffer; } } diff --git a/pdfio-value.c b/pdfio-value.c index cf16df2..356d1b9 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -357,52 +357,75 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file if (isdigit(token[0]) && !strchr(token, '.')) { // Integer or object ref... - char token2[8192], // Second token (generation number) - token3[8192], // Third token ("R") - *tokptr; // Pointer into token + unsigned char *tempptr; // Pointer into buffer - if (_pdfioTokenGet(tb, token2, sizeof(token2))) + PDFIO_DEBUG("_pdfioValueRead: %d bytes left in buffer.\n", (int)(tb->bufend - tb->bufptr)); + if ((tb->bufend - tb->bufptr) < 10) { - // Got the second token, is it an integer? - for (tokptr = token2; *tokptr; tokptr ++) - { - if (!isdigit(*tokptr)) - break; - } + // Fill up buffer... + ssize_t bytes; // Bytes peeked - if (*tokptr) - { - // Not an object reference, push this token for later use... - _pdfioTokenPush(tb, token2); - } - else - { - // A possible reference, get one more... - if (_pdfioTokenGet(tb, token3, sizeof(token3))) + _pdfioTokenFlush(tb); + + if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) > 0) + tb->bufend = tb->buffer + bytes; + + PDFIO_DEBUG("_pdfioValueRead: %d bytes now left in buffer.\n", (int)(tb->bufend - tb->bufptr)); + } + +#ifdef DEBUG + PDFIO_DEBUG("_pdfioValueRead: Bytes are '"); + for (tempptr = tb->bufptr; tempptr < tb->bufend; tempptr ++) { - if (!strcmp(token3, "R")) - { - // Reference! - v->type = PDFIO_VALTYPE_INDIRECT; - v->value.indirect.number = (size_t)strtoimax(token, NULL, 10); - v->value.indirect.generation = (unsigned short)strtol(token2, NULL, 10); - - PDFIO_DEBUG("_pdfioValueRead: Returning indirect value %lu %u R.\n", (unsigned long)v->value.indirect.number, v->value.indirect.generation); - - return (v); - } + if (*tempptr < ' ' || *tempptr == 0x7f) + PDFIO_DEBUG("\\%03o", *tempptr); else - { - // Not a reference, push the tokens back... - _pdfioTokenPush(tb, token3); - _pdfioTokenPush(tb, token2); - } + PDFIO_DEBUG("%c", *tempptr); } - else + PDFIO_DEBUG("'.\n"); +#endif // DEBUG + + tempptr = tb->bufptr; + + if (tempptr < tb->bufend && isdigit(*tempptr & 255)) + { + // Integer... + long generation = 0; // Generation number + + while (tempptr < tb->bufend && isdigit(*tempptr & 255)) + { + generation = generation * 10 + *tempptr - '0'; + tempptr ++; + } + + while (tempptr < tb->bufend && isspace(*tempptr & 255)) + tempptr ++; + + if (tempptr < tb->bufend && *tempptr == 'R') + { + // Reference! + PDFIO_DEBUG("_pdfioValueRead: Consuming %d bytes.\n", (int)(tempptr - tb->bufptr + 1)); + tb->bufptr = tempptr + 1; + +#ifdef DEBUG + PDFIO_DEBUG("_pdfioValueRead: Next bytes are '"); + for (tempptr = tb->bufptr; tempptr < tb->bufend; tempptr ++) { - // Not a reference... - _pdfioTokenPush(tb, token2); + if (*tempptr < ' ' || *tempptr == 0x7f) + PDFIO_DEBUG("\\%03o", *tempptr); + else + PDFIO_DEBUG("%c", *tempptr); } + PDFIO_DEBUG("'.\n"); +#endif // DEBUG + + v->type = PDFIO_VALTYPE_INDIRECT; + v->value.indirect.number = (size_t)strtoimax(token, NULL, 10); + v->value.indirect.generation = (unsigned short)generation; + + PDFIO_DEBUG("_pdfioValueRead: Returning indirect value %lu %u R.\n", (unsigned long)v->value.indirect.number, v->value.indirect.generation); + + return (v); } } } diff --git a/testpdfio.c b/testpdfio.c index ffcae3c..2e9e4da 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -127,14 +127,19 @@ do_test_file(const char *filename, // I - PDF filename if ((dict = pdfioObjGetDict(obj)) == NULL) { - puts("Not a stream."); - return (1); + _pdfioValueDebug(&obj->value, stdout); + putchar('\n'); + return (0); } filter = pdfioDictGetName(dict, "Filter"); if ((st = pdfioObjOpenStream(obj, (filter && !strcmp(filter, "FlateDecode")) ? PDFIO_FILTER_FLATE : PDFIO_FILTER_NONE)) == NULL) - return (1); + { + _pdfioValueDebug(&obj->value, stdout); + putchar('\n'); + return (0); + } while ((bytes = pdfioStreamRead(st, buffer, sizeof(buffer))) > 0) fwrite(buffer, 1, (size_t)bytes, stdout); @@ -242,6 +247,443 @@ do_unit_tests(void) "/ENDK00GK3c9DZN2n 23693 0 R/EPDB0NGN3Q9GZP2t 23695 0 R" "/EpDA0kG03o9rZX21 23696 0 R/Im0 5475 0 R>>>>/Rotate 0/StructParents 2105" "/Tabs/S/Type/Page>>"; + static const char *cid_dict = // CID font dictionary + "<" + ">\nendobj\n"; setbuf(stdout, NULL); @@ -262,7 +704,23 @@ do_unit_tests(void) if (_pdfioValueRead(pdf, &tb, &value)) { // TODO: Check value... - puts("PASS"); + fputs("PASS: ", stdout); + _pdfioValueDebug(&value, stdout); + puts("\n"); + } + else + return (1); + + // Test the value parsers for edge cases... + fputs("_pdfioValueRead(cid_dict): ", stdout); + s = cid_dict; + _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s); + if (_pdfioValueRead(pdf, &tb, &value)) + { + // TODO: Check value... + fputs("PASS: ", stdout); + _pdfioValueDebug(&value, stdout); + puts("\n"); } else return (1); @@ -1387,154 +1845,154 @@ write_font_test(pdfio_file_t *pdf, // I - PDF file int i; // Looping var static const char * const welcomes[] =// "Welcome" in many languages { - "Welcome\n", - "Welkom\n", - "ḫaṣānu\n", - "Mayad-ayad nga pad-abot\n", - "Mir se vjên\n", - "Mirë se vjen\n", - "Wellkumma\n", - "Bienveniu\n", - "Ghini vinit!\n", - "Bienveníu\n", - "Miro peicak\n", - "Xoş gəlmişsiniz!\n", - "Salamat datang\n", - "Сәләм бирем!\n", - "Menjuah-juah!\n", - "Še das d' kemma bisd\n", - "Mwaiseni\n", - "Maogmáng Pag-abót\n", - "Welkam\n", - "Dobrodošli\n", - "Degemer mat\n", - "Benvingut\n", - "Maayong pag-abot\n", - "Kopisanangan do kinorikatan\n", - "Bienvenida\n", - "Bien binidu\n", - "Bienbenidu\n", - "Hóʔą\n", - "Boolkhent!\n", - "Kopivosian do kinoikatan\n", - "Malipayeng Pag-abot!\n", - "Vítej\n", - "Velkommen\n", - "Salâm\n", - "Welkom\n", - "Emedi\n", - "Welkumin\n", - "Tere tulemast\n", - "Woé zɔ\n", - "Bienveníu\n", - "Vælkomin\n", - "Bula\n", - "Tervetuloa\n", - "Bienvenue\n", - "Wäljkiimen\n", - "Wäilkuumen\n", - "Wäilkuumen\n", - "Wolkom\n", - "Benvignût\n", - "Benvido\n", - "Willkommen\n", - "Ἀσπάζομαι!\n", - "Καλώς Ήρθες\n", - "Tikilluarit\n", - "Byen venu\n", - "Sannu da zuwa\n", - "Aloha\n", - "Wayakurua\n", - "Dayón\n", - "Zoo siab txais tos!\n", - "Üdvözlet\n", - "Selamat datai\n", - "Velkomin\n", - "Nnọọ\n", - "Selamat datang\n", - "Qaimarutin\n", - "Fáilte\n", - "Benvenuto\n", - "Voschata\n", - "Murakaza neza\n", - "Mauri\n", - "Tu be xér hatî ye!\n", - "Taŋyáŋ yahí\n", - "Salve\n", - "Laipni lūdzam\n", - "Wilkóm\n", - "Sveiki atvykę\n", - "Willkamen\n", - "Mu amuhezwi\n", - "Tukusanyukidde\n", - "Wëllkomm\n", - "Swagatam\n", - "Tonga soa\n", - "Selamat datang\n", - "Merħba\n", - "B’a’ntulena\n", - "Failt ort\n", - "Haere mai\n", - "mai\n", - "Pjila’si\n", - "Benvegnüu\n", - "Ne y kena\n", - "Ximopanōltih\n", - "Yá'át'ééh\n", - "Siyalemukela\n", - "Siyalemukela\n", - "Bures boahtin\n", - "Re a go amogela\n", - "Velkommen\n", - "Benvengut!\n", - "Bon bini\n", - "Witam Cię\n", - "Bem-vindo\n", - "Haykuykuy!\n", - "T'aves baxtalo\n", - "Bainvegni\n", - "Afio mai\n", - "Ennidos\n", - "Walcome\n", - "Fàilte\n", - "Mauya\n", - "Bon vinutu\n", - "Vitaj\n", - "Dobrodošli\n", - "Soo dhowow\n", - "Witaj\n", - "Bienvenido\n", - "Wilujeng sumping\n", - "Karibu\n", - "Wamukelekile\n", - "Välkommen\n", - "Wilkomme\n", - "Maligayang pagdating\n", - "Maeva\n", - "Räxim itegez\n", - "Ksolok Bodik Mai\n", - "Ulu tons mai\n", - "Welkam\n", - "Talitali fiefia\n", - "Lek oy li la tale\n", - "amogetswe\n", - "Tempokani\n", - "Hoş geldin\n", - "Koş geldiniz\n", - "Ulufale mai!\n", - "Xush kelibsiz\n", - "Benvignùo\n", - "Tervhen tuldes\n", - "Hoan nghênh\n", - "Tere tulõmast\n", - "Benvnuwe\n", - "Croeso\n", - "Merhbe\n", - "Wamkelekile\n", - "Märr-ŋamathirri\n", - "Ẹ ku abọ\n", - "Kíimak 'oolal\n", - "Ngiyakwemukela\n" + "Welcome", + "Welkom", + "ḫaṣānu", + "Mayad-ayad nga pad-abot", + "Mir se vjên", + "Mirë se vjen", + "Wellkumma", + "Bienveniu", + "Ghini vinit!", + "Bienveníu", + "Miro peicak", + "Xoş gəlmişsiniz!", + "Salamat datang", + "Сәләм бирем!", + "Menjuah-juah!", + "Še das d' kemma bisd", + "Mwaiseni", + "Maogmáng Pag-abót", + "Welkam", + "Dobrodošli", + "Degemer mat", + "Benvingut", + "Maayong pag-abot", + "Kopisanangan do kinorikatan", + "Bienvenida", + "Bien binidu", + "Bienbenidu", + "Hóʔą", + "Boolkhent!", + "Kopivosian do kinoikatan", + "Malipayeng Pag-abot!", + "Vítej", + "Velkommen", + "Salâm", + "Welkom", + "Emedi", + "Welkumin", + "Tere tulemast", + "Woé zɔ", + "Bienveníu", + "Vælkomin", + "Bula", + "Tervetuloa", + "Bienvenue", + "Wäljkiimen", + "Wäilkuumen", + "Wäilkuumen", + "Wolkom", + "Benvignût", + "Benvido", + "Willkommen", + "Ἀσπάζομαι!", + "Καλώς Ήρθες", + "Tikilluarit", + "Byen venu", + "Sannu da zuwa", + "Aloha", + "Wayakurua", + "Dayón", + "Zoo siab txais tos!", + "Üdvözlet", + "Selamat datai", + "Velkomin", + "Nnọọ", + "Selamat datang", + "Qaimarutin", + "Fáilte", + "Benvenuto", + "Voschata", + "Murakaza neza", + "Mauri", + "Tu be xér hatî ye!", + "Taŋyáŋ yahí", + "Salve", + "Laipni lūdzam", + "Wilkóm", + "Sveiki atvykę", + "Willkamen", + "Mu amuhezwi", + "Tukusanyukidde", + "Wëllkomm", + "Swagatam", + "Tonga soa", + "Selamat datang", + "Merħba", + "B’a’ntulena", + "Failt ort", + "Haere mai", + "mai", + "Pjila’si", + "Benvegnüu", + "Ne y kena", + "Ximopanōltih", + "Yá'át'ééh", + "Siyalemukela", + "Siyalemukela", + "Bures boahtin", + "Re a go amogela", + "Velkommen", + "Benvengut!", + "Bon bini", + "Witam Cię", + "Bem-vindo", + "Haykuykuy!", + "T'aves baxtalo", + "Bainvegni", + "Afio mai", + "Ennidos", + "Walcome", + "Fàilte", + "Mauya", + "Bon vinutu", + "Vitaj", + "Dobrodošli", + "Soo dhowow", + "Witaj", + "Bienvenido", + "Wilujeng sumping", + "Karibu", + "Wamukelekile", + "Välkommen", + "Wilkomme", + "Maligayang pagdating", + "Maeva", + "Räxim itegez", + "Ksolok Bodik Mai", + "Ulu tons mai", + "Welkam", + "Talitali fiefia", + "Lek oy li la tale", + "amogetswe", + "Tempokani", + "Hoş geldin", + "Koş geldiniz", + "Ulufale mai!", + "Xush kelibsiz", + "Benvignùo", + "Tervhen tuldes", + "Hoan nghênh", + "Tere tulõmast", + "Benvnuwe", + "Croeso", + "Merhbe", + "Wamkelekile", + "Märr-ŋamathirri", + "Ẹ ku abọ", + "Kíimak 'oolal", + "Ngiyakwemukela" }; @@ -1620,8 +2078,8 @@ write_font_test(pdfio_file_t *pdf, // I - PDF file return (1); } - printf("pdfioContentTextShow(\"%s\"): ", welcomes[i]); - if (pdfioContentTextShow(st, unicode, welcomes[i])) + printf("pdfioContentTextShowf(\"%s\"): ", welcomes[i]); + if (pdfioContentTextShowf(st, unicode, "%s\n", welcomes[i])) puts("PASS"); else return (1);