Save work on loading object dictionaries - there is still a memory issue somewhere.

This commit is contained in:
Michael R Sweet 2021-05-04 21:31:58 -04:00
parent 7afdfc725c
commit 4abb91ca24
No known key found for this signature in database
GPG Key ID: 999559A027815955
8 changed files with 139 additions and 38 deletions

View File

@ -67,7 +67,7 @@ all-shared:
fi fi
debug: debug:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="$(COMMONFLAGS) -DDEBUG=1" clean all $(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all
# Clean everything # Clean everything

View File

@ -727,6 +727,8 @@ _pdfioDictSetValue(
_pdfio_pair_t *pair; // Current pair _pdfio_pair_t *pair; // Current pair
PDFIO_DEBUG("_pdfioDictSetValue(dict=%p, key=\"%s\", value=%p)\n", dict, key, (void *)value);
// See if the key is already set... // See if the key is already set...
if (dict->num_pairs > 0) if (dict->num_pairs > 0)
{ {
@ -765,6 +767,11 @@ _pdfioDictSetValue(
if (dict->num_pairs > 1) if (dict->num_pairs > 1)
qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs); qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs);
#ifdef DEBUG
PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs);
PDFIO_DEBUG_DICT(dict, "_pdfioDictSetValue");
#endif // DEBUG
return (true); return (true);
} }

View File

@ -302,12 +302,12 @@ pdfioFileGetNumPages(pdfio_file_t *pdf) // I - PDF file
pdfio_obj_t * // O - Object pdfio_obj_t * // O - Object
pdfioFileGetObject(pdfio_file_t *pdf, // I - PDF file pdfioFileGetObject(pdfio_file_t *pdf, // I - PDF file
size_t number) // I - Object number (starting at 1) size_t n) // I - Object index (starting at 0)
{ {
if (!pdf || number < 1 || number > pdf->num_objs) if (!pdf || n >= pdf->num_objs)
return (NULL); return (NULL);
else else
return (pdf->objs[number - 1]); return (pdf->objs[n]);
} }
@ -317,12 +317,12 @@ pdfioFileGetObject(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t * // O - Object pdfio_obj_t * // O - Object
pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file
size_t number) // I - Page number (starting at 1) size_t n) // I - Page index (starting at 0)
{ {
if (!pdf || number < 1 || number > pdf->num_pages) if (!pdf || n >= pdf->num_pages)
return (NULL); return (NULL);
else else
return (pdf->pages[number - 1]); return (pdf->pages[n]);
} }
@ -474,6 +474,7 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
pdf->objs[pdf->num_objs ++] = obj; pdf->objs[pdf->num_objs ++] = obj;
obj->pdf = pdf;
obj->number = number; obj->number = number;
obj->generation = generation; obj->generation = generation;
obj->offset = offset; obj->offset = offset;

View File

@ -67,10 +67,16 @@ _pdfioObjDelete(pdfio_obj_t *obj) // I - Object
pdfio_dict_t * // O - Dictionary or `NULL` on error pdfio_dict_t * // O - Dictionary or `NULL` on error
pdfioObjGetDict(pdfio_obj_t *obj) // I - Object pdfioObjGetDict(pdfio_obj_t *obj) // I - Object
{ {
// TODO: Implement me if (!obj)
(void)obj; return (NULL);
return (NULL); if (obj->value.type == PDFIO_VALTYPE_NONE)
_pdfioObjLoad(obj);
if (obj->value.type == PDFIO_VALTYPE_DICT)
return (obj->value.value.dict);
else
return (NULL);
} }
@ -103,29 +109,18 @@ pdfioObjGetNumber(pdfio_obj_t *obj) // I - Object
const char * // O - Object type const char * // O - Object type
pdfioObjGetType(pdfio_obj_t *obj) // I - Object pdfioObjGetType(pdfio_obj_t *obj) // I - Object
{ {
// TODO: Implement me pdfio_dict_t *dict; // Object dictionary
(void)obj;
return (NULL);
if ((dict = pdfioObjGetDict(obj)) == NULL)
return (NULL);
else
return (pdfioDictGetName(dict, "Type"));
} }
// //
// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading. // '_pdfioObjLoad()' - Load an object dictionary/value.
//
pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj) // I - Object
{
// TODO: Implement me
(void)obj;
return (NULL);
}
//
// '()' - Load an object dictionary/value.
// //
bool // O - `true` on success, `false` otherwise bool // O - `true` on success, `false` otherwise
@ -135,6 +130,8 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
*ptr; // Pointer into line *ptr; // Pointer into line
PDFIO_DEBUG("_pdfioObjLoad(obj=%p(%lu)), offset=%lu\n", obj, (unsigned long)obj->number, (unsigned long)obj->offset);
// Seek to the start of the object and read its header... // Seek to the start of the object and read its header...
if (_pdfioFileSeek(obj->pdf, obj->offset, SEEK_SET) != obj->offset) if (_pdfioFileSeek(obj->pdf, obj->offset, SEEK_SET) != obj->offset)
{ {
@ -170,6 +167,8 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
} }
// Then grab the object value... // Then grab the object value...
_pdfioFileClearTokens(obj->pdf);
if (!_pdfioValueRead(obj->pdf, &obj->value)) if (!_pdfioValueRead(obj->pdf, &obj->value))
{ {
_pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number); _pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number);
@ -191,3 +190,17 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
return (true); return (true);
} }
//
// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading.
//
// Not yet implemented: this stub ignores `obj` and always returns `NULL`.
//
pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj) // I - Object
{
// TODO: Implement me
(void)obj; // Mark the parameter as intentionally unused for now
return (NULL); // Always reports "no stream" until implemented
}

View File

@ -39,6 +39,8 @@ pdfioStringCreate(
char *news; // New string char *news; // New string
PDFIO_DEBUG("pdfioStringCreate(pdf=%p, s=\"%s\")\n", pdf, s);
// Range check input... // Range check input...
if (!pdf || !s) if (!pdf || !s)
return (NULL); return (NULL);
@ -54,7 +56,7 @@ pdfioStringCreate(
if (pdf->num_strings >= pdf->alloc_strings) if (pdf->num_strings >= pdf->alloc_strings)
{ {
// Expand the string array... // Expand the string array...
char **temp = realloc(pdf->strings, (pdf->alloc_strings + 32) * sizeof(char *)); char **temp = (char **)realloc(pdf->strings, (pdf->alloc_strings + 32) * sizeof(char *));
if (!temp) if (!temp)
{ {
@ -72,6 +74,16 @@ pdfioStringCreate(
if (pdf->num_strings > 1) if (pdf->num_strings > 1)
qsort(pdf->strings, pdf->num_strings, sizeof(char *), (int (*)(const void *, const void *))compare_strings); qsort(pdf->strings, pdf->num_strings, sizeof(char *), (int (*)(const void *, const void *))compare_strings);
#ifdef DEBUG
{
size_t i; // Looping var
PDFIO_DEBUG("pdfioStringCreate: %lu strings\n", (unsigned long)pdf->num_strings);
for (i = 0; i < pdf->num_strings; i ++)
PDFIO_DEBUG("pdfioStringCreate: strings[%lu]=%p(\"%s\")\n", (unsigned long)i, pdf->strings[i], pdf->strings[i]);
}
#endif // DEBUG
return (news); return (news);
} }

View File

@ -90,14 +90,13 @@ _pdfioTokenRead(
// "state" is: // "state" is:
// //
// - '\0' for idle // - '\0' for idle
// - ')' for literal string // - '(' for literal string
// - '/' for name // - '/' for name
// - '<' for possible hex string or dict // - '<' for possible hex string or dict
// - '>' for possible dict // - '>' for possible dict
// - '%' for comment // - '%' for comment
// - 'K' for keyword // - 'K' for keyword
// - 'N' for number // - 'N' for number
// - 'X' for hex string
// Read the next token, skipping any leading whitespace... // Read the next token, skipping any leading whitespace...
memset(&tb, 0, sizeof(tb)); memset(&tb, 0, sizeof(tb));
@ -147,7 +146,7 @@ _pdfioTokenRead(
switch (state) switch (state)
{ {
case ')' : // Literal string case '(' : // Literal string
while ((ch = get_char(&tb)) != EOF && ch != ')') while ((ch = get_char(&tb)) != EOF && ch != ')')
{ {
if (ch == '\\') if (ch == '\\')
@ -259,6 +258,7 @@ _pdfioTokenRead(
if (!isdigit(ch) && ch != '.') if (!isdigit(ch) && ch != '.')
{ {
// End of number... // End of number...
tb.bufptr --;
break; break;
} }
else if (bufptr < bufend) else if (bufptr < bufend)
@ -278,7 +278,13 @@ _pdfioTokenRead(
case '/' : // "/name" case '/' : // "/name"
while ((ch = get_char(&tb)) != EOF && !isspace(ch)) while ((ch = get_char(&tb)) != EOF && !isspace(ch))
{ {
if (ch == '#') if (strchr(PDFIO_DELIM_CHARS, ch) != NULL)
{
// End of name...
tb.bufptr --;
break;
}
else if (ch == '#')
{ {
// Quoted character (#xx) in name... // Quoted character (#xx) in name...
int i; // Looping var int i; // Looping var
@ -325,9 +331,6 @@ _pdfioTokenRead(
return (false); return (false);
} }
// Fall through to parse a hex string...
case 'X' : // Hex string
while ((ch = get_char(&tb)) != EOF && ch != '>') while ((ch = get_char(&tb)) != EOF && ch != '>')
{ {
if (isxdigit(ch)) if (isxdigit(ch))

View File

@ -145,6 +145,12 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
v->type = PDFIO_VALTYPE_STRING; v->type = PDFIO_VALTYPE_STRING;
v->value.string = pdfioStringCreate(pdf, token + 1); v->value.string = pdfioStringCreate(pdf, token + 1);
} }
else if (token[0] == '/')
{
// Name
v->type = PDFIO_VALTYPE_NAME;
v->value.name = pdfioStringCreate(pdf, token + 1);
}
else if (token[0] == '<') else if (token[0] == '<')
{ {
// Hex string // Hex string
@ -220,6 +226,8 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
v->value.indirect.number = (size_t)strtoimax(token, NULL, 10); v->value.indirect.number = (size_t)strtoimax(token, NULL, 10);
v->value.indirect.generation = (unsigned short)strtol(token2, NULL, 10); v->value.indirect.generation = (unsigned short)strtol(token2, NULL, 10);
PDFIO_DEBUG("_pdfioValueRead: Returning indirect value %lu %u R.\n", (unsigned long)v->value.indirect.number, v->value.indirect.generation);
return (v); return (v);
} }
else else

View File

@ -11,7 +11,7 @@
// Include necessary headers... // Include necessary headers...
// //
#include "pdfio.h" #include "pdfio-private.h"
// //
@ -26,12 +26,69 @@ main(int argc, // I - Number of command-line arguments
{ {
int i; // Looping var int i; // Looping var
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
size_t n, // Object/page index
num_objs, // Number of objects
num_pages; // Number of pages
pdfio_obj_t *obj; // Object
pdfio_dict_t *dict; // Object dictionary
const char *type; // Object type
for (i = 1; i < argc; i ++) for (i = 1; i < argc; i ++)
{ {
if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL) if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL)
{ {
printf("%s: PDF %s, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)pdfioFileGetNumObjects(pdf)); num_objs = pdfioFileGetNumObjects(pdf);
num_pages = pdfioFileGetNumPages(pdf);
printf("%s: PDF %s, %d objects, %d pages.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_objs, (int)num_pages);
for (n = 0; n < num_objs; n ++)
{
if ((obj = pdfioFileGetObject(pdf, n)) == NULL)
{
printf("%s: Unable to get object #%d.\n", argv[i], (int)n);
}
else
{
size_t np; // Number of pairs
_pdfio_pair_t *pair; // Current pair
dict = pdfioObjGetDict(obj);
printf("%s: %u %u obj dict=%p(%lu)\n", argv[i], (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL);
if (dict)
{
for (np = dict->num_pairs, pair = dict->pairs; np > 0; np --, pair ++)
{
switch (pair->value.type)
{
case PDFIO_VALTYPE_INDIRECT :
printf(" /%s %u %u R\n", pair->key, (unsigned)pair->value.value.indirect.number, pair->value.value.indirect.generation);
break;
case PDFIO_VALTYPE_NUMBER :
printf(" /%s %g\n", pair->key, pair->value.value.number);
break;
case PDFIO_VALTYPE_BOOLEAN :
printf(" /%s %s\n", pair->key, pair->value.value.boolean ? "true" : "false");
break;
case PDFIO_VALTYPE_NULL :
printf(" /%s null\n", pair->key);
break;
case PDFIO_VALTYPE_ARRAY :
printf(" /%s [...]\n", pair->key);
break;
case PDFIO_VALTYPE_DICT :
printf(" /%s <<...>>\n", pair->key);
break;
default :
printf(" /%s ...\n", pair->key);
break;
}
}
}
}
}
pdfioFileClose(pdf); pdfioFileClose(pdf);
} }
} }