Save work on low-level file I/O.

This commit is contained in:
Michael R Sweet 2021-04-26 10:42:01 -04:00
parent 253717248c
commit 11588ce2f5
No known key found for this signature in database
GPG Key ID: 999559A027815955
6 changed files with 392 additions and 37 deletions

View File

@ -14,6 +14,15 @@
#include "pdfio-private.h"
//
// Local functions...
//
static bool fill_buffer(pdfio_file_t *pdf);
static ssize_t read_buffer(pdfio_file_t *pdf, char *buffer, size_t bytes);
static bool write_buffer(pdfio_file_t *pdf, const char *buffer, size_t bytes);
//
// '_pdfioFileDefaultError()' - Default error callback.
//
@ -54,3 +63,283 @@ _pdfioFileError(pdfio_file_t *pdf, // I - PDF file
return ((pdf->error_cb)(pdf, buffer, pdf->error_data));
}
//
// '_pdfioFileGetChar()' - Get a character from a PDF file.
//
// Returns the next byte of the file, refilling the read buffer when it has
// been exhausted.  The byte is converted through `unsigned char` so that
// bytes 0x80-0xff are reported as 128-255 instead of negative values; in
// particular a 0xff byte can no longer be confused with the `-1` EOF result.
//

int					// O - Character (0-255) or `-1` on EOF/error
_pdfioFileGetChar(pdfio_file_t *pdf)	// I - PDF file
{
  // Nothing buffered?  Try to fill the read buffer, which fails on both EOF
  // and hard read errors...
  if (pdf->bufptr >= pdf->bufend && !fill_buffer(pdf))
    return (-1);

  // Return the next buffered byte as a non-negative value...
  return ((int)(unsigned char)*(pdf->bufptr ++));
}
//
// '_pdfioFileRead()' - Read from a PDF file.
//
// Buffered data is consumed first.  Once the buffer is empty, requests for
// more than 1024 bytes are read straight into the caller's buffer while
// smaller requests refill the file buffer and copy from it.  Reading stops
// early when the file cannot supply more data, in which case the count
// returned is whatever was copied up to that point.
//

ssize_t					// O - Number of bytes read
_pdfioFileRead(pdfio_file_t *pdf,	// I - PDF file
               char         *buffer,	// I - Read buffer
               size_t       bytes)	// I - Number of bytes to read
{
  ssize_t	total = 0,		// Total bytes read
		count;			// Bytes handled in this pass


  while (bytes > 0)
  {
    // How much is waiting in the file buffer?
    count = pdf->bufend - pdf->bufptr;

    if (count > 0)
    {
      // Copy as much buffered data as the caller still wants...
      if ((size_t)count > bytes)
        count = (ssize_t)bytes;

      memcpy(buffer, pdf->bufptr, (size_t)count);
      pdf->bufptr += count;
    }
    else if (bytes > 1024)
    {
      // Large request with nothing buffered, read directly from the file...
      count = read_buffer(pdf, buffer, bytes);

      if (count <= 0)
        break;

      pdf->bufpos += count;
    }
    else if (!fill_buffer(pdf))
    {
      // Small request but the buffer could not be refilled...
      break;
    }

    // Account for this pass; after a successful refill `count` still holds
    // the (empty) buffered amount computed above, so nothing moves and the
    // next pass copies from the fresh buffer...
    total  += count;
    bytes  -= (size_t)count;
    buffer += count;
  }

  return (total);
}
//
// '_pdfioFileSeek()' - Seek within a PDF file.
//
// `SEEK_CUR` offsets are resolved against the current logical position (the
// buffer's file offset plus the amount consumed or queued in the buffer - the
// same value `_pdfioFileTell()` computes), not against the start of the
// buffer.
//
// When reading, a target that falls inside the data already buffered only
// moves the buffer pointer; any other target discards the read buffer before
// seeking.  When writing, buffered data is flushed before the file position
// is changed.
//

off_t					// O - New offset from beginning of file or `-1` on error
_pdfioFileSeek(pdfio_file_t *pdf,	// I - PDF file
               off_t        offset,	// I - Offset
               int          whence)	// I - Offset base (`SEEK_SET`, `SEEK_CUR`, or `SEEK_END`)
{
  // Turn relative seeks into absolute ones.  The buffer pointer is NULL when
  // nothing is buffered, in which case `bufpos` alone is the position...
  if (whence == SEEK_CUR)
  {
    offset += pdf->bufpos;

    if (pdf->bufptr)
      offset += pdf->bufptr - pdf->buffer;

    whence = SEEK_SET;
  }

  if (pdf->mode == PDFIO_MODE_READ)
  {
    // Reading, see if we already have the data we need.  Check that a buffer
    // is actually loaded before doing arithmetic on its end pointer, and keep
    // all arithmetic on integer offsets rather than intermediate pointers...
    if (whence != SEEK_END && pdf->bufend && offset >= pdf->bufpos && offset < (pdf->bufpos + (pdf->bufend - pdf->buffer)))
    {
      // Yes, seek within existing buffer...
      pdf->bufptr = pdf->buffer + (offset - pdf->bufpos);
      return (offset);
    }

    // No, reset the read buffer
    pdf->bufptr = pdf->bufend = NULL;
  }
  else
  {
    // Writing, make sure we write any buffered data...
    if (pdf->bufptr > pdf->buffer)
    {
      if (!write_buffer(pdf, pdf->buffer, (size_t)(pdf->bufptr - pdf->buffer)))
        return (-1);

      // Keep the recorded position in step with what was flushed so it stays
      // accurate even if the seek below fails...
      pdf->bufpos += pdf->bufptr - pdf->buffer;
    }

    pdf->bufptr = pdf->buffer;
  }

  // Seek within the file...
  if ((offset = lseek(pdf->fd, offset, whence)) < 0)
  {
    _pdfioFileError(pdf, "Unable to seek within file - %s", strerror(errno));
    return (-1);
  }

  pdf->bufpos = offset;

  return (offset);
}
//
// '_pdfioFileTell()' - Return the offset within a PDF file.
//
// The offset is the file position recorded for the start of the buffer plus
// the distance of the buffer pointer from the start of the buffer; when the
// buffer pointer is NULL only the recorded position is used.
//

off_t					// O - Offset from beginning of file
_pdfioFileTell(pdfio_file_t *pdf)	// I - PDF file
{
  off_t	position = pdf->bufpos;		// Offset to report


  if (pdf->bufptr)
    position += pdf->bufptr - pdf->buffer;

  return (position);
}
//
// '_pdfioFileWrite()' - Write to a PDF file.
//
// Data that fits in the remaining buffer space is only copied into the
// buffer.  Otherwise the buffer is flushed first; data at least as large as
// the whole buffer is then written straight to the file and anything smaller
// is copied into the now-empty buffer.
//

bool					// O - `true` on success and `false` on error
_pdfioFileWrite(pdfio_file_t *pdf,	// I - PDF file
                const char   *buffer,	// I - Write buffer
                size_t       bytes)	// I - Bytes to write
{
  size_t	room;			// Free space left in the write buffer
  ssize_t	pending;		// Buffered bytes waiting to be flushed


  room = (size_t)(pdf->bufend - pdf->bufptr);

  if (bytes > room)
  {
    // Not enough room, flush whatever is currently buffered...
    pending = pdf->bufptr - pdf->buffer;

    if (pending > 0)
    {
      if (!write_buffer(pdf, pdf->buffer, (size_t)pending))
        return (false);

      pdf->bufpos += pending;
    }

    pdf->bufptr = pdf->buffer;

    if (bytes >= sizeof(pdf->buffer))
    {
      // Too big to buffer, write directly...
      if (!write_buffer(pdf, buffer, bytes))
        return (false);

      pdf->bufpos += bytes;

      return (true);
    }
  }

  // Queue the data in the buffer...
  memcpy(pdf->bufptr, buffer, bytes);
  pdf->bufptr += bytes;

  return (true);
}
//
// 'fill_buffer()' - Fill the read buffer in a PDF file.
//
// The recorded buffer position is first advanced past the data that was
// previously loaded (if any), then one read of up to the full buffer size is
// attempted.  On EOF or a read error the buffer pointers are cleared.
//

static bool				// O - `true` on success, `false` on failure
fill_buffer(pdfio_file_t *pdf)		// I - PDF file
{
  ssize_t	count;			// Bytes obtained from the file


  // Account for the buffer contents being replaced...
  if (pdf->bufend)
    pdf->bufpos += pdf->bufend - pdf->buffer;

  // Load the next chunk of the file...
  count = read_buffer(pdf, pdf->buffer, sizeof(pdf->buffer));

  if (count > 0)
  {
    // Got data, expose it through the buffer pointers...
    pdf->bufptr = pdf->buffer;
    pdf->bufend = pdf->buffer + count;

    return (true);
  }

  // EOF or hard error, nothing is buffered any more...
  pdf->bufptr = pdf->bufend = NULL;

  return (false);
}
//
// 'read_buffer()' - Read a buffer from a PDF file.
//
// Issues `read()` on the file descriptor, repeating the call for as long as
// it fails with `EINTR` or `EAGAIN`.  Any other failure is reported through
// the file's error callback.
//

static ssize_t				// O - Number of bytes read or -1 on error
read_buffer(pdfio_file_t *pdf,		// I - PDF file
            char         *buffer,	// I - Buffer
            size_t       bytes)		// I - Number of bytes to read
{
  ssize_t	count;			// Result of the last read() call


  // Keep trying while the failure is one that should be retried...
  do
  {
    count = read(pdf->fd, buffer, bytes);
  }
  while (count < 0 && (errno == EINTR || errno == EAGAIN));

  // Report hard errors...
  if (count < 0)
    _pdfioFileError(pdf, "Unable to read from file - %s", strerror(errno));

  return (count);
}
//
// 'write_buffer()' - Write a buffer to a PDF file.
//
// Calls `write()` until every byte has been accepted, continuing after short
// writes and retrying calls that fail with `EINTR` or `EAGAIN`.  Any other
// failure is reported through the file's error callback.
//

static bool				// O - `true` on success and `false` on error
write_buffer(pdfio_file_t *pdf,		// I - PDF file
             const char   *buffer,	// I - Write buffer
             size_t       bytes)	// I - Bytes to write
{
  ssize_t	count;			// Result of the last write() call


  while (bytes > 0)
  {
    count = write(pdf->fd, buffer, bytes);

    if (count < 0)
    {
      // Retry interrupted/would-block writes with the same data...
      if (errno == EINTR || errno == EAGAIN)
        continue;

      // Hard error...
      _pdfioFileError(pdf, "Unable to write to file - %s", strerror(errno));
      return (false);
    }

    // Skip past whatever was accepted this time...
    buffer += count;
    bytes  -= (size_t)count;
  }

  return (true);
}

View File

@ -14,39 +14,57 @@
#include "pdfio-private.h"
//
// '()' - .
// 'pdfioObjClose()' - Close an object, writing any data as needed to the PDF
// file.
//
bool pdfioObjClose(pdfio_object_t *obj)
bool // O - `true` on success, `false` on failure
pdfioObjClose(pdfio_obj_t *obj) // I - Object
{
// NOTE(review): placeholder body - `obj` is not used and failure is reported
// unconditionally; nothing is written or closed here yet.
return (false);
}
//
// 'pdfioObjCreateStream()' - Create an object (data) stream for writing.
//
// Not implemented yet: no stream is created and `NULL` is always returned.
// The explicit return replaces an empty body, which left the result
// undefined for any caller that used it.
//

pdfio_stream_t *			// O - Stream or `NULL` on error
pdfioObjCreateStream(
    pdfio_obj_t    *obj,		// I - Object
    pdfio_filter_t filter)		// I - Type of compression to apply
{
  // TODO: Implement me
  (void)obj;
  (void)filter;

  return (NULL);
}
//
// '()' - .
// '_pdfioObjDelete()' - Free memory used by an object.
//
pdfio_stream_t *pdfioObjCreateStream(pdfio_obj_t *obj, pdfio_compress_t compression)
void
_pdfioObjDelete(pdfio_object_t *obj) // I - Object
{
if (obj)
pdfioStreamClose(obj->stream);
free(obj);
}
//
// '()' - .
// 'pdfioObjGetDict()' - Get the dictionary associated with an object.
//
void _pdfioObjDelete(pdfio_object_t *obj)
pdfio_dict_t * // O - Dictionary or `NULL` on error
pdfioObjGetDict(pdfio_obj_t *obj) // I - Object
{
}
// TODO: Implement me
(void)obj;
//
// '()' - .
//
pdfio_dict_t *pdfioObjGetDict(pdfio_obj_t *obj)
{
return (NULL);
}
@ -68,15 +86,6 @@ int pdfioObjGetNumber(pdfio_obj_t *obj)
}
//
// '()' - .
//
pdfio_stream_t *pdfioObjGetStream(pdfio_obj_t *obj)
{
}
//
// '()' - .
//
@ -86,3 +95,11 @@ const char *pdfioObjGetType(pdfio_obj_t *obj)
}
//
// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading.
//
// Not implemented yet: no stream is opened and `NULL` is always returned.
// The explicit return replaces an empty body, which left the result
// undefined for any caller that used it.
//

pdfio_stream_t *			// O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj)	// I - Object
{
  // TODO: Implement me
  (void)obj;

  return (NULL);
}

View File

@ -16,10 +16,12 @@
# include "pdfio.h"
# include <stdarg.h>
# include <errno.h>
# include <fcntl.h>
# include <unistd.h>
# include <string.h>
# include <ctype.h>
# include <zlib.h>
//
@ -56,11 +58,18 @@
struct _pdfio_file_s // PDF file structure
{
char *filename; // Filename
int fd; // File descriptor
const char *filename; // Filename
pdfio_mode_t mode; // Read/write mode
pdfio_error_cb_t error_cb; // Error callback
void *error_data; // Data for error callback
// Active file data
int fd; // File descriptor
char buffer[8192], // Read/write buffer
*bufptr, // Pointer into buffer
*bufend; // End of buffer
off_t bufpos; // Position in file for start of buffer
// Allocated data elements
size_t num_arrays, // Number of arrays
alloc_arrays; // Allocated arrays
@ -81,9 +90,22 @@ struct _pdfio_obj_s // Object
pdfio_file_t *pdf; // PDF file
int number, // Number
generation; // Generation
off_t offset; // Offset in file
size_t length; // Length
off_t dict_offset, // Offset to dict in file
length_offset, // Offset to /Length in dict
stream_offset; // Offset to start of stream in file
size_t stream_length; // Length of stream, if any
pdfio_dict_t *dict; // Dictionary
pdfio_stream_t *stream; // Open stream, if any
};
// State kept for one stream: the PDF file and object it refers to, the filter
// selected for its data, a fixed-size data buffer with its fill count, and the
// zlib state used by the Flate filter.
struct _pdfio_stream_s // Stream
{
pdfio_file_t *pdf; // PDF file
pdfio_obj_t *obj; // Object
pdfio_filter_t filter; // Compression/decompression filter
char buffer[8192]; // Read/write buffer
size_t bufused; // Number of bytes in buffer
z_stream flate; // Flate filter state (zlib `z_stream`)
};
typedef struct _pdfio_value_s // Value structure
@ -109,15 +131,26 @@ typedef struct _pdfio_value_s // Value structure
extern void _pdfioArrayDelete(pdfio_array_t *a) PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) PDFIO_INTERNAL;
extern void _pdfioDictDelete(pdfio_dict_t *dict) PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) PDFIO_INTERNAL;
extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) PDFIO_INTERNAL;
extern bool _pdfioFileDefaultError(pdfio_file_t *pdf, const char *message, void *data) PDFIO_INTERNAL;
extern void _pdfioFileDelete(pdfio_file_t *file) PDFIO_INTERNAL;
extern void _pdfioFileDelete(pdfio_file_t *pdf) PDFIO_INTERNAL;
extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) PDFIO_FORMAT(2,3) PDFIO_INTERNAL;
extern int _pdfioFileGetChar(pdfio_file_t *pdf) PDFIO_INTERNAL;
extern ssize_t _pdfioFileRead(pdfio_file_t *pdf, char *buffer, size_t bytes) PDFIO_INTERNAL;
extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) PDFIO_INTERNAL;
extern off_t _pdfioFileTell(pdfio_file_t *pdf) PDFIO_INTERNAL;
extern bool _pdfioFileWrite(pdfio_file_t *pdf, const char *buffer, size_t bytes) PDFIO_INTERNAL;
extern void _pdfioObjDelete(pdfio_obj_t *obj) PDFIO_INTERNAL;
extern void _pdfioStreamDelete(pdfio_stream_t *obj) PDFIO_INTERNAL;
extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL;
extern void _pdfioValueDelete(_pdfio_value_t *v) PDFIO_INTERNAL;

View File

@ -32,6 +32,16 @@ void _pdfioStreamDelete(pdfio_stream_t *obj)
}
//
// 'pdfioStreamGetToken()' - Get a token from a stream.
//
// Not implemented yet: nothing is read, `buffer` is left untouched, and
// `false` is always returned.  The explicit return replaces an empty body,
// which left the result undefined for any caller that used it.
//

bool					// O - Currently always `false`
pdfioStreamGetToken(
    pdfio_stream_t *st,			// I - Stream
    char           *buffer,		// I - Token buffer (presumably receives the token - TODO confirm)
    size_t         bufsize)		// I - Size of token buffer
{
  // TODO: Implement me
  (void)st;
  (void)buffer;
  (void)bufsize;

  return (false);
}
//
// '()' - .
//

View File

@ -108,7 +108,7 @@ pdfioStringCreatef(
//
// '()' - Check whether a string has been allocated.
// '_pdfioStringIsAllocated()' - Check whether a string has been allocated.
//
bool // O - `true` if allocated, `false` otherwise

22
pdfio.h
View File

@ -49,17 +49,22 @@ extern "C" {
typedef struct _pdfio_array_s pdfio_array_t;
// Array of PDF values
// NOTE(review): `pdfio_filter_t` in this header offers the same two choices
// (none/Flate) and pdfioObjCreateStream() appears with both parameter types -
// confirm which of the two enumerations is meant to remain.
typedef enum pdfio_compress_e // Types of compression to use when writing streams
{
PDFIO_COMPRESS_NONE, // No compression
PDFIO_COMPRESS_FLATE // Flate compression
} pdfio_compress_t;
typedef struct _pdfio_dict_s pdfio_dict_t;
// Key/value dictionary
typedef struct _pdfio_file_s pdfio_file_t;
// PDF file
typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *data);
// Error callback
// Filter selection for stream data; this is the type of the filter argument
// accepted by pdfioObjCreateStream().
typedef enum pdfio_filter_e // Compression/decompression filters for streams
{
PDFIO_FILTER_NONE, // No filter
PDFIO_FILTER_FLATE // Flate filter
} pdfio_filter_t;
// Direction in which a PDF file is used; this is the type of the mode
// argument accepted by pdfioFileOpen().
typedef enum pdfio_mode_e // Read/write mode
{
PDFIO_MODE_READ, // Read a PDF file
PDFIO_MODE_WRITE // Write a PDF file
} pdfio_mode_t;
typedef struct _pdfio_obj_s pdfio_obj_t;// Numbered object in PDF file
typedef struct pdfio_rect_s // PDF rectangle
{
@ -138,19 +143,20 @@ extern unsigned pdfioFileGetNumObjects(pdfio_file_t *pdf) PDFIO_PUBLIC;
extern unsigned pdfioFileGetNumPages(pdfio_file_t *pdf) PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioFileGetObject(pdfio_file_t *pdf, unsigned number) PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioFileGetPage(pdfio_file_t *pdf, unsigned number) PDFIO_PUBLIC;
extern pdfio_file_t *pdfioFileOpen(const char *filename, const char *mode, pdfio_error_cb_t error_cb, void *error_data) PDFIO_PUBLIC;
extern pdfio_file_t *pdfioFileOpen(const char *filename, pdfio_mode_t mode, pdfio_error_cb_t error_cb, void *error_data) PDFIO_PUBLIC;
extern bool pdfioObjClose(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioObjCreateStream(pdfio_obj_t *obj, pdfio_compress_t compression) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioObjCreateStream(pdfio_obj_t *obj, pdfio_filter_t compression) PDFIO_PUBLIC;
extern pdfio_dict_t *pdfioObjGetDict(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern unsigned pdfioObjGetGeneration(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern unsigned pdfioObjGetNumber(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioObjGetStream(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern const char *pdfioObjGetType(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioObjOpenStream(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *src) PDFIO_PUBLIC;
extern bool pdfioStreamClose(pdfio_stream_t *st) PDFIO_PUBLIC;
extern bool pdfioStreamGetToken(pdfio_stream_t *st, char *buffer, size_t bufsize) PDFIO_PUBLIC;
extern bool pdfioStreamPrintf(pdfio_stream_t *st, const char *format, ...) PDFIO_PUBLIC PDFIO_FORMAT(2,3);
extern bool pdfioStreamPuts(pdfio_stream_t *st, const char *s) PDFIO_PUBLIC;
extern ssize_t pdfioStreamRead(pdfio_stream_t *st, void *buffer, size_t bytes) PDFIO_PUBLIC;