summaryrefslogtreecommitdiff
path: root/source/pdf/pdf-xref.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2015-10-01 18:05:46 +0100
committerRobin Watts <robin.watts@artifex.com>2015-10-02 15:53:19 +0100
commit1acaaf2b40614401378aa697de47093be9f390fe (patch)
tree884597d2a9345e93aa55f19aba40b9d2f40f8100 /source/pdf/pdf-xref.c
parent1cf9a6c5092adce610f3e6b33219e09b4383249f (diff)
downloadmupdf-1acaaf2b40614401378aa697de47093be9f390fe.tar.xz
Bug 696129: Be more forgiving of broken files.
The PDF spec says that old format xrefs should start with: xref\n<start> <len> The example file in question has: xref <start> <len> which confuses our parsing code. Update the parse code to avoid using fz_read_line, and to instead work on a char level. Also, downgrade the error given when the first object is not free to be a warning. Now we do 'just in time' repair, we are probably better able to cope.
Diffstat (limited to 'source/pdf/pdf-xref.c')
-rw-r--r--source/pdf/pdf-xref.c37
1 files changed, 32 insertions, 5 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 727dcda7..7c722c9c 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -587,6 +587,31 @@ pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
}
+static void
+fz_skip_space(fz_context *ctx, fz_stream *stm)
+{
+ do
+ {
+ int c = fz_peek_byte(ctx, stm);
+ if (c > 32 && c != EOF)
+ return;
+ (void)fz_read_byte(ctx, stm);
+ }
+ while (1);
+}
+
+static int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
+{
+ while (*str)
+ {
+ int c = fz_peek_byte(ctx, stm);
+ if (c == EOF || c != *str++)
+ return 1;
+ (void)fz_read_byte(ctx, stm);
+ }
+ return 0;
+}
+
/*
* trailer dictionary
*/
@@ -608,9 +633,10 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
/* Record the current file read offset so that we can reinstate it */
ofs = fz_tell(ctx, doc->file);
- fz_read_line(ctx, doc->file, buf->scratch, buf->size);
- if (strncmp(buf->scratch, "xref", 4) != 0)
+ fz_skip_space(ctx, doc->file);
+ if (fz_skip_string(ctx, doc->file, "xref"))
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
+ fz_skip_space(ctx, doc->file);
while (1)
{
@@ -752,9 +778,10 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
pdf_xref_entry *table;
- fz_read_line(ctx, file, buf->scratch, buf->size);
- if (strncmp(buf->scratch, "xref", 4) != 0)
+ fz_skip_space(ctx, doc->file);
+ if (fz_skip_string(ctx, doc->file, "xref"))
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
+ fz_skip_space(ctx, doc->file);
while (1)
{
@@ -1151,7 +1178,7 @@ pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
}
/* broken pdfs where first object is not free */
else if (entry->type != 'f')
- fz_throw(ctx, FZ_ERROR_GENERIC, "first object in xref is not free");
+ fz_warn(ctx, "first object in xref is not free");
/* broken pdfs where object offsets are out of range */
xref_len = pdf_xref_len(ctx, doc);