summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/mupdf/pdf/document.h 2
-rw-r--r-- include/mupdf/pdf/parse.h 2
-rw-r--r-- source/pdf/pdf-parse.c 14
-rw-r--r-- source/pdf/pdf-repair.c 5
-rw-r--r-- source/pdf/pdf-xref.c 36
5 files changed, 49 insertions, 10 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index cd0c03ab..73b3692d 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -218,6 +218,8 @@ struct pdf_document_s
int page_count;
+ int repair_attempted;
+
/* State indicating which file parsing method we are using */
int file_reading_linearly;
int file_length;
diff --git a/include/mupdf/pdf/parse.h b/include/mupdf/pdf/parse.h
index 0dc52a78..0564a748 100644
--- a/include/mupdf/pdf/parse.h
+++ b/include/mupdf/pdf/parse.h
@@ -28,7 +28,7 @@ pdf_token pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf);
pdf_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
pdf_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
pdf_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
-pdf_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs);
+pdf_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs, int *try_repair);
/*
pdf_print_token: print a lexed token to a buffer, growing if necessary
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index aaa45c0d..66e0fbe7 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -496,7 +496,7 @@ pdf_parse_stm_obj(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
pdf_obj *
pdf_parse_ind_obj(pdf_document *doc,
fz_stream *file, pdf_lexbuf *buf,
- int *onum, int *ogen, int *ostmofs)
+ int *onum, int *ogen, int *ostmofs, int *try_repair)
{
pdf_obj *obj = NULL;
int num = 0, gen = 0, stm_ofs;
@@ -508,17 +508,29 @@ pdf_parse_ind_obj(pdf_document *doc,
tok = pdf_lex(file, buf);
if (tok != PDF_TOK_INT)
+ {
+ if (try_repair)
+ *try_repair = 1;
fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number");
+ }
num = buf->i;
tok = pdf_lex(file, buf);
if (tok != PDF_TOK_INT)
+ {
+ if (try_repair)
+ *try_repair = 1;
fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num);
+ }
gen = buf->i;
tok = pdf_lex(file, buf);
if (tok != PDF_TOK_OBJ)
+ {
+ if (try_repair)
+ *try_repair = 1;
fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
+ }
tok = pdf_lex(file, buf);
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index c742714d..e9a60986 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -234,7 +234,6 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
}
}
-/* Entered with file locked, remains locked throughout. */
void
pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
{
@@ -267,6 +266,10 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
fz_var(list);
fz_var(obj);
+ if (doc->repair_attempted)
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again");
+ doc->repair_attempted = 1;
+
doc->dirty = 1;
/* Can't support incremental update after repair */
doc->freeze_updates = 1;
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 338dec94..f3deeebf 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -562,7 +562,7 @@ pdf_read_new_xref(pdf_document *doc, pdf_lexbuf *buf)
{
pdf_xref_entry *entry;
int ofs = fz_tell(doc->file);
- trailer = pdf_parse_ind_obj(doc, doc->file, buf, &num, &gen, &stm_ofs);
+ trailer = pdf_parse_ind_obj(doc, doc->file, buf, &num, &gen, &stm_ofs, NULL);
entry = pdf_get_populating_xref_entry(doc, num);
entry->ofs = ofs;
entry->gen = gen;
@@ -832,7 +832,7 @@ pdf_load_linear(pdf_document *doc)
{
pdf_xref_entry *entry;
- dict = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs);
+ dict = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs, NULL);
if (!pdf_is_dict(dict))
fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
o = pdf_dict_gets(dict, "Linearized");
@@ -1611,13 +1611,18 @@ void
pdf_cache_object(pdf_document *doc, int num, int gen)
{
pdf_xref_entry *x;
- int rnum, rgen;
+ int rnum, rgen, try_repair;
fz_context *ctx = doc->ctx;
+ fz_var(try_repair);
+
if (num < 0 || num >= pdf_xref_len(doc))
fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(doc));
object_updated:
+ try_repair = 0;
+ rnum = num;
+
x = pdf_get_xref_entry(doc, num);
if (x->obj)
@@ -1634,18 +1639,35 @@ object_updated:
fz_try(ctx)
{
x->obj = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base,
- &rnum, &rgen, &x->stm_ofs);
+ &rnum, &rgen, &x->stm_ofs, &try_repair);
}
fz_catch(ctx)
{
- fz_rethrow_message(ctx, "cannot parse object (%d %d R)", num, gen);
+ if (!try_repair || fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ fz_rethrow(ctx);
}
- if (rnum != num)
+ if (!try_repair && rnum != num)
{
pdf_drop_obj(x->obj);
x->obj = NULL;
- fz_rethrow_message(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
+ try_repair = 1;
+ }
+
+ if (try_repair)
+ {
+ fz_try(ctx)
+ {
+ pdf_repair_xref(doc, &doc->lexbuf.base);
+ }
+ fz_catch(ctx)
+ {
+ if (rnum == num)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse object (%d %d R)", num, gen);
+ else
+ fz_throw(ctx, FZ_ERROR_GENERIC, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
+ }
+ goto object_updated;
}
if (doc->crypt)