diff options
-rw-r--r-- | include/mupdf/pdf/document.h | 1 | ||||
-rw-r--r-- | include/mupdf/pdf/xref.h | 2 | ||||
-rw-r--r-- | source/pdf/pdf-repair.c | 3 | ||||
-rw-r--r-- | source/pdf/pdf-xref.c | 82 |
4 files changed, 78 insertions, 10 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h index c9851a47..f208b549 100644 --- a/include/mupdf/pdf/document.h +++ b/include/mupdf/pdf/document.h @@ -213,6 +213,7 @@ struct pdf_document_s int max_xref_len; int num_xref_sections; pdf_xref *xref_sections; + int *xref_index; int xref_altered; int freeze_updates; int has_xref_streams; diff --git a/include/mupdf/pdf/xref.h b/include/mupdf/pdf/xref.h index ac8525ac..63580723 100644 --- a/include/mupdf/pdf/xref.h +++ b/include/mupdf/pdf/xref.h @@ -96,7 +96,7 @@ void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n); void pdf_xref_ensure_incremental_object(pdf_document *doc, int num); int pdf_xref_is_incremental(pdf_document *doc, int num); -void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf); +void pdf_repair_xref(pdf_document *doc); void pdf_repair_obj_stms(pdf_document *doc); pdf_obj *pdf_new_ref(pdf_document *doc, pdf_obj *obj); void pdf_ensure_solid_xref(pdf_document *doc, int num); diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c index 7e35c5fb..3bd3850a 100644 --- a/source/pdf/pdf-repair.c +++ b/source/pdf/pdf-repair.c @@ -235,7 +235,7 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen) } void -pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) +pdf_repair_xref(pdf_document *doc) { pdf_obj *dict, *obj = NULL; pdf_obj *length; @@ -258,6 +258,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) int next; int i, n, c; fz_context *ctx = doc->ctx; + pdf_lexbuf *buf = &doc->lexbuf.base; fz_var(encrypt); fz_var(id); diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 0444085f..c99f8350 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -57,6 +57,19 @@ static void pdf_free_xref_sections(pdf_document *doc) doc->num_xref_sections = 0; } +static void +extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen) +{ + int i; + + doc->xref_index = fz_resize_array(ctx, doc->xref_index, newlen, sizeof(int)); + for (i = doc->max_xref_len; i < newlen; i++) + { + doc->xref_index[i] = 0; + } + doc->max_xref_len = newlen; +} + /* This is only ever called when we already have an incremental * xref. This means there will only be 1 subsec, and it will be * a complete subsec. */ @@ -84,7 +97,7 @@ static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen) xref->num_objects = newlen; sub->len = newlen; if (doc->max_xref_len < newlen) - doc->max_xref_len = newlen; + extend_xref_index(ctx, doc, newlen); } static void pdf_populate_next_xref_level(pdf_document *doc) @@ -174,7 +187,7 @@ ensure_solid_xref(pdf_document *doc, int num, int which) xref->num_objects = num; xref->subsec = new_sub; if (doc->max_xref_len < num) - doc->max_xref_len = num; + extend_xref_index(ctx, doc, num); } /* Used while reading the individual xref sections from a file */ @@ -225,8 +238,13 @@ pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i) if (i < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested"); + if (i <= doc->max_xref_len) + j = doc->xref_index[i]; + else + j = 0; + /* Find the first xref section where the entry is defined. */ - for (j = 0; j < doc->num_xref_sections; j++) + for (; j < doc->num_xref_sections; j++) { xref = &doc->xref_sections[j]; @@ -241,13 +259,17 @@ pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i) entry = &sub->table[i - sub->start]; if (entry->type) + { + doc->xref_index[i] = j; return entry; + } } } } /* Didn't find the entry in any section. Return the entry from * the final section. */ + doc->xref_index[i] = 0; if (i < xref->num_objects) { xref = &doc->xref_sections[0]; @@ -284,6 +306,7 @@ static void ensure_incremental_xref(pdf_document *doc) pdf_xref_entry *new_table = fz_calloc(ctx, xref->num_objects, sizeof(pdf_xref_entry)); pdf_xref_subsec *sub; pdf_obj *trailer = NULL; + int i; fz_var(trailer); fz_try(ctx) @@ -311,6 +334,12 @@ static void ensure_incremental_xref(pdf_document *doc) pdf_drop_obj(trailer); fz_rethrow(ctx); } + + /* Update the xref_index */ + for (i = 0; i < doc->max_xref_len; i++) + { + doc->xref_index[i]++; + } } } @@ -331,6 +360,7 @@ static pdf_xref_entry *pdf_get_incremental_xref_entry(pdf_document *doc, int i) sub = xref->subsec; assert(sub != NULL && sub->next == NULL); assert(i >= sub->start && i < sub->start + sub->len); + doc->xref_index[i] = 0; return &sub->table[i - sub->start]; } @@ -365,7 +395,7 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num) ensure_incremental_xref(doc); /* Search for the section that contains this object */ - for (i = 0; i < doc->num_xref_sections; i++) + for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++) { pdf_xref *xref = &doc->xref_sections[i]; @@ -386,6 +416,7 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num) return; /* Move the object to the incremental section */ + doc->xref_index[num] = 0; old_entry = &sub->table[num - sub->start]; new_entry = pdf_get_incremental_xref_entry(doc, num); *new_entry = *old_entry; @@ -396,13 +427,14 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num) void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n) { fz_context *ctx = doc->ctx; - pdf_xref *xref; + pdf_xref *xref = NULL; pdf_xref_subsec *sub; pdf_obj *trailer = pdf_keep_obj(pdf_trailer(doc)); fz_var(xref); fz_try(ctx) { + doc->xref_index = fz_calloc(ctx, n, sizeof(int)); xref = fz_malloc_struct(ctx, pdf_xref); sub = fz_malloc_struct(ctx, pdf_xref_subsec); @@ -420,6 +452,8 @@ void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n) doc->xref_sections = xref; doc->num_xref_sections = 1; doc->max_xref_len = n; + + memset(doc->xref_index, 0, sizeof(int)*doc->max_xref_len); } fz_catch(ctx) { @@ -619,7 +653,7 @@ pdf_xref_find_subsection(pdf_document *doc, int ofs, int len) } xref->num_objects = new_max; if (doc->max_xref_len < new_max) - doc->max_xref_len = new_max; + extend_xref_index(ctx, doc, new_max); } else { @@ -988,6 +1022,32 @@ pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_pre } } +static void +pdf_prime_xref_index(pdf_document *doc) +{ + int i, j; + int *idx = doc->xref_index; + + for (i = doc->num_xref_sections-1; i >= 0; i--) + { + pdf_xref *xref = &doc->xref_sections[i]; + pdf_xref_subsec *subsec = xref->subsec; + while (subsec != NULL) + { + int start = subsec->start; + int end = subsec->start + subsec->len; + for (j = start; j < end; j++) + { + char t = subsec->table[j-start].type; + if (t != 0 && t != 'f') + idx[j] = i; + } + + subsec = subsec->next; + } + } +} + /* * load xref tables from pdf * @@ -1009,6 +1069,8 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf) if (pdf_xref_len(doc) == 0) fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty"); + pdf_prime_xref_index(doc); + entry = pdf_get_xref_entry(doc, 0); /* broken pdfs where first object is missing */ if (!entry->type) @@ -1308,7 +1370,10 @@ pdf_init_document(pdf_document *doc) int hasroot, hasinfo; if (repaired) - pdf_repair_xref(doc, &doc->lexbuf.base); + { + pdf_repair_xref(doc); + pdf_prime_xref_index(doc); + } encrypt = pdf_dict_gets(pdf_trailer(doc), "Encrypt"); id = pdf_dict_gets(pdf_trailer(doc), "ID"); @@ -1888,7 +1953,8 @@ object_updated: { fz_try(ctx) { - pdf_repair_xref(doc, &doc->lexbuf.base); + pdf_repair_xref(doc); + pdf_prime_xref_index(doc); } fz_catch(ctx) { |