From 2ba9166cfb7590e3422d691418353de027276c17 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Thu, 12 Oct 2017 13:58:00 +0200 Subject: Don't cache page count in pdf_document. Make sure any changes to the page tree are always reflected immediately. The rev_page_map lookup cache exists when we load the outlines in order to resolve links faster, so we don't need to worry about that one. The linear_page_refs stuff is more troublesome, so don't mix editing a PDF with progressive loading! --- source/pdf/pdf-clean-file.c | 3 --- source/pdf/pdf-page.c | 21 ++++++++------------- source/pdf/pdf-xref.c | 25 ++++++++++++------------- 3 files changed, 20 insertions(+), 29 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-clean-file.c b/source/pdf/pdf-clean-file.c index 224a4d03..86a6b6a0 100644 --- a/source/pdf/pdf-clean-file.c +++ b/source/pdf/pdf-clean-file.c @@ -234,9 +234,6 @@ static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv) pdf_dict_put_drop(ctx, pages, PDF_NAME_Count, countobj); pdf_dict_put_drop(ctx, pages, PDF_NAME_Kids, kids); - /* Force the next call to pdf_count_pages to recount */ - glo->doc->page_count = 0; - pagecount = pdf_count_pages(ctx, doc); page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums)); for (i = 0; i < pagecount; i++) diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c index de12fb71..90f77f1e 100644 --- a/source/pdf/pdf-page.c +++ b/source/pdf/pdf-page.c @@ -8,9 +8,7 @@ int pdf_count_pages(fz_context *ctx, pdf_document *doc) { - if (doc->page_count == 0) - doc->page_count = pdf_to_int(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count")); - return doc->page_count; + return pdf_to_int(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count")); } static int @@ -28,7 +26,7 @@ pdf_load_page_tree_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int id { for (i = 0; i < n; ++i) { - if (idx >= doc->page_count) + if (idx >= doc->rev_page_count) fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree"); doc->rev_page_map[idx].page = idx; doc->rev_page_map[idx].object = pdf_to_num(ctx, pdf_array_get(ctx, kids, i)); @@ -52,7 +50,7 @@ pdf_load_page_tree_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int id } else if (pdf_name_eq(ctx, type, PDF_NAME_Page)) { - if (idx >= doc->page_count) + if (idx >= doc->rev_page_count) fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree"); doc->rev_page_map[idx].page = idx; doc->rev_page_map[idx].object = pdf_to_num(ctx, node); @@ -78,10 +76,10 @@ pdf_load_page_tree(fz_context *ctx, pdf_document *doc) { if (!doc->rev_page_map) { - int n = pdf_count_pages(ctx, doc); - doc->rev_page_map = fz_malloc_array(ctx, n, sizeof *doc->rev_page_map); + doc->rev_page_count = pdf_count_pages(ctx, doc); + doc->rev_page_map = fz_malloc_array(ctx, doc->rev_page_count, sizeof *doc->rev_page_map); pdf_load_page_tree_imp(ctx, doc, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages"), 0); - qsort(doc->rev_page_map, n, sizeof *doc->rev_page_map, cmp_rev_page_map); + qsort(doc->rev_page_map, doc->rev_page_count, sizeof *doc->rev_page_map, cmp_rev_page_map); } } @@ -90,6 +88,7 @@ pdf_drop_page_tree(fz_context *ctx, pdf_document *doc) { fz_free(ctx, doc->rev_page_map); doc->rev_page_map = NULL; + doc->rev_page_count = 0; } enum @@ -285,7 +284,7 @@ static int pdf_lookup_page_number_fast(fz_context *ctx, pdf_document *doc, int needle) { int l = 0; - int r = doc->page_count - 1; + int r = doc->rev_page_count - 1; while (l <= r) { int m = (l + r) >> 1; @@ -1020,8 +1019,6 @@ pdf_delete_page(fz_context *ctx, pdf_document *doc, int at) pdf_dict_put_drop(ctx, parent, PDF_NAME_Count, pdf_new_int(ctx, doc, count - 1)); parent = pdf_dict_get(ctx, parent, PDF_NAME_Parent); } - - doc->page_count = 0; /* invalidate cached value */ } void @@ -1117,6 +1114,4 @@ pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page_ref) pdf_dict_put_drop(ctx, parent, PDF_NAME_Count, pdf_new_int(ctx, doc, count + 1)); parent = pdf_dict_get(ctx, parent, PDF_NAME_Parent); } - - doc->page_count = 0; /* invalidate cached value */ } diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 02fe6ee4..265b0c05 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -41,7 +41,6 @@ static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_x for (e = 0; e < sub->len; e++) { pdf_xref_entry *entry = &sub->table[e]; - if (entry->obj) { pdf_drop_obj(ctx, entry->obj); @@ -1299,9 +1298,9 @@ pdf_load_linear(fz_context *ctx, pdf_document *doc) pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), &doc->lexbuf.base, 0); - doc->page_count = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_N)); - doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *)); - memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*)); + doc->linear_page_count = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_N)); + doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->linear_page_count, sizeof(pdf_obj *)); + memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*)); doc->linear_obj = dict; doc->linear_pos = fz_tell(ctx, doc->file); doc->linear_page1_obj_num = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_O)); @@ -1503,7 +1502,7 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc) pdf_drop_obj(ctx, doc->linear_obj); if (doc->linear_page_refs) { - for (i=0; i < doc->page_count; i++) + for (i=0; i < doc->linear_page_count; i++) pdf_drop_obj(ctx, doc->linear_page_refs[i]); fz_free(ctx, doc->linear_page_refs); @@ -2321,8 +2320,8 @@ pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) /* Malloc the structures (use realloc to cope with the fact we * may try this several times before enough data is loaded) */ - doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page)); - memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1)); + doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->linear_page_count+1, sizeof(*doc->hint_page)); + memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1)); doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets)); memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num); doc->hint_obj_offsets_max = max_object_num; @@ -2353,7 +2352,7 @@ pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) /* We don't care about the number of objects in the first page */ (void)fz_read_bits(ctx, stream, page_obj_num_bits); j = 1; - for (i = 1; i < doc->page_count; i++) + for (i = 1; i < doc->linear_page_count; i++) { int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits); @@ -2364,7 +2363,7 @@ pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) fz_sync_bits(ctx, stream); /* Item 2: Page lengths */ j = doc->hint_page[0].offset; - for (i = 0; i < doc->page_count; i++) + for (i = 0; i < doc->linear_page_count; i++) { int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits); int old = j; @@ -2378,7 +2377,7 @@ pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) fz_sync_bits(ctx, stream); /* Item 3: Shared references */ shared = 0; - for (i = 0; i < doc->page_count; i++) + for (i = 0; i < doc->linear_page_count; i++) { int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits); doc->hint_page[i].index = shared; @@ -2483,7 +2482,7 @@ pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) { doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset; } - for (i = 0; i < doc->page_count; i++) + for (i = 0; i < doc->linear_page_count; i++) { doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset; } @@ -2554,8 +2553,8 @@ pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum pdf_load_hinted_page(ctx, doc, pagenum); - if (pagenum < 0 || pagenum >= doc->page_count) - fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count); + if (pagenum < 0 || pagenum >= doc->linear_page_count) + fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->linear_page_count); if (doc->linear_pos == doc->file_length) return doc->linear_page_refs[pagenum]; -- cgit v1.2.3