diff options
-rw-r--r-- | include/mupdf/pdf/document.h | 8 | ||||
-rw-r--r-- | include/mupdf/pdf/page.h | 2 | ||||
-rw-r--r-- | source/pdf/pdf-outline.c | 21 | ||||
-rw-r--r-- | source/pdf/pdf-page.c | 116 | ||||
-rw-r--r-- | source/pdf/pdf-xref.c | 2 |
5 files changed, 137 insertions, 12 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h index a84ee872..177f947c 100644 --- a/include/mupdf/pdf/document.h +++ b/include/mupdf/pdf/document.h @@ -549,6 +549,13 @@ struct pdf_unsaved_sig_s pdf_unsaved_sig *next; }; +typedef struct pdf_rev_page_map_s pdf_rev_page_map; +struct pdf_rev_page_map_s +{ + int page; + int object; +}; + struct pdf_document_s { fz_document super; @@ -576,6 +583,7 @@ struct pdf_document_s int has_xref_streams; int page_count; + pdf_rev_page_map *rev_page_map; int repair_attempted; diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h index f86d54cc..52d2a4ad 100644 --- a/include/mupdf/pdf/page.h +++ b/include/mupdf/pdf/page.h @@ -4,6 +4,8 @@ int pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *pageobj); int pdf_count_pages(fz_context *ctx, pdf_document *doc); pdf_obj *pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle); +void pdf_load_page_tree(fz_context *ctx, pdf_document *doc); +void pdf_drop_page_tree(fz_context *ctx, pdf_document *doc); /* pdf_lookup_anchor: Find the page number of a named destination. diff --git a/source/pdf/pdf-outline.c b/source/pdf/pdf-outline.c index c31a1954..7c8cfdaa 100644 --- a/source/pdf/pdf-outline.c +++ b/source/pdf/pdf-outline.c @@ -69,12 +69,21 @@ fz_outline * pdf_load_outline(fz_context *ctx, pdf_document *doc) { pdf_obj *root, *obj, *first; + fz_outline *outline = NULL; - root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root); - obj = pdf_dict_get(ctx, root, PDF_NAME_Outlines); - first = pdf_dict_get(ctx, obj, PDF_NAME_First); - if (first) - return pdf_load_outline_imp(ctx, doc, first); + pdf_load_page_tree(ctx, doc); /* cache page tree for fast link destination lookups */ + fz_try(ctx) + { + root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root); + obj = pdf_dict_get(ctx, root, PDF_NAME_Outlines); + first = pdf_dict_get(ctx, obj, PDF_NAME_First); + if (first) + outline = pdf_load_outline_imp(ctx, doc, first); + } + fz_always(ctx) + pdf_drop_page_tree(ctx, doc); + fz_catch(ctx) + fz_rethrow(ctx); - return NULL; + return outline; } diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c index 44f652bd..0699c672 100644 --- a/source/pdf/pdf-page.c +++ b/source/pdf/pdf-page.c @@ -4,11 +4,87 @@ int pdf_count_pages(fz_context *ctx, pdf_document *doc) { if (doc->page_count == 0) + doc->page_count = pdf_to_int(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count")); + return doc->page_count; +} + +static int +pdf_load_page_tree_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int idx) +{ + pdf_obj *type = pdf_dict_get(ctx, node, PDF_NAME_Type); + if (pdf_name_eq(ctx, type, PDF_NAME_Pages)) { - pdf_obj *count = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count"); - doc->page_count = pdf_to_int(ctx, count); + pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME_Kids); + int count = pdf_to_int(ctx, pdf_dict_get(ctx, node, PDF_NAME_Count)); + int i, n = pdf_array_len(ctx, kids); + + /* if Kids length is same as Count, all children must be page objects */ + if (n == count) + { + for (i = 0; i < n; ++i) + { + if (idx >= doc->page_count) + fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree"); + doc->rev_page_map[idx].page = idx; + doc->rev_page_map[idx].object = pdf_to_num(ctx, pdf_array_get(ctx, kids, i)); + ++idx; + } + } + + /* else Kids may contain intermediate nodes */ + else + { + if (pdf_mark_obj(ctx, node)) + fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree"); + fz_try(ctx) + for (i = 0; i < n; ++i) + idx = pdf_load_page_tree_imp(ctx, doc, pdf_array_get(ctx, kids, i), idx); + fz_always(ctx) + pdf_unmark_obj(ctx, node); + fz_catch(ctx) + fz_rethrow(ctx); + } + } + else if (pdf_name_eq(ctx, type, PDF_NAME_Page)) + { + if (idx >= doc->page_count) + fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree"); + doc->rev_page_map[idx].page = idx; + doc->rev_page_map[idx].object = pdf_to_num(ctx, node); + ++idx; + } + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree"); + } + return idx; +} + +static int +cmp_rev_page_map(const void *va, const void *vb) +{ + const pdf_rev_page_map *a = va; + const pdf_rev_page_map *b = vb; + return a->object - b->object; +} + +void +pdf_load_page_tree(fz_context *ctx, pdf_document *doc) +{ + if (!doc->rev_page_map) + { + int n = pdf_count_pages(ctx, doc); + doc->rev_page_map = fz_malloc_array(ctx, n, sizeof *doc->rev_page_map); + pdf_load_page_tree_imp(ctx, doc, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages"), 0); + qsort(doc->rev_page_map, n, sizeof *doc->rev_page_map, cmp_rev_page_map); } - return doc->page_count; +} + +void +pdf_drop_page_tree(fz_context *ctx, pdf_document *doc) +{ + fz_free(ctx, doc->rev_page_map); + doc->rev_page_map = NULL; } enum @@ -40,7 +116,7 @@ pdf_lookup_page_loc_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int * len = pdf_array_len(ctx, kids); if (len == 0) - fz_throw(ctx, FZ_ERROR_GENERIC, "Malformed pages tree"); + fz_throw(ctx, FZ_ERROR_GENERIC, "malformed page tree"); /* Every node we need to unmark goes into the stack */ if (stack_len == stack_max) @@ -158,8 +234,8 @@ pdf_count_pages_before_kid(fz_context *ctx, pdf_document *doc, pdf_obj *parent, fz_throw(ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array"); } -int -pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *node) +static int +pdf_lookup_page_number_slow(fz_context *ctx, pdf_document *doc, pdf_obj *node) { int needle = pdf_to_num(ctx, node); int total = 0; @@ -200,6 +276,34 @@ pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *node) return total; } +static int +pdf_lookup_page_number_fast(fz_context *ctx, pdf_document *doc, int needle) +{ + int l = 0; + int r = doc->page_count - 1; + while (l <= r) + { + int m = (l + r) >> 1; + int c = needle - doc->rev_page_map[m].object; + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return doc->rev_page_map[m].page; + } + return -1; +} + +int +pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *page) +{ + if (doc->rev_page_map) + return pdf_lookup_page_number_fast(ctx, doc, pdf_to_num(ctx, page)); + else + return pdf_lookup_page_number_slow(ctx, doc, page); +} + int pdf_lookup_anchor(fz_context *ctx, pdf_document *doc, const char *name, float *xp, float *yp) { diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 739a5693..e7278ae8 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -1528,6 +1528,8 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc) pdf_drop_obj(ctx, doc->orphans[i]); fz_free(ctx, doc->orphans); + + fz_free(ctx, doc->rev_page_map); } fz_always(ctx) { |