summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/pdf/document.h8
-rw-r--r--include/mupdf/pdf/page.h2
-rw-r--r--source/pdf/pdf-outline.c21
-rw-r--r--source/pdf/pdf-page.c116
-rw-r--r--source/pdf/pdf-xref.c2
5 files changed, 137 insertions, 12 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index a84ee872..177f947c 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -549,6 +549,13 @@ struct pdf_unsaved_sig_s
pdf_unsaved_sig *next;
};
+typedef struct pdf_rev_page_map_s pdf_rev_page_map;
+struct pdf_rev_page_map_s
+{
+ int page;
+ int object;
+};
+
struct pdf_document_s
{
fz_document super;
@@ -576,6 +583,7 @@ struct pdf_document_s
int has_xref_streams;
int page_count;
+ pdf_rev_page_map *rev_page_map;
int repair_attempted;
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index f86d54cc..52d2a4ad 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -4,6 +4,8 @@
int pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *pageobj);
int pdf_count_pages(fz_context *ctx, pdf_document *doc);
pdf_obj *pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle);
+void pdf_load_page_tree(fz_context *ctx, pdf_document *doc);
+void pdf_drop_page_tree(fz_context *ctx, pdf_document *doc);
/*
pdf_lookup_anchor: Find the page number of a named destination.
diff --git a/source/pdf/pdf-outline.c b/source/pdf/pdf-outline.c
index c31a1954..7c8cfdaa 100644
--- a/source/pdf/pdf-outline.c
+++ b/source/pdf/pdf-outline.c
@@ -69,12 +69,21 @@ fz_outline *
pdf_load_outline(fz_context *ctx, pdf_document *doc)
{
pdf_obj *root, *obj, *first;
+ fz_outline *outline = NULL;
- root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
- obj = pdf_dict_get(ctx, root, PDF_NAME_Outlines);
- first = pdf_dict_get(ctx, obj, PDF_NAME_First);
- if (first)
- return pdf_load_outline_imp(ctx, doc, first);
+ pdf_load_page_tree(ctx, doc); /* cache page tree for fast link destination lookups */
+ fz_try(ctx)
+ {
+ root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
+ obj = pdf_dict_get(ctx, root, PDF_NAME_Outlines);
+ first = pdf_dict_get(ctx, obj, PDF_NAME_First);
+ if (first)
+ outline = pdf_load_outline_imp(ctx, doc, first);
+ }
+ fz_always(ctx)
+ pdf_drop_page_tree(ctx, doc);
+ fz_catch(ctx)
+ fz_rethrow(ctx);
- return NULL;
+ return outline;
}
diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c
index 44f652bd..0699c672 100644
--- a/source/pdf/pdf-page.c
+++ b/source/pdf/pdf-page.c
@@ -4,11 +4,87 @@ int
pdf_count_pages(fz_context *ctx, pdf_document *doc)
{
if (doc->page_count == 0)
+ doc->page_count = pdf_to_int(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count"));
+ return doc->page_count;
+}
+
+static int
+pdf_load_page_tree_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int idx)
+{
+ pdf_obj *type = pdf_dict_get(ctx, node, PDF_NAME_Type);
+ if (pdf_name_eq(ctx, type, PDF_NAME_Pages))
{
- pdf_obj *count = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count");
- doc->page_count = pdf_to_int(ctx, count);
+ pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME_Kids);
+ int count = pdf_to_int(ctx, pdf_dict_get(ctx, node, PDF_NAME_Count));
+ int i, n = pdf_array_len(ctx, kids);
+
+ /* if Kids length is same as Count, all children must be page objects */
+ if (n == count)
+ {
+ for (i = 0; i < n; ++i)
+ {
+ if (idx >= doc->page_count)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree");
+ doc->rev_page_map[idx].page = idx;
+ doc->rev_page_map[idx].object = pdf_to_num(ctx, pdf_array_get(ctx, kids, i));
+ ++idx;
+ }
+ }
+
+ /* else Kids may contain intermediate nodes */
+ else
+ {
+ if (pdf_mark_obj(ctx, node))
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree");
+ fz_try(ctx)
+ for (i = 0; i < n; ++i)
+ idx = pdf_load_page_tree_imp(ctx, doc, pdf_array_get(ctx, kids, i), idx);
+ fz_always(ctx)
+ pdf_unmark_obj(ctx, node);
+ fz_catch(ctx)
+ fz_rethrow(ctx);
+ }
+ }
+ else if (pdf_name_eq(ctx, type, PDF_NAME_Page))
+ {
+ if (idx >= doc->page_count)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree");
+ doc->rev_page_map[idx].page = idx;
+ doc->rev_page_map[idx].object = pdf_to_num(ctx, node);
+ ++idx;
+ }
+ else
+ {
+ fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree");
+ }
+ return idx;
+}
+
+static int
+cmp_rev_page_map(const void *va, const void *vb)
+{
+ const pdf_rev_page_map *a = va;
+ const pdf_rev_page_map *b = vb;
+ return a->object - b->object;
+}
+
+void
+pdf_load_page_tree(fz_context *ctx, pdf_document *doc)
+{
+ if (!doc->rev_page_map)
+ {
+ int n = pdf_count_pages(ctx, doc);
+ doc->rev_page_map = fz_malloc_array(ctx, n, sizeof *doc->rev_page_map);
+ pdf_load_page_tree_imp(ctx, doc, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages"), 0);
+ qsort(doc->rev_page_map, n, sizeof *doc->rev_page_map, cmp_rev_page_map);
}
- return doc->page_count;
+}
+
+void
+pdf_drop_page_tree(fz_context *ctx, pdf_document *doc)
+{
+ fz_free(ctx, doc->rev_page_map);
+ doc->rev_page_map = NULL;
}
enum
@@ -40,7 +116,7 @@ pdf_lookup_page_loc_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int *
len = pdf_array_len(ctx, kids);
if (len == 0)
- fz_throw(ctx, FZ_ERROR_GENERIC, "Malformed pages tree");
+ fz_throw(ctx, FZ_ERROR_GENERIC, "malformed page tree");
/* Every node we need to unmark goes into the stack */
if (stack_len == stack_max)
@@ -158,8 +234,8 @@ pdf_count_pages_before_kid(fz_context *ctx, pdf_document *doc, pdf_obj *parent,
fz_throw(ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array");
}
-int
-pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *node)
+static int
+pdf_lookup_page_number_slow(fz_context *ctx, pdf_document *doc, pdf_obj *node)
{
int needle = pdf_to_num(ctx, node);
int total = 0;
@@ -200,6 +276,34 @@ pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *node)
return total;
}
+static int
+pdf_lookup_page_number_fast(fz_context *ctx, pdf_document *doc, int needle)
+{
+ int l = 0;
+ int r = doc->page_count - 1;
+ while (l <= r)
+ {
+ int m = (l + r) >> 1;
+ int c = needle - doc->rev_page_map[m].object;
+ if (c < 0)
+ r = m - 1;
+ else if (c > 0)
+ l = m + 1;
+ else
+ return doc->rev_page_map[m].page;
+ }
+ return -1;
+}
+
+int
+pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *page)
+{
+ if (doc->rev_page_map)
+ return pdf_lookup_page_number_fast(ctx, doc, pdf_to_num(ctx, page));
+ else
+ return pdf_lookup_page_number_slow(ctx, doc, page);
+}
+
int
pdf_lookup_anchor(fz_context *ctx, pdf_document *doc, const char *name, float *xp, float *yp)
{
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 739a5693..e7278ae8 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1528,6 +1528,8 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
pdf_drop_obj(ctx, doc->orphans[i]);
fz_free(ctx, doc->orphans);
+
+ fz_free(ctx, doc->rev_page_map);
}
fz_always(ctx)
{