summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/mupdf/pdf/document.h 6
-rw-r--r-- include/mupdf/pdf/page.h 1
-rw-r--r-- source/pdf/pdf-page.c 361
-rw-r--r-- source/pdf/pdf-write.c 16
-rw-r--r-- source/pdf/pdf-xref.c 16
-rw-r--r-- source/tools/pdfclean.c 4
-rw-r--r-- source/tools/pdfinfo.c 8
-rw-r--r-- source/tools/pdfposter.c 2
-rw-r--r-- source/tools/pdfshow.c 2
9 files changed, 194 insertions, 222 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index c65fe893..9a23fd7e 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -187,12 +187,8 @@ struct pdf_document_s
int xref_altered;
int freeze_updates;
- int page_len;
- int page_cap;
- pdf_obj **page_objs;
- pdf_obj **page_refs;
+ int page_count;
int resources_localised;
- int needs_page_tree_rebuild;
pdf_lexbuf_large lexbuf;
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index 7eaa93d9..c1fffa57 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -3,6 +3,7 @@
int pdf_lookup_page_number(pdf_document *doc, pdf_obj *pageobj);
int pdf_count_pages(pdf_document *doc);
+pdf_obj *pdf_lookup_page_obj(pdf_document *doc, int needle);
/*
pdf_load_page: Load a page and its resources.
diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c
index 2e14bf6c..c58995d6 100644
--- a/source/pdf/pdf-page.c
+++ b/source/pdf/pdf-page.c
@@ -1,180 +1,163 @@
#include "mupdf/pdf.h"
-struct info
-{
- pdf_obj *resources;
- pdf_obj *mediabox;
- pdf_obj *cropbox;
- pdf_obj *rotate;
-};
-
-typedef struct pdf_page_load_s pdf_page_load;
-
-struct pdf_page_load_s
+/*
+ * Return the number of pages in the document, taken from the Count
+ * entry reached through the trailer path Root/Pages/Count.
+ *
+ * The value is cached in doc->page_count. A cached 0 is treated as
+ * "not read yet", so the entry is looked up again on every call while
+ * it stays 0. Once a non-zero value is cached it is returned without
+ * consulting the page tree, so code that changes the page tree must
+ * reset doc->page_count to 0 for the count to be re-read.
+ */
+int
+pdf_count_pages(pdf_document *doc)
{
- int max;
- int pos;
- pdf_obj *node;
- pdf_obj *kids;
- struct info info;
-};
+ if (doc->page_count == 0)
+ {
+ pdf_obj *count = pdf_dict_getp(pdf_trailer(doc), "Root/Pages/Count");
+ doc->page_count = pdf_to_int(count);
+ }
+ return doc->page_count;
+}
-static void
-pdf_load_page_tree_node(pdf_document *doc, pdf_obj *node, struct info info)
+/*
+ * Recursive worker for pdf_lookup_page_loc.
+ *
+ * node:    page tree node whose Kids array is searched.
+ * skip:    in/out; number of leaf pages that still have to be passed
+ *          before the wanted page. It is reduced by node's Count when
+ *          the whole subtree is passed over, and by one for every
+ *          "Page" kid stepped over.
+ * parentp: optional; receives the node whose Kids array holds the hit.
+ * indexp:  optional; receives the index of the hit inside that array.
+ *
+ * Returns the matching entry of the Kids array, or NULL when the page
+ * is not inside this subtree. Throws if node is already marked (cycle
+ * in the page tree) or if a kid's Type is neither "Page" nor "Pages".
+ * node is marked for the duration of the scan and always unmarked
+ * again in fz_always.
+ */
+static pdf_obj *
+pdf_lookup_page_loc_imp(pdf_document *doc, pdf_obj *node, int *skip, pdf_obj **parentp, int *indexp)
{
- pdf_obj *dict, *kids, *count;
- pdf_obj *obj;
fz_context *ctx = doc->ctx;
- pdf_page_load *stack = NULL;
- int stacklen = -1;
- int stackmax = 0;
+ pdf_obj *kids, *hit;
+ int i, len, count;
+
+ count = pdf_to_int(pdf_dict_gets(node, "Count"));
+ /* A subtree with 'count' pages holds local indices 0..count-1, so
+ * skip == count already lies beyond it; pass over the whole subtree
+ * instead of walking every page in it. */
+ if (*skip >= count)
+ {
+ /* The princess is in another castle. */
+ *skip -= count;
+ return NULL;
+ }
+
+ kids = pdf_dict_gets(node, "Kids");
+ len = pdf_array_len(kids);
+
+ if (pdf_mark_obj(node))
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree");
+
+ hit = NULL;
+ fz_var(hit);
fz_try(ctx)
{
- do
+ for (i = 0; i < len; i++)
{
- if (!node || pdf_mark_obj(node))
- {
- /* NULL node, or we've been here before.
- * Nothing to do. */
- }
- else
+ pdf_obj *kid = pdf_array_get(kids, i);
+ char *type = pdf_to_name(pdf_dict_gets(kid, "Type"));
+ if (!strcmp(type, "Page"))
{
- kids = pdf_dict_gets(node, "Kids");
- count = pdf_dict_gets(node, "Count");
- if (pdf_is_array(kids) && pdf_is_int(count))
+ if (*skip == 0)
{
- /* Push this onto the stack */
- obj = pdf_dict_gets(node, "Resources");
- if (obj)
- info.resources = obj;
- obj = pdf_dict_gets(node, "MediaBox");
- if (obj)
- info.mediabox = obj;
- obj = pdf_dict_gets(node, "CropBox");
- if (obj)
- info.cropbox = obj;
- obj = pdf_dict_gets(node, "Rotate");
- if (obj)
- info.rotate = obj;
- stacklen++;
- if (stacklen == stackmax)
- {
- stack = fz_resize_array(ctx, stack, stackmax ? stackmax*2 : 10, sizeof(*stack));
- stackmax = stackmax ? stackmax*2 : 10;
- }
- stack[stacklen].kids = kids;
- stack[stacklen].node = node;
- stack[stacklen].pos = -1;
- stack[stacklen].max = pdf_array_len(kids);
- stack[stacklen].info = info;
+ if (parentp) *parentp = node;
+ if (indexp) *indexp = i;
+ hit = kid;
+ break;
}
- else if ((dict = pdf_to_dict(node)) != NULL)
+ else
{
- if (info.resources && !pdf_dict_gets(dict, "Resources"))
- pdf_dict_puts(dict, "Resources", info.resources);
- if (info.mediabox && !pdf_dict_gets(dict, "MediaBox"))
- pdf_dict_puts(dict, "MediaBox", info.mediabox);
- if (info.cropbox && !pdf_dict_gets(dict, "CropBox"))
- pdf_dict_puts(dict, "CropBox", info.cropbox);
- if (info.rotate && !pdf_dict_gets(dict, "Rotate"))
- pdf_dict_puts(dict, "Rotate", info.rotate);
-
- if (doc->page_len == doc->page_cap)
- {
- fz_warn(ctx, "found more pages than expected");
- doc->page_refs = fz_resize_array(ctx, doc->page_refs, doc->page_cap+1, sizeof(pdf_obj*));
- doc->page_objs = fz_resize_array(ctx, doc->page_objs, doc->page_cap+1, sizeof(pdf_obj*));
- doc->page_cap ++;
- }
-
- doc->page_refs[doc->page_len] = pdf_keep_obj(node);
- doc->page_objs[doc->page_len] = pdf_keep_obj(dict);
- doc->page_len ++;
- pdf_unmark_obj(node);
+ (*skip)--;
}
}
- /* Get the next node */
- if (stacklen < 0)
- break;
- while (++stack[stacklen].pos == stack[stacklen].max)
+ else if (!strcmp(type, "Pages"))
{
- pdf_unmark_obj(stack[stacklen].node);
- stacklen--;
- if (stacklen < 0) /* No more to pop! */
+ hit = pdf_lookup_page_loc_imp(doc, kid, skip, parentp, indexp);
+ if (hit)
break;
- node = stack[stacklen].node;
- info = stack[stacklen].info;
- pdf_unmark_obj(node); /* Unmark it, cos we're about to mark it again */
}
- if (stacklen >= 0)
- node = pdf_array_get(stack[stacklen].kids, stack[stacklen].pos);
+ else
+ {
+ fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree");
+ }
}
- while (stacklen >= 0);
}
fz_always(ctx)
{
- while (stacklen >= 0)
- pdf_unmark_obj(stack[stacklen--].node);
- fz_free(ctx, stack);
+ pdf_unmark_obj(node);
}
fz_catch(ctx)
{
fz_rethrow(ctx);
}
+
+ return hit;
}
-static void
-pdf_load_page_tree(pdf_document *doc)
+/*
+ * Locate page number 'needle' by walking the page tree that hangs off
+ * the trailer's Root/Pages entry.
+ *
+ * needle:  page index; callers in this patch pass zero-based values
+ *          (for example page-1).
+ * parentp: optional; receives the node whose Kids array holds the page.
+ * indexp:  optional; receives the page's index inside that Kids array.
+ *
+ * Returns the entry of the Kids array for the page. Throws
+ * "cannot find page %d in page tree" when the search comes back empty.
+ */
+pdf_obj *
+pdf_lookup_page_loc(pdf_document *doc, int needle, pdf_obj **parentp, int *indexp)
{
- fz_context *ctx = doc->ctx;
- pdf_obj *catalog;
- pdf_obj *pages;
- pdf_obj *count;
- struct info info;
-
- if (doc->page_refs)
- return;
-
- catalog = pdf_dict_gets(pdf_trailer(doc), "Root");
- pages = pdf_dict_gets(catalog, "Pages");
- count = pdf_dict_gets(pages, "Count");
-
- if (!pdf_is_dict(pages))
- fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
- if (!pdf_is_int(count) || pdf_to_int(count) < 0)
- fz_throw(ctx, FZ_ERROR_GENERIC, "missing page count");
-
- doc->page_cap = pdf_to_int(count);
- doc->page_len = 0;
- doc->page_refs = fz_malloc_array(ctx, doc->page_cap, sizeof(pdf_obj*));
- doc->page_objs = fz_malloc_array(ctx, doc->page_cap, sizeof(pdf_obj*));
+ pdf_obj *root = pdf_dict_gets(pdf_trailer(doc), "Root");
+ pdf_obj *node = pdf_dict_gets(root, "Pages");
+ /* Work on a copy: the recursive search counts it down, while the
+ * original needle is still needed for the error message. */
+ int skip = needle;
+ pdf_obj *hit = pdf_lookup_page_loc_imp(doc, node, &skip, parentp, indexp);
+ if (!hit)
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "cannot find page %d in page tree", needle);
+ return hit;
+}
- info.resources = NULL;
- info.mediabox = NULL;
- info.cropbox = NULL;
- info.rotate = NULL;
+/*
+ * Convenience form of pdf_lookup_page_loc for callers that only want
+ * the page entry itself and not its position in the parent's Kids
+ * array. Propagates whatever pdf_lookup_page_loc throws.
+ */
+pdf_obj *
+pdf_lookup_page_obj(pdf_document *doc, int needle)
+{
+ pdf_obj *found;
+
+ found = pdf_lookup_page_loc(doc, needle, NULL, NULL);
+ return found;
+}
- pdf_load_page_tree_node(doc, pages, info);
+/*
+ * Count the pages that come before one particular kid of 'parent'.
+ *
+ * Walks parent's Kids array in order until it meets the entry whose
+ * object number equals kid_num. Every earlier entry contributes its
+ * Count value when it has a Count entry, and 1 otherwise.
+ * Throws when no entry of the array carries that object number.
+ */
+static int
+pdf_count_pages_before_kid(pdf_document *doc, pdf_obj *parent, int kid_num)
+{
+ pdf_obj *siblings = pdf_dict_gets(parent, "Kids");
+ int n = pdf_array_len(siblings);
+ int before = 0;
+ int k;
+
+ for (k = 0; k < n; k++)
+ {
+ pdf_obj *sibling = pdf_array_get(siblings, k);
+ pdf_obj *subtotal;
+
+ if (pdf_to_num(sibling) == kid_num)
+ return before;
+
+ subtotal = pdf_dict_gets(sibling, "Count");
+ before += subtotal ? pdf_to_int(subtotal) : 1;
+ }
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array");
}
+/*
+ * Compute the index of page object 'node' by climbing its chain of
+ * Parent entries: at every level the pages that precede the current
+ * node inside its parent's Kids array are added to the total.
+ *
+ * Throws "page tree is too deep" once more than 100 levels have been
+ * climbed, and propagates the error from pdf_count_pages_before_kid
+ * when a node is not listed in its parent's Kids array.
+ *
+ * NOTE(review): unlike the removed implementation this never returns
+ * -1; a node without a Parent entry yields 0. Callers that tested for
+ * -1 should be checked.
+ */
int
-pdf_count_pages(pdf_document *doc)
+pdf_lookup_page_number(pdf_document *doc, pdf_obj *node)
{
- pdf_load_page_tree(doc);
- return doc->page_len;
+ int needle = pdf_to_num(node);
+ int total = 0;
+ int depth = 0;
+ pdf_obj *parent;
+
+ parent = pdf_dict_gets(node, "Parent");
+ while (parent)
+ {
+ total += pdf_count_pages_before_kid(doc, parent, needle);
+ /* One level up: the parent becomes the kid to look for. */
+ needle = pdf_to_num(parent);
+ parent = pdf_dict_gets(parent, "Parent");
+ if (++depth > 100)
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "page tree is too deep");
+ }
+
+ return total;
}
-int
-pdf_lookup_page_number(pdf_document *doc, pdf_obj *page)
+/*
+ * Look up 'key' on a page, falling back to its ancestors.
+ *
+ * The node's own dictionary is tried first; if the key is absent the
+ * chain of Parent entries is followed and the first value found is
+ * returned. Returns NULL when neither the node nor any ancestor has
+ * the key. Throws "page tree is too deep" once more than 100
+ * ancestors have been visited.
+ */
+static pdf_obj *
+pdf_lookup_inherited_page_item(pdf_document *doc, pdf_obj *node, const char *key)
{
- int i, num = pdf_to_num(page);
+ int depth = 0;
+
+ pdf_obj *val = pdf_dict_gets(node, key);
+ if (val)
+ return val;
- pdf_load_page_tree(doc);
- for (i = 0; i < doc->page_len; i++)
- if (num == pdf_to_num(doc->page_refs[i]))
- return i;
- return -1;
+ node = pdf_dict_gets(node, "Parent");
+ while (node)
+ {
+ val = pdf_dict_gets(node, key);
+ if (val)
+ return val;
+ node = pdf_dict_gets(node, "Parent");
+ if (++depth > 100)
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "page tree is too deep");
+ }
+
+ return NULL;
}
/* We need to know whether to install a page-level transparency group */
@@ -321,12 +304,8 @@ pdf_load_page(pdf_document *doc, int number)
float userunit;
fz_matrix mat;
- pdf_load_page_tree(doc);
- if (number < 0 || number >= doc->page_len)
- fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page %d", number + 1);
-
- pageobj = doc->page_objs[number];
- pageref = doc->page_refs[number];
+ pageref = pdf_lookup_page_obj(doc, number);
+ pageobj = pdf_resolve_indirect(pageref);
page = fz_malloc_struct(ctx, pdf_page);
page->resources = NULL;
@@ -344,7 +323,7 @@ pdf_load_page(pdf_document *doc, int number)
else
userunit = 1;
- pdf_to_rect(ctx, pdf_dict_gets(pageobj, "MediaBox"), &mediabox);
+ pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "MediaBox"), &mediabox);
if (fz_is_empty_rect(&mediabox))
{
fz_warn(ctx, "cannot find page size for page %d", number + 1);
@@ -354,7 +333,7 @@ pdf_load_page(pdf_document *doc, int number)
mediabox.y1 = 792;
}
- pdf_to_rect(ctx, pdf_dict_gets(pageobj, "CropBox"), &cropbox);
+ pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "CropBox"), &cropbox);
if (!fz_is_empty_rect(&cropbox))
fz_intersect_rect(&mediabox, &cropbox);
@@ -369,7 +348,7 @@ pdf_load_page(pdf_document *doc, int number)
page->mediabox = fz_unit_rect;
}
- page->rotate = pdf_to_int(pdf_dict_gets(pageobj, "Rotate"));
+ page->rotate = pdf_to_int(pdf_lookup_inherited_page_item(doc, pageobj, "Rotate"));
/* Snap page->rotate to 0, 90, 180 or 270 */
if (page->rotate < 0)
page->rotate = 360 - ((-page->rotate) % 360);
@@ -401,7 +380,8 @@ pdf_load_page(pdf_document *doc, int number)
pdf_load_transition(doc, page, obj);
}
- page->resources = pdf_dict_gets(pageobj, "Resources");
+ // TODO: inherit
+ page->resources = pdf_lookup_inherited_page_item(doc, pageobj, "Resources");
if (page->resources)
pdf_keep_obj(page->resources);
@@ -469,68 +449,67 @@ pdf_free_page(pdf_document *doc, pdf_page *page)
}
+/*
+ * Remove page 'at' from the page tree.
+ *
+ * The page's entry is deleted from its parent's Kids array and the
+ * Count entry of that parent and of every ancestor reached through
+ * Parent is reduced by one. 'at' is the index understood by
+ * pdf_lookup_page_loc, which throws when no such page exists.
+ */
void
-pdf_delete_page(pdf_document *doc, int page)
-{
- pdf_delete_page_range(doc, page, page+1);
-}
-
-void
-pdf_delete_page_range(pdf_document *doc, int start, int end)
+pdf_delete_page(pdf_document *doc, int at)
{
+ pdf_obj *parent, *kids;
int i;
- if (start > end)
- {
- int tmp = start;
- start = end;
- end = tmp;
- }
+ pdf_lookup_page_loc(doc, at, &parent, &i);
+ kids = pdf_dict_gets(parent, "Kids");
+ pdf_array_delete(kids, i);
- if (!doc || start >= doc->page_len || end < 0)
- return;
-
- for (i=start; i < end; i++)
- pdf_drop_obj(doc->page_refs[i]);
- if (doc->page_len > end)
+ while (parent)
{
- memmove(&doc->page_refs[start], &doc->page_refs[end], sizeof(pdf_page *) * (doc->page_len - end + start));
- memmove(&doc->page_refs[start], &doc->page_refs[end], sizeof(pdf_page *) * (doc->page_len - end + start));
+ int count = pdf_to_int(pdf_dict_gets(parent, "Count"));
+ pdf_dict_puts_drop(parent, "Count", pdf_new_int(doc, count - 1));
+ parent = pdf_dict_gets(parent, "Parent");
}
-
- doc->page_len -= end - start;
- doc->needs_page_tree_rebuild = 1;
+
+ /* pdf_count_pages returns the cached doc->page_count whenever it is
+ * non-zero; drop the cache so the reduced Count is read next time. */
+ doc->page_count = 0;
}
+/*
+ * Insert 'page' into the page tree so that it becomes page 'at'.
+ *
+ * at == page count appends after the current last page; a smaller
+ * value inserts before the page currently at that index. A reference
+ * to page->me is added to the Kids array of the neighbouring page's
+ * parent, and the Count entry of that parent and of every ancestor
+ * reached through Parent is increased by one.
+ *
+ * Throws when the page tree is empty, when 'at' is greater than the
+ * page count, or when pdf_lookup_page_loc cannot find the neighbour.
+ *
+ * NOTE(review): the inserted page's own Parent entry is not set here,
+ * although the page lookup helpers in this file follow Parent links;
+ * confirm that callers set it.
+ */
void
pdf_insert_page(pdf_document *doc, pdf_page *page, int at)
{
- if (!doc || !page)
- return;
- if (at < 0)
- at = 0;
- if (at > doc->page_len)
- at = doc->page_len;
+ int count = pdf_count_pages(doc);
+ pdf_obj *parent, *kids;
+ int i;
+
+ if (count == 0)
+ {
+ /* TODO: create new page tree? */
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "empty page tree, cannot insert page");
+ }
+ else if (at >= count)
+ {
+ if (at > count)
+ fz_throw(doc->ctx, FZ_ERROR_GENERIC, "cannot insert page beyond end of page tree");
- if (doc->page_len + 1 >= doc->page_cap)
+ /* append after last page */
+ pdf_lookup_page_loc(doc, count - 1, &parent, &i);
+ kids = pdf_dict_gets(parent, "Kids");
+ pdf_array_insert_drop(kids, pdf_new_ref(doc, page->me), i + 1);
+ }
+ else
{
- int newmax = doc->page_cap * 2;
- if (newmax == 0)
- newmax = 4;
- doc->page_refs = fz_resize_array(doc->ctx, doc->page_refs, newmax, sizeof(pdf_page *));
- doc->page_objs = fz_resize_array(doc->ctx, doc->page_objs, newmax, sizeof(pdf_page *));
- doc->page_cap = newmax;
+ /* insert before found page */
+ pdf_lookup_page_loc(doc, at, &parent, &i);
+ kids = pdf_dict_gets(parent, "Kids");
+ pdf_array_insert_drop(kids, pdf_new_ref(doc, page->me), i);
}
- if (doc->page_len > at)
+
+ /* Adjust page counts */
+ while (parent)
{
- memmove(&doc->page_objs[at+1], &doc->page_objs[at], doc->page_len - at);
- memmove(&doc->page_refs[at+1], &doc->page_refs[at], doc->page_len - at);
+ int node_count = pdf_to_int(pdf_dict_gets(parent, "Count"));
+ pdf_dict_puts_drop(parent, "Count", pdf_new_int(doc, node_count + 1));
+ parent = pdf_dict_gets(parent, "Parent");
}
+
+ /* pdf_count_pages returns the cached doc->page_count whenever it is
+ * non-zero; drop the cache so the increased Count is read next time. */
+ doc->page_count = 0;
+}
- doc->page_len++;
- doc->page_objs[at] = pdf_keep_obj(page->me);
- doc->page_refs[at] = NULL;
- doc->page_refs[at] = pdf_new_ref(doc, page->me);
- doc->needs_page_tree_rebuild = 1;
+/*
+ * Delete the pages with indices start .. end-1 (end is exclusive).
+ * Does nothing when start >= end. Errors thrown by pdf_delete_page
+ * propagate to the caller.
+ */
+void
+pdf_delete_page_range(pdf_document *doc, int start, int end)
+{
+ /* Every deletion moves the following pages down by one index, so
+ * keep deleting at 'start' and shrink the range instead. Advancing
+ * 'start' would skip every other page and overrun the old range. */
+ while (start < end)
+ {
+ pdf_delete_page(doc, start);
+ end--;
+ }
}
pdf_page *
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 63fab4f1..a1b0caa2 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -2450,6 +2450,10 @@ void pdf_write_document(pdf_document *doc, char *filename, fz_write_options *fz_
#define KIDS_PER_LEVEL 32
+#if 0
+
+// TODO: pdf_rebalance_page_tree(doc);
+
static pdf_obj *
make_page_tree_node(pdf_document *doc, int l, int r, pdf_obj *parent_ref, int root)
{
@@ -2516,7 +2520,7 @@ make_page_tree_node(pdf_document *doc, int l, int r, pdf_obj *parent_ref, int ro
}
static void
-pdf_rebuild_page_tree(pdf_document *doc)
+pdf_rebalance_page_tree(pdf_document *doc)
{
pdf_obj *catalog;
pdf_obj *pages;
@@ -2531,10 +2535,16 @@ pdf_rebuild_page_tree(pdf_document *doc)
doc->needs_page_tree_rebuild = 0;
}
+#endif
+
+/*
+ * Placeholder that currently does nothing. The tree-building code
+ * above is compiled out with #if 0, and this empty definition keeps
+ * pdf_finish_edit linking until it is replaced (see the TODO above).
+ */
+static void
+pdf_rebalance_page_tree(pdf_document *doc)
+{
+}
+
+/*
+ * Safe to call with a NULL doc (returns immediately); otherwise the
+ * document is handed to pdf_rebalance_page_tree.
+ */
void pdf_finish_edit(pdf_document *doc)
{
if (!doc)
return;
-
- pdf_rebuild_page_tree(doc);
+ pdf_rebalance_page_tree(doc);
}
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 71213fd7..faf7e81f 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1085,8 +1085,8 @@ pdf_init_document(pdf_document *doc)
void
pdf_close_document(pdf_document *doc)
{
- int i;
fz_context *ctx;
+ int i;
if (!doc)
return;
@@ -1101,20 +1101,6 @@ pdf_close_document(pdf_document *doc)
pdf_free_xref_sections(doc);
- if (doc->page_objs)
- {
- for (i = 0; i < doc->page_len; i++)
- pdf_drop_obj(doc->page_objs[i]);
- fz_free(ctx, doc->page_objs);
- }
-
- if (doc->page_refs)
- {
- for (i = 0; i < doc->page_len; i++)
- pdf_drop_obj(doc->page_refs[i]);
- fz_free(ctx, doc->page_refs);
- }
-
if (doc->focus_obj)
pdf_drop_obj(doc->focus_obj);
if (doc->file)
diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c
index f42269a8..dc8a1f38 100644
--- a/source/tools/pdfclean.c
+++ b/source/tools/pdfclean.c
@@ -91,8 +91,8 @@ static void retainpages(int argc, char **argv)
for (page = spage; page <= epage; page++)
{
- pdf_obj *pageobj = doc->page_objs[page-1];
- pdf_obj *pageref = doc->page_refs[page-1];
+ pdf_obj *pageref = pdf_lookup_page_obj(doc, page-1);
+ pdf_obj *pageobj = pdf_resolve_indirect(pageref);
pdf_dict_puts(pageobj, "Parent", parent);
diff --git a/source/tools/pdfinfo.c b/source/tools/pdfinfo.c
index 18417452..856029ba 100644
--- a/source/tools/pdfinfo.c
+++ b/source/tools/pdfinfo.c
@@ -572,8 +572,8 @@ gatherresourceinfo(int page, pdf_obj *rsrc, int show)
pdf_obj *subrsrc;
int i;
- pageobj = doc->page_objs[page-1];
- pageref = doc->page_refs[page-1];
+ pageref = pdf_lookup_page_obj(doc, page-1);
+ pageobj = pdf_resolve_indirect(pageref);
if (!pageobj)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
@@ -640,8 +640,8 @@ gatherpageinfo(int page, int show)
pdf_obj *pageref;
pdf_obj *rsrc;
- pageobj = doc->page_objs[page-1];
- pageref = doc->page_refs[page-1];
+ pageref = pdf_lookup_page_obj(doc, page-1);
+ pageobj = pdf_resolve_indirect(pageref);
if (!pageobj)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
diff --git a/source/tools/pdfposter.c b/source/tools/pdfposter.c
index 4702c2aa..3076f7af 100644
--- a/source/tools/pdfposter.c
+++ b/source/tools/pdfposter.c
@@ -81,7 +81,7 @@ static void decimatepages(pdf_document *doc)
fz_rect mb;
int num;
- newpageobj = pdf_copy_dict(doc->page_objs[page]);
+ newpageobj = pdf_copy_dict(pdf_lookup_page_obj(doc, page));
num = pdf_create_object(doc);
pdf_update_object(doc, num, newpageobj);
newpageref = pdf_new_indirect(doc, num, 0);
diff --git a/source/tools/pdfshow.c b/source/tools/pdfshow.c
index 78e3fd08..6e721464 100644
--- a/source/tools/pdfshow.c
+++ b/source/tools/pdfshow.c
@@ -62,7 +62,7 @@ static void showpagetree(void)
count = pdf_count_pages(doc);
for (i = 0; i < count; i++)
{
- ref = doc->page_refs[i];
+ ref = pdf_lookup_page_obj(doc, i);
printf("page %d = %d %d R\n", i + 1, pdf_to_num(ref), pdf_to_gen(ref));
}
printf("\n");