diff options
-rw-r--r-- | include/mupdf/pdf/document.h | 6 | ||||
-rw-r--r-- | include/mupdf/pdf/page.h | 1 | ||||
-rw-r--r-- | source/pdf/pdf-page.c | 361 | ||||
-rw-r--r-- | source/pdf/pdf-write.c | 16 | ||||
-rw-r--r-- | source/pdf/pdf-xref.c | 16 | ||||
-rw-r--r-- | source/tools/pdfclean.c | 4 | ||||
-rw-r--r-- | source/tools/pdfinfo.c | 8 | ||||
-rw-r--r-- | source/tools/pdfposter.c | 2 | ||||
-rw-r--r-- | source/tools/pdfshow.c | 2 |
9 files changed, 194 insertions, 222 deletions
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h index c65fe893..9a23fd7e 100644 --- a/include/mupdf/pdf/document.h +++ b/include/mupdf/pdf/document.h @@ -187,12 +187,8 @@ struct pdf_document_s int xref_altered; int freeze_updates; - int page_len; - int page_cap; - pdf_obj **page_objs; - pdf_obj **page_refs; + int page_count; int resources_localised; - int needs_page_tree_rebuild; pdf_lexbuf_large lexbuf; diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h index 7eaa93d9..c1fffa57 100644 --- a/include/mupdf/pdf/page.h +++ b/include/mupdf/pdf/page.h @@ -3,6 +3,7 @@ int pdf_lookup_page_number(pdf_document *doc, pdf_obj *pageobj); int pdf_count_pages(pdf_document *doc); +pdf_obj *pdf_lookup_page_obj(pdf_document *doc, int needle); /* pdf_load_page: Load a page and its resources. diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c index 2e14bf6c..c58995d6 100644 --- a/source/pdf/pdf-page.c +++ b/source/pdf/pdf-page.c @@ -1,180 +1,163 @@ #include "mupdf/pdf.h" -struct info -{ - pdf_obj *resources; - pdf_obj *mediabox; - pdf_obj *cropbox; - pdf_obj *rotate; -}; - -typedef struct pdf_page_load_s pdf_page_load; - -struct pdf_page_load_s +int +pdf_count_pages(pdf_document *doc) { - int max; - int pos; - pdf_obj *node; - pdf_obj *kids; - struct info info; -}; + if (doc->page_count == 0) + { + pdf_obj *count = pdf_dict_getp(pdf_trailer(doc), "Root/Pages/Count"); + doc->page_count = pdf_to_int(count); + } + return doc->page_count; +} -static void -pdf_load_page_tree_node(pdf_document *doc, pdf_obj *node, struct info info) +static pdf_obj * +pdf_lookup_page_loc_imp(pdf_document *doc, pdf_obj *node, int *skip, pdf_obj **parentp, int *indexp) { - pdf_obj *dict, *kids, *count; - pdf_obj *obj; fz_context *ctx = doc->ctx; - pdf_page_load *stack = NULL; - int stacklen = -1; - int stackmax = 0; + pdf_obj *kids, *hit; + int i, len, count; + + count = pdf_to_int(pdf_dict_gets(node, "Count")); + if (*skip > count) + { + /* The princess is in another castle. */ + *skip -= count; + return NULL; + } + + kids = pdf_dict_gets(node, "Kids"); + len = pdf_array_len(kids); + + if (pdf_mark_obj(node)) + fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree"); + + hit = NULL; + fz_var(hit); fz_try(ctx) { - do + for (i = 0; i < len; i++) { - if (!node || pdf_mark_obj(node)) - { - /* NULL node, or we've been here before. - * Nothing to do. */ - } - else + pdf_obj *kid = pdf_array_get(kids, i); + char *type = pdf_to_name(pdf_dict_gets(kid, "Type")); + if (!strcmp(type, "Page")) { - kids = pdf_dict_gets(node, "Kids"); - count = pdf_dict_gets(node, "Count"); - if (pdf_is_array(kids) && pdf_is_int(count)) + if (*skip == 0) { - /* Push this onto the stack */ - obj = pdf_dict_gets(node, "Resources"); - if (obj) - info.resources = obj; - obj = pdf_dict_gets(node, "MediaBox"); - if (obj) - info.mediabox = obj; - obj = pdf_dict_gets(node, "CropBox"); - if (obj) - info.cropbox = obj; - obj = pdf_dict_gets(node, "Rotate"); - if (obj) - info.rotate = obj; - stacklen++; - if (stacklen == stackmax) - { - stack = fz_resize_array(ctx, stack, stackmax ? stackmax*2 : 10, sizeof(*stack)); - stackmax = stackmax ? stackmax*2 : 10; - } - stack[stacklen].kids = kids; - stack[stacklen].node = node; - stack[stacklen].pos = -1; - stack[stacklen].max = pdf_array_len(kids); - stack[stacklen].info = info; + if (parentp) *parentp = node; + if (indexp) *indexp = i; + hit = kid; + break; } - else if ((dict = pdf_to_dict(node)) != NULL) + else { - if (info.resources && !pdf_dict_gets(dict, "Resources")) - pdf_dict_puts(dict, "Resources", info.resources); - if (info.mediabox && !pdf_dict_gets(dict, "MediaBox")) - pdf_dict_puts(dict, "MediaBox", info.mediabox); - if (info.cropbox && !pdf_dict_gets(dict, "CropBox")) - pdf_dict_puts(dict, "CropBox", info.cropbox); - if (info.rotate && !pdf_dict_gets(dict, "Rotate")) - pdf_dict_puts(dict, "Rotate", info.rotate); - - if (doc->page_len == doc->page_cap) - { - fz_warn(ctx, "found more pages than expected"); - doc->page_refs = fz_resize_array(ctx, doc->page_refs, doc->page_cap+1, sizeof(pdf_obj*)); - doc->page_objs = fz_resize_array(ctx, doc->page_objs, doc->page_cap+1, sizeof(pdf_obj*)); - doc->page_cap ++; - } - - doc->page_refs[doc->page_len] = pdf_keep_obj(node); - doc->page_objs[doc->page_len] = pdf_keep_obj(dict); - doc->page_len ++; - pdf_unmark_obj(node); + (*skip)--; } } - /* Get the next node */ - if (stacklen < 0) - break; - while (++stack[stacklen].pos == stack[stacklen].max) + else if (!strcmp(type, "Pages")) { - pdf_unmark_obj(stack[stacklen].node); - stacklen--; - if (stacklen < 0) /* No more to pop! */ + hit = pdf_lookup_page_loc_imp(doc, kid, skip, parentp, indexp); + if (hit) break; - node = stack[stacklen].node; - info = stack[stacklen].info; - pdf_unmark_obj(node); /* Unmark it, cos we're about to mark it again */ } - if (stacklen >= 0) - node = pdf_array_get(stack[stacklen].kids, stack[stacklen].pos); + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree"); + } } - while (stacklen >= 0); } fz_always(ctx) { - while (stacklen >= 0) - pdf_unmark_obj(stack[stacklen--].node); - fz_free(ctx, stack); + pdf_unmark_obj(node); } fz_catch(ctx) { fz_rethrow(ctx); } + + return hit; } -static void -pdf_load_page_tree(pdf_document *doc) +pdf_obj * +pdf_lookup_page_loc(pdf_document *doc, int needle, pdf_obj **parentp, int *indexp) { - fz_context *ctx = doc->ctx; - pdf_obj *catalog; - pdf_obj *pages; - pdf_obj *count; - struct info info; - - if (doc->page_refs) - return; - - catalog = pdf_dict_gets(pdf_trailer(doc), "Root"); - pages = pdf_dict_gets(catalog, "Pages"); - count = pdf_dict_gets(pages, "Count"); - - if (!pdf_is_dict(pages)) - fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree"); - if (!pdf_is_int(count) || pdf_to_int(count) < 0) - fz_throw(ctx, FZ_ERROR_GENERIC, "missing page count"); - - doc->page_cap = pdf_to_int(count); - doc->page_len = 0; - doc->page_refs = fz_malloc_array(ctx, doc->page_cap, sizeof(pdf_obj*)); - doc->page_objs = fz_malloc_array(ctx, doc->page_cap, sizeof(pdf_obj*)); + pdf_obj *root = pdf_dict_gets(pdf_trailer(doc), "Root"); + pdf_obj *node = pdf_dict_gets(root, "Pages"); + int skip = needle; + pdf_obj *hit = pdf_lookup_page_loc_imp(doc, node, &skip, parentp, indexp); + if (!hit) + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "cannot find page %d in page tree", needle); + return hit; +} - info.resources = NULL; - info.mediabox = NULL; - info.cropbox = NULL; - info.rotate = NULL; +pdf_obj * +pdf_lookup_page_obj(pdf_document *doc, int needle) +{ + return pdf_lookup_page_loc(doc, needle, NULL, NULL); +} - pdf_load_page_tree_node(doc, pages, info); +static int +pdf_count_pages_before_kid(pdf_document *doc, pdf_obj *parent, int kid_num) +{ + pdf_obj *count, *kid, *kids = pdf_dict_gets(parent, "Kids"); + int i, total = 0, len = pdf_array_len(kids); + for (i = 0; i < len; i++) + { + kid = pdf_array_get(kids, i); + if (pdf_to_num(kid) == kid_num) + return total; + count = pdf_dict_gets(kid, "Count"); + if (count) + total += pdf_to_int(count); + else + total++; + } + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array"); } int -pdf_count_pages(pdf_document *doc) +pdf_lookup_page_number(pdf_document *doc, pdf_obj *node) { - pdf_load_page_tree(doc); - return doc->page_len; + int needle = pdf_to_num(node); + int total = 0; + int depth = 0; + pdf_obj *parent; + + parent = pdf_dict_gets(node, "Parent"); + while (parent) + { + total += pdf_count_pages_before_kid(doc, parent, needle); + needle = pdf_to_num(parent); + parent = pdf_dict_gets(parent, "Parent"); + if (++depth > 100) + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "page tree is too deep"); + } + + return total; } -int -pdf_lookup_page_number(pdf_document *doc, pdf_obj *page) +static pdf_obj * +pdf_lookup_inherited_page_item(pdf_document *doc, pdf_obj *node, const char *key) { - int i, num = pdf_to_num(page); + int depth = 0; + + pdf_obj *val = pdf_dict_gets(node, key); + if (val) + return val; - pdf_load_page_tree(doc); - for (i = 0; i < doc->page_len; i++) - if (num == pdf_to_num(doc->page_refs[i])) - return i; - return -1; + node = pdf_dict_gets(node, "Parent"); + while (node) + { + val = pdf_dict_gets(node, key); + if (val) + return val; + node = pdf_dict_gets(node, "Parent"); + if (++depth > 100) + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "page tree is too deep"); + } + + return NULL; } /* We need to know whether to install a page-level transparency group */ @@ -321,12 +304,8 @@ pdf_load_page(pdf_document *doc, int number) float userunit; fz_matrix mat; - pdf_load_page_tree(doc); - if (number < 0 || number >= doc->page_len) - fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page %d", number + 1); - - pageobj = doc->page_objs[number]; - pageref = doc->page_refs[number]; + pageref = pdf_lookup_page_obj(doc, number); + pageobj = pdf_resolve_indirect(pageref); page = fz_malloc_struct(ctx, pdf_page); page->resources = NULL; @@ -344,7 +323,7 @@ pdf_load_page(pdf_document *doc, int number) else userunit = 1; - pdf_to_rect(ctx, pdf_dict_gets(pageobj, "MediaBox"), &mediabox); + pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "MediaBox"), &mediabox); if (fz_is_empty_rect(&mediabox)) { fz_warn(ctx, "cannot find page size for page %d", number + 1); @@ -354,7 +333,7 @@ pdf_load_page(pdf_document *doc, int number) mediabox.y1 = 792; } - pdf_to_rect(ctx, pdf_dict_gets(pageobj, "CropBox"), &cropbox); + pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "CropBox"), &cropbox); if (!fz_is_empty_rect(&cropbox)) fz_intersect_rect(&mediabox, &cropbox); @@ -369,7 +348,7 @@ pdf_load_page(pdf_document *doc, int number) page->mediabox = fz_unit_rect; } - page->rotate = pdf_to_int(pdf_dict_gets(pageobj, "Rotate")); + page->rotate = pdf_to_int(pdf_lookup_inherited_page_item(doc, pageobj, "Rotate")); /* Snap page->rotate to 0, 90, 180 or 270 */ if (page->rotate < 0) page->rotate = 360 - ((-page->rotate) % 360); @@ -401,7 +380,8 @@ pdf_load_page(pdf_document *doc, int number) pdf_load_transition(doc, page, obj); } - page->resources = pdf_dict_gets(pageobj, "Resources"); + // TODO: inherit + page->resources = pdf_lookup_inherited_page_item(doc, pageobj, "Resources"); if (page->resources) pdf_keep_obj(page->resources); @@ -469,68 +449,67 @@ pdf_free_page(pdf_document *doc, pdf_page *page) } void -pdf_delete_page(pdf_document *doc, int page) -{ - pdf_delete_page_range(doc, page, page+1); -} - -void -pdf_delete_page_range(pdf_document *doc, int start, int end) +pdf_delete_page(pdf_document *doc, int at) { + pdf_obj *parent, *kids; int i; - if (start > end) - { - int tmp = start; - start = end; - end = tmp; - } + pdf_lookup_page_loc(doc, at, &parent, &i); + kids = pdf_dict_gets(parent, "Kids"); + pdf_array_delete(kids, i); - if (!doc || start >= doc->page_len || end < 0) - return; - - for (i=start; i < end; i++) - pdf_drop_obj(doc->page_refs[i]); - if (doc->page_len > end) + while (parent) { - memmove(&doc->page_refs[start], &doc->page_refs[end], sizeof(pdf_page *) * (doc->page_len - end + start)); - memmove(&doc->page_refs[start], &doc->page_refs[end], sizeof(pdf_page *) * (doc->page_len - end + start)); + int count = pdf_to_int(pdf_dict_gets(parent, "Count")); + pdf_dict_puts_drop(parent, "Count", pdf_new_int(doc, count - 1)); + parent = pdf_dict_gets(parent, "Parent"); } - - doc->page_len -= end - start; - doc->needs_page_tree_rebuild = 1; } void pdf_insert_page(pdf_document *doc, pdf_page *page, int at) { - if (!doc || !page) - return; - if (at < 0) - at = 0; - if (at > doc->page_len) - at = doc->page_len; + int count = pdf_count_pages(doc); + pdf_obj *parent, *kids; + int i; + + if (count == 0) + { + /* TODO: create new page tree? */ + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "empty page tree, cannot insert page"); + } + else if (at >= count) + { + if (at > count) + fz_throw(doc->ctx, FZ_ERROR_GENERIC, "cannot insert page beyond end of page tree"); - if (doc->page_len + 1 >= doc->page_cap) + /* append after last page */ + pdf_lookup_page_loc(doc, count - 1, &parent, &i); + kids = pdf_dict_gets(parent, "Kids"); + pdf_array_insert_drop(kids, pdf_new_ref(doc, page->me), i + 1); + } + else { - int newmax = doc->page_cap * 2; - if (newmax == 0) - newmax = 4; - doc->page_refs = fz_resize_array(doc->ctx, doc->page_refs, newmax, sizeof(pdf_page *)); - doc->page_objs = fz_resize_array(doc->ctx, doc->page_objs, newmax, sizeof(pdf_page *)); - doc->page_cap = newmax; + /* insert before found page */ + pdf_lookup_page_loc(doc, at, &parent, &i); + kids = pdf_dict_gets(parent, "Kids"); + pdf_array_insert_drop(kids, pdf_new_ref(doc, page->me), i); } - if (doc->page_len > at) + + /* Adjust page counts */ + while (parent) { - memmove(&doc->page_objs[at+1], &doc->page_objs[at], doc->page_len - at); - memmove(&doc->page_refs[at+1], &doc->page_refs[at], doc->page_len - at); + int count = pdf_to_int(pdf_dict_gets(parent, "Count")); + pdf_dict_puts_drop(parent, "Count", pdf_new_int(doc, count + 1)); + parent = pdf_dict_gets(parent, "Parent"); } +} - doc->page_len++; - doc->page_objs[at] = pdf_keep_obj(page->me); - doc->page_refs[at] = NULL; - doc->page_refs[at] = pdf_new_ref(doc, page->me); - doc->needs_page_tree_rebuild = 1; +void +pdf_delete_page_range(pdf_document *doc, int start, int end) +{ + while (start < end) + pdf_delete_page(doc, start++); } pdf_page * diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 63fab4f1..a1b0caa2 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -2450,6 +2450,10 @@ void pdf_write_document(pdf_document *doc, char *filename, fz_write_options *fz_ #define KIDS_PER_LEVEL 32 +#if 0 + +// TODO: pdf_rebalance_page_tree(doc); + static pdf_obj * make_page_tree_node(pdf_document *doc, int l, int r, pdf_obj *parent_ref, int root) { @@ -2516,7 +2520,7 @@ make_page_tree_node(pdf_document *doc, int l, int r, pdf_obj *parent_ref, int ro } static void -pdf_rebuild_page_tree(pdf_document *doc) +pdf_rebalance_page_tree(pdf_document *doc) { pdf_obj *catalog; pdf_obj *pages; @@ -2531,10 +2535,16 @@ pdf_rebuild_page_tree(pdf_document *doc) doc->needs_page_tree_rebuild = 0; } +#endif + +static void +pdf_rebalance_page_tree(pdf_document *doc) +{ +} + void pdf_finish_edit(pdf_document *doc) { if (!doc) return; - - pdf_rebuild_page_tree(doc); + pdf_rebalance_page_tree(doc); } diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 71213fd7..faf7e81f 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -1085,8 +1085,8 @@ pdf_init_document(pdf_document *doc) void pdf_close_document(pdf_document *doc) { - int i; fz_context *ctx; + int i; if (!doc) return; @@ -1101,20 +1101,6 @@ pdf_close_document(pdf_document *doc) pdf_free_xref_sections(doc); - if (doc->page_objs) - { - for (i = 0; i < doc->page_len; i++) - pdf_drop_obj(doc->page_objs[i]); - fz_free(ctx, doc->page_objs); - } - - if (doc->page_refs) - { - for (i = 0; i < doc->page_len; i++) - pdf_drop_obj(doc->page_refs[i]); - fz_free(ctx, doc->page_refs); - } - if (doc->focus_obj) pdf_drop_obj(doc->focus_obj); if (doc->file) diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c index f42269a8..dc8a1f38 100644 --- a/source/tools/pdfclean.c +++ b/source/tools/pdfclean.c @@ -91,8 +91,8 @@ static void retainpages(int argc, char **argv) for (page = spage; page <= epage; page++) { - pdf_obj *pageobj = doc->page_objs[page-1]; - pdf_obj *pageref = doc->page_refs[page-1]; + pdf_obj *pageref = pdf_lookup_page_obj(doc, page-1); + pdf_obj *pageobj = pdf_resolve_indirect(pageref); pdf_dict_puts(pageobj, "Parent", parent); diff --git a/source/tools/pdfinfo.c b/source/tools/pdfinfo.c index 18417452..856029ba 100644 --- a/source/tools/pdfinfo.c +++ b/source/tools/pdfinfo.c @@ -572,8 +572,8 @@ gatherresourceinfo(int page, pdf_obj *rsrc, int show) pdf_obj *subrsrc; int i; - pageobj = doc->page_objs[page-1]; - pageref = doc->page_refs[page-1]; + pageref = pdf_lookup_page_obj(doc, page-1); + pageobj = pdf_resolve_indirect(pageref); if (!pageobj) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page); @@ -640,8 +640,8 @@ gatherpageinfo(int page, int show) pdf_obj *pageref; pdf_obj *rsrc; - pageobj = doc->page_objs[page-1]; - pageref = doc->page_refs[page-1]; + pageref = pdf_lookup_page_obj(doc, page-1); + pageobj = pdf_resolve_indirect(pageref); if (!pageobj) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page); diff --git a/source/tools/pdfposter.c b/source/tools/pdfposter.c index 4702c2aa..3076f7af 100644 --- a/source/tools/pdfposter.c +++ b/source/tools/pdfposter.c @@ -81,7 +81,7 @@ static void decimatepages(pdf_document *doc) fz_rect mb; int num; - newpageobj = pdf_copy_dict(doc->page_objs[page]); + newpageobj = pdf_copy_dict(pdf_lookup_page_obj(doc, page)); num = pdf_create_object(doc); pdf_update_object(doc, num, newpageobj); newpageref = pdf_new_indirect(doc, num, 0); diff --git a/source/tools/pdfshow.c b/source/tools/pdfshow.c index 78e3fd08..6e721464 100644 --- a/source/tools/pdfshow.c +++ b/source/tools/pdfshow.c @@ -62,7 +62,7 @@ static void showpagetree(void) count = pdf_count_pages(doc); for (i = 0; i < count; i++) { - ref = doc->page_refs[i]; + ref = pdf_lookup_page_obj(doc, i); printf("page %d = %d %d R\n", i + 1, pdf_to_num(ref), pdf_to_gen(ref)); } printf("\n"); |