summary refs log tree commit diff
path: root/pdf/pdf_page.c
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2011-04-05 01:18:03 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2011-04-05 01:18:03 +0200
commit 69875363f1766f95c35c1fe429dd85ac9a19add5 (patch)
tree 1204d27eadd066a37075c093befdb1973b2e71ce /pdf/pdf_page.c
parent d2de9cee6036b997e536a0c0384b88b38e523e56 (diff)
download mupdf-69875363f1766f95c35c1fe429dd85ac9a19add5.tar.xz
Clean up xps and pdf page access functions.
Diffstat (limited to 'pdf/pdf_page.c')
-rw-r--r-- pdf/pdf_page.c 230
1 files changed, 180 insertions, 50 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
index 685ec3ae..39cae41d 100644
--- a/pdf/pdf_page.c
+++ b/pdf/pdf_page.c
@@ -1,67 +1,123 @@
#include "fitz.h"
#include "mupdf.h"
-/* we need to combine all sub-streams into one for the content stream interpreter */
+struct info /* "Resources", "MediaBox", "CropBox" and "Rotate" values collected from interior page-tree nodes; passed by value to pdf_load_page_tree_node */
+{
+ fz_obj *resources; /* most recent "Resources" entry found on the way down, NULL if none yet */
+ fz_obj *mediabox; /* most recent "MediaBox" entry found on the way down, NULL if none yet */
+ fz_obj *cropbox; /* most recent "CropBox" entry found on the way down, NULL if none yet */
+ fz_obj *rotate; /* most recent "Rotate" entry found on the way down, NULL if none yet */
+};
+
+int
+pdf_count_pages(pdf_xref *xref) /* returns xref->page_len, the counter that pdf_load_page_tree_node increments for every page it records */
+{
+ return xref->page_len;
+}
-static fz_error
-pdf_load_page_contents_array(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list)
+int
+pdf_find_page_number(pdf_xref *xref, fz_obj *page) /* returns the zero-based index of 'page' within xref->page_refs, or -1 when no entry matches */
{
- fz_error error;
- fz_buffer *big;
- fz_buffer *one;
- int i;
+ int i, num = fz_to_num(page);
+ for (i = 0; i < xref->page_len; i++) /* linear scan over the recorded page references */
+ if (num == fz_to_num(xref->page_refs[i])) /* only fz_to_num() is compared; fz_to_gen() is not consulted */
+ return i;
+ return -1;
+}
- pdf_log_page("multiple content streams: %d\n", fz_array_len(list));
+static void
+pdf_load_page_tree_node(pdf_xref *xref, fz_obj *node, struct info info) /* Recursively walks 'node': a node with an array "Kids" and an integer "Count" is descended into,
+ * anything else is recorded as a page in xref->page_refs / xref->page_objs.
+ * 'info' is received by value, so updates made here reach only this node's descendants. */
+{
+ fz_obj *dict, *kids, *count;
+ fz_obj *obj, *tmp;
+ int i, n;
- /* TODO: openstream, read, close into big buffer at once */
+ /* prevent infinite recursion */
+ if (fz_dict_gets(node, ".seen")) /* ".seen" is set below while a node's kids are being visited */
+ return;
- big = fz_new_buffer(32 * 1024);
+ kids = fz_dict_gets(node, "Kids");
+ count = fz_dict_gets(node, "Count");
- for (i = 0; i < fz_array_len(list); i++)
+ if (fz_is_array(kids) && fz_is_int(count)) /* interior node of the page tree */
{
- fz_obj *stm = fz_array_get(list, i);
- error = pdf_load_stream(&one, xref, fz_to_num(stm), fz_to_gen(stm));
- if (error)
+ obj = fz_dict_gets(node, "Resources"); /* each attribute present on this node replaces the value handed down from above */
+ if (obj)
+ info.resources = obj;
+ obj = fz_dict_gets(node, "MediaBox");
+ if (obj)
+ info.mediabox = obj;
+ obj = fz_dict_gets(node, "CropBox");
+ if (obj)
+ info.cropbox = obj;
+ obj = fz_dict_gets(node, "Rotate");
+ if (obj)
+ info.rotate = obj;
+
+ tmp = fz_new_null(); /* temporary marker value stored under ".seen" for the duration of the recursion */
+ fz_dict_puts(node, ".seen", tmp);
+ fz_drop_obj(tmp);
+
+ n = fz_array_len(kids);
+ for (i = 0; i < n; i++)
{
- fz_drop_buffer(big);
- return fz_rethrow(error, "cannot load content stream part %d/%d (%d %d R)", i + 1, fz_array_len(list), fz_to_num(stm), fz_to_gen(stm));
+ obj = fz_array_get(kids, i);
+ pdf_load_page_tree_node(xref, obj, info);
}
- if (big->len + one->len + 1 > big->cap)
- fz_resize_buffer(big, big->len + one->len + 1);
- memcpy(big->data + big->len, one->data, one->len);
- big->data[big->len + one->len] = ' ';
- big->len += one->len + 1;
-
- fz_drop_buffer(one);
+ fz_dict_dels(node, ".seen"); /* remove the marker once all kids have been visited */
}
+ else /* no array "Kids" / integer "Count" pair: record the node as a page */
+ {
+ dict = fz_resolve_indirect(node);
+
+ if (info.resources && !fz_dict_gets(dict, "Resources")) /* copy handed-down attributes into the page dict only where the page has none of its own */
+ fz_dict_puts(dict, "Resources", info.resources);
+ if (info.mediabox && !fz_dict_gets(dict, "MediaBox"))
+ fz_dict_puts(dict, "MediaBox", info.mediabox);
+ if (info.cropbox && !fz_dict_gets(dict, "CropBox"))
+ fz_dict_puts(dict, "CropBox", info.cropbox);
+ if (info.rotate && !fz_dict_gets(dict, "Rotate"))
+ fz_dict_puts(dict, "Rotate", info.rotate);
+
+ if (xref->page_len == xref->page_cap) /* arrays are full: grow both by exactly one slot */
+ {
+ fz_warn("found more pages than expected");
+ xref->page_cap ++;
+ xref->page_refs = fz_realloc(xref->page_refs, xref->page_cap, sizeof(fz_obj*));
+ xref->page_objs = fz_realloc(xref->page_objs, xref->page_cap, sizeof(fz_obj*));
+ }
- *bigbufp = big;
- return fz_okay;
+ xref->page_refs[xref->page_len] = fz_keep_obj(node); /* page_refs keeps the node as passed in */
+ xref->page_objs[xref->page_len] = fz_keep_obj(dict); /* page_objs keeps the result of fz_resolve_indirect() */
+ xref->page_len ++;
+ }
}
-static fz_error
-pdf_load_page_contents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj)
+fz_error
+pdf_load_page_tree(pdf_xref *xref) /* Fills xref->page_refs / xref->page_objs by walking the tree found at trailer "Root" -> "Pages".
+ * Returns fz_okay, or a thrown error when "Pages" is not a dict or its "Count" is not an integer. */
{
- fz_error error;
+ struct info info;
+ fz_obj *catalog = fz_dict_gets(xref->trailer, "Root");
+ fz_obj *pages = fz_dict_gets(catalog, "Pages");
+ fz_obj *count = fz_dict_gets(pages, "Count");
- if (fz_is_array(obj))
- {
- error = pdf_load_page_contents_array(bufp, xref, obj);
- if (error)
- return fz_rethrow(error, "cannot load content stream array (%d 0 R)", fz_to_num(obj));
- }
- else if (pdf_is_stream(xref, fz_to_num(obj), fz_to_gen(obj)))
- {
- error = pdf_load_stream(bufp, xref, fz_to_num(obj), fz_to_gen(obj));
- if (error)
- return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_to_num(obj));
- }
- else
- {
- fz_warn("page contents missing, leaving page blank");
- *bufp = fz_new_buffer(0);
- }
+ if (!fz_is_dict(pages))
+ return fz_throw("missing page tree");
+ if (!fz_is_int(count))
+ return fz_throw("missing page count");
+
+ xref->page_cap = fz_to_int(count); /* NOTE(review): the value is not range-checked here; confirm how fz_calloc treats a zero or negative count */
+ xref->page_len = 0;
+ xref->page_refs = fz_calloc(xref->page_cap, sizeof(fz_obj*));
+ xref->page_objs = fz_calloc(xref->page_cap, sizeof(fz_obj*));
+
+ info.resources = NULL; /* the walk starts with no handed-down attributes */
+ info.mediabox = NULL;
+ info.cropbox = NULL;
+ info.rotate = NULL;
+
+ pdf_load_page_tree_node(xref, pages, info);
return fz_okay;
}
@@ -149,21 +205,92 @@ found:
return 1;
}
+/* we need to combine all sub-streams into one for the content stream interpreter */
+
+static fz_error
+pdf_load_page_contents_array(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list) /* Loads every stream referenced by 'list' and concatenates them into one new buffer stored in *bigbufp.
+ * Returns fz_okay on success; on a load failure the partial buffer is dropped and *bigbufp is not written. */
+{
+ fz_error error;
+ fz_buffer *big;
+ fz_buffer *one;
+ int i;
+
+ pdf_log_page("multiple content streams: %d\n", fz_array_len(list));
+
+ /* TODO: openstream, read, close into big buffer at once */
+
+ big = fz_new_buffer(32 * 1024);
+
+ for (i = 0; i < fz_array_len(list); i++)
+ {
+ fz_obj *stm = fz_array_get(list, i);
+ error = pdf_load_stream(&one, xref, fz_to_num(stm), fz_to_gen(stm));
+ if (error)
+ {
+ fz_drop_buffer(big);
+ return fz_rethrow(error, "cannot load content stream part %d/%d (%d 0 R)", /* the generation is printed as a literal 0, not taken from fz_to_gen() */
+ i + 1, fz_array_len(list), fz_to_num(stm));
+ }
+
+ if (big->len + one->len + 1 > big->cap) /* need room for this part plus one separator byte */
+ fz_resize_buffer(big, big->len + one->len + 1);
+ memcpy(big->data + big->len, one->data, one->len);
+ big->data[big->len + one->len] = ' '; /* a single space follows each part, including the last */
+ big->len += one->len + 1;
+
+ fz_drop_buffer(one);
+ }
+
+ *bigbufp = big;
+ return fz_okay;
+}
+
+static fz_error
+pdf_load_page_contents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj) /* Stores the content buffer for 'obj' in *bufp: a concatenation for an array, the stream itself for a stream,
+ * and an fz_new_buffer(0) buffer (with a warning) for anything else. Returns fz_okay or a rethrown load error. */
+{
+ fz_error error;
+
+ if (fz_is_array(obj)) /* several content streams: combine them */
+ {
+ error = pdf_load_page_contents_array(bufp, xref, obj);
+ if (error)
+ return fz_rethrow(error, "cannot load content stream array (%d 0 R)", fz_to_num(obj));
+ }
+ else if (pdf_is_stream(xref, fz_to_num(obj), fz_to_gen(obj))) /* a single content stream */
+ {
+ error = pdf_load_stream(bufp, xref, fz_to_num(obj), fz_to_gen(obj));
+ if (error)
+ return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_to_num(obj));
+ }
+ else /* neither an array nor a stream: not treated as an error */
+ {
+ fz_warn("page contents missing, leaving page blank");
+ *bufp = fz_new_buffer(0);
+ }
+
+ return fz_okay;
+}
+
fz_error
-pdf_load_page(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
+pdf_load_page(pdf_page **pagep, pdf_xref *xref, int number)
{
fz_error error;
pdf_page *page;
+ fz_obj *dict;
fz_obj *obj;
fz_bbox bbox;
+ if (number < 0 || number >= xref->page_len)
+ return fz_throw("cannot find page %d", number + 1);
+
pdf_log_page("load page {\n");
- // TODO: move this to a more appropriate place
/* Ensure that we have a store for resource objects */
if (!xref->store)
xref->store = pdf_new_store();
+ dict = xref->page_objs[number];
+
page = fz_malloc(sizeof(pdf_page));
page->resources = NULL;
page->contents = NULL;
@@ -175,7 +302,7 @@ pdf_load_page(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
bbox = fz_round_rect(pdf_to_rect(obj));
if (fz_is_empty_rect(pdf_to_rect(obj)))
{
- fz_warn("cannot find page bounds, guessing page bounds.");
+ fz_warn("cannot find page size for page %d", number + 1);
bbox.x0 = 0;
bbox.y0 = 0;
bbox.x1 = 612;
@@ -195,7 +322,10 @@ pdf_load_page(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
page->mediabox.y1 = MAX(bbox.y0, bbox.y1);
if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1)
- return fz_throw("invalid page size");
+ {
+ fz_warn("invalid page size in page %d", number + 1);
+ page->mediabox = fz_unit_rect;
+ }
page->rotate = fz_to_int(fz_dict_gets(dict, "Rotate"));
@@ -218,7 +348,7 @@ pdf_load_page(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
if (error)
{
pdf_free_page(page);
- return fz_rethrow(error, "cannot load page contents (%d %d R)", fz_to_num(obj), fz_to_gen(obj));
+ return fz_rethrow(error, "cannot load page contents for page %d (%d 0 R)", number + 1, fz_to_num(obj));
}
if (page->resources && pdf_resources_use_blending(page->resources))