diff options
author | Robin Watts <robin.watts@artifex.com> | 2012-05-07 11:30:05 +0100 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2012-05-08 15:14:57 +0100 |
commit | 636652daee46a9cf9836746135e3f9678db796ec (patch) | |
tree | 110e78a0ffcb4a873088c92864ff182d783fdbc3 /pdf/pdf_page.c | |
parent | 2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (diff) | |
download | mupdf-636652daee46a9cf9836746135e3f9678db796ec.tar.xz |
Switch to reading content streams on the fly during interpretation.
Previously, before interpreting a page's content stream we would
load it entirely into a buffer. Then we would interpret that
buffer. This has a cost in memory use.
Here, we update the code to read from a stream on the fly.
This has required changes in various different parts of the code.
Firstly, we have removed all use of the FILE lock - as stream
reads can now safely be interrupted by resource (or object) reads
from elsewhere in the file, the file lock becomes a very hard
thing to maintain, and doesn't actually benefit us at all. The
choices were to either use a recursive lock, or to remove it
entirely; I opted for the latter.
The file lock enum value remains as a placeholder for future use in
extendable data streams.
Secondly, we add a new 'concat' filter that concatenates a series of
streams together into one, optionally putting whitespace between each
stream (as the PDF parser requires this).
Finally, we change page/xobject/pattern content streams to work
on the fly, but we leave type3 glyphs using buffers (as presumably
these will be run repeatedly).
Diffstat (limited to 'pdf/pdf_page.c')
-rw-r--r-- | pdf/pdf_page.c | 70 |
1 files changed, 2 insertions, 68 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c index f5fbc0b0..42e830da 100644 --- a/pdf/pdf_page.c +++ b/pdf/pdf_page.c @@ -281,72 +281,6 @@ found: return useBM; } -/* we need to combine all sub-streams into one for the content stream interpreter */ - -static fz_buffer * -pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list) -{ - fz_buffer *big; - fz_buffer *one; - int i, n; - fz_context *ctx = xref->ctx; - - big = fz_new_buffer(ctx, 32 * 1024); - - n = pdf_array_len(list); - fz_var(i); /* Workaround Mac compiler bug */ - for (i = 0; i < n; i++) - { - pdf_obj *stm = pdf_array_get(list, i); - fz_try(ctx) - { - one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm)); - } - fz_catch(ctx) - { - fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); - continue; - } - - if (big->len + one->len + 1 > big->cap) - fz_resize_buffer(ctx, big, big->len + one->len + 1); - memcpy(big->data + big->len, one->data, one->len); - big->data[big->len + one->len] = ' '; - big->len += one->len + 1; - - fz_drop_buffer(ctx, one); - } - - if (n > 0 && big->len == 0) - { - fz_drop_buffer(ctx, big); - fz_throw(ctx, "cannot load content stream"); - } - fz_trim_buffer(ctx, big); - - return big; -} - -static fz_buffer * -pdf_load_page_contents(pdf_document *xref, pdf_obj *obj) -{ - fz_context *ctx = xref->ctx; - - if (pdf_is_array(obj)) - { - return pdf_load_page_contents_array(xref, obj); - /* RJW: "cannot load content stream array" */ - } - else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) - { - return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); - /* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */ - } - - fz_warn(ctx, "page contents missing, leaving page blank"); - return fz_new_buffer(ctx, 0); -} - pdf_page * pdf_load_page(pdf_document *xref, int number) { @@ -424,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number) obj = pdf_dict_gets(pageobj, "Contents"); fz_try(ctx) { - page->contents = pdf_load_page_contents(xref, 
obj); + page->contents = pdf_keep_obj(obj); if (pdf_resources_use_blending(ctx, page->resources)) page->transparency = 1; @@ -464,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page) if (page->resources) pdf_drop_obj(page->resources); if (page->contents) - fz_drop_buffer(xref->ctx, page->contents); + pdf_drop_obj(page->contents); if (page->links) fz_drop_link(xref->ctx, page->links); if (page->annots) |