summaryrefslogtreecommitdiff
path: root/pdf/pdf_page.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-05-07 11:30:05 +0100
committerRobin Watts <robin.watts@artifex.com>2012-05-08 15:14:57 +0100
commit636652daee46a9cf9836746135e3f9678db796ec (patch)
tree110e78a0ffcb4a873088c92864ff182d783fdbc3 /pdf/pdf_page.c
parent2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (diff)
downloadmupdf-636652daee46a9cf9836746135e3f9678db796ec.tar.xz
Switch to reading content streams on the fly during interpretation.
Previously, before interpreting a page's content stream, we would load it entirely into a buffer and then interpret that buffer. This has a cost in memory use. Here, we update the code to read from a stream on the fly. This has required changes in various different parts of the code. Firstly, we have removed all use of the FILE lock: because stream reads can now safely be interrupted by resource (or object) reads from elsewhere in the file, the file lock becomes very hard to maintain and doesn't actually benefit us at all. The choices were either to use a recursive lock or to remove it entirely; I opted for the latter. The file lock enum value remains as a placeholder for future use in extendable data streams. Secondly, we add a new 'concat' filter that concatenates a series of streams together into one, optionally putting whitespace between each stream (as the PDF parser requires this). Finally, we change page/xobject/pattern content streams to work on the fly, but we leave Type 3 glyphs using buffers (as presumably these will be run repeatedly).
Diffstat (limited to 'pdf/pdf_page.c')
-rw-r--r--pdf/pdf_page.c70
1 files changed, 2 insertions, 68 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
index f5fbc0b0..42e830da 100644
--- a/pdf/pdf_page.c
+++ b/pdf/pdf_page.c
@@ -281,72 +281,6 @@ found:
return useBM;
}
-/* we need to combine all sub-streams into one for the content stream interpreter */
-
-static fz_buffer *
-pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list)
-{
- fz_buffer *big;
- fz_buffer *one;
- int i, n;
- fz_context *ctx = xref->ctx;
-
- big = fz_new_buffer(ctx, 32 * 1024);
-
- n = pdf_array_len(list);
- fz_var(i); /* Workaround Mac compiler bug */
- for (i = 0; i < n; i++)
- {
- pdf_obj *stm = pdf_array_get(list, i);
- fz_try(ctx)
- {
- one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm));
- }
- fz_catch(ctx)
- {
- fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
- continue;
- }
-
- if (big->len + one->len + 1 > big->cap)
- fz_resize_buffer(ctx, big, big->len + one->len + 1);
- memcpy(big->data + big->len, one->data, one->len);
- big->data[big->len + one->len] = ' ';
- big->len += one->len + 1;
-
- fz_drop_buffer(ctx, one);
- }
-
- if (n > 0 && big->len == 0)
- {
- fz_drop_buffer(ctx, big);
- fz_throw(ctx, "cannot load content stream");
- }
- fz_trim_buffer(ctx, big);
-
- return big;
-}
-
-static fz_buffer *
-pdf_load_page_contents(pdf_document *xref, pdf_obj *obj)
-{
- fz_context *ctx = xref->ctx;
-
- if (pdf_is_array(obj))
- {
- return pdf_load_page_contents_array(xref, obj);
- /* RJW: "cannot load content stream array" */
- }
- else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
- {
- return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj));
- /* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */
- }
-
- fz_warn(ctx, "page contents missing, leaving page blank");
- return fz_new_buffer(ctx, 0);
-}
-
pdf_page *
pdf_load_page(pdf_document *xref, int number)
{
@@ -424,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number)
obj = pdf_dict_gets(pageobj, "Contents");
fz_try(ctx)
{
- page->contents = pdf_load_page_contents(xref, obj);
+ page->contents = pdf_keep_obj(obj);
if (pdf_resources_use_blending(ctx, page->resources))
page->transparency = 1;
@@ -464,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page)
if (page->resources)
pdf_drop_obj(page->resources);
if (page->contents)
- fz_drop_buffer(xref->ctx, page->contents);
+ pdf_drop_obj(page->contents);
if (page->links)
fz_drop_link(xref->ctx, page->links);
if (page->annots)