Switch to reading content streams on the fly during interpretation.

Previously, before interpreting a page's content stream, we would load it entirely into a buffer and then interpret that buffer. This has a cost in memory use. Here, we update the code to read from a stream on the fly. This has required changes in several parts of the code. Firstly, we have removed all use of the FILE lock: because stream reads can now safely be interrupted by resource (or object) reads from elsewhere in the file, the file lock becomes very hard to maintain, and it doesn't actually benefit us at all. The choices were either to use a recursive lock or to remove the lock entirely; I opted for the latter. The file lock enum value remains as a placeholder for future use in extendable data streams. Secondly, we add a new 'concat' filter that concatenates a series of streams into one, optionally putting whitespace between each stream (as the PDF parser requires this). Finally, we change page/xobject/pattern content streams to work on the fly, but we leave type3 glyphs using buffers (as presumably these will be run repeatedly).
author: Robin Watts <robin.watts@artifex.com> 2012-05-07 11:30:05 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-05-08 15:14:57 +0100
commit: 636652daee46a9cf9836746135e3f9678db796ec (patch)
tree: 110e78a0ffcb4a873088c92864ff182d783fdbc3 /pdf/pdf_page.c
parent: 2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (diff)
download: mupdf-636652daee46a9cf9836746135e3f9678db796ec.tar.xz
1 files changed, 2 insertions, 68 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
index f5fbc0b0..42e830da 100644
--- a/pdf/pdf_page.c
+++ b/pdf/pdf_page.c
@@ -281,72 +281,6 @@ found:
 	return useBM;
 }
 
-/* we need to combine all sub-streams into one for the content stream interpreter */
-
-static fz_buffer *
-pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list)
-{
-	fz_buffer *big;
-	fz_buffer *one;
-	int i, n;
-	fz_context *ctx = xref->ctx;
-
-	big = fz_new_buffer(ctx, 32 * 1024);
-
-	n = pdf_array_len(list);
-	fz_var(i); /* Workaround Mac compiler bug */
-	for (i = 0; i < n; i++)
-	{
-		pdf_obj *stm = pdf_array_get(list, i);
-		fz_try(ctx)
-		{
-			one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm));
-		}
-		fz_catch(ctx)
-		{
-			fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
-			continue;
-		}
-
-		if (big->len + one->len + 1 > big->cap)
-			fz_resize_buffer(ctx, big, big->len + one->len + 1);
-		memcpy(big->data + big->len, one->data, one->len);
-		big->data[big->len + one->len] = ' ';
-		big->len += one->len + 1;
-
-		fz_drop_buffer(ctx, one);
-	}
-
-	if (n > 0 && big->len == 0)
-	{
-		fz_drop_buffer(ctx, big);
-		fz_throw(ctx, "cannot load content stream");
-	}
-	fz_trim_buffer(ctx, big);
-
-	return big;
-}
-
-static fz_buffer *
-pdf_load_page_contents(pdf_document *xref, pdf_obj *obj)
-{
-	fz_context *ctx = xref->ctx;
-
-	if (pdf_is_array(obj))
-	{
-		return pdf_load_page_contents_array(xref, obj);
-		/* RJW: "cannot load content stream array" */
-	}
-	else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
-	{
-		return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj));
-		/* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */
-	}
-
-	fz_warn(ctx, "page contents missing, leaving page blank");
-	return fz_new_buffer(ctx, 0);
-}
-
 pdf_page *
 pdf_load_page(pdf_document *xref, int number)
 {
@@ -424,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number)
 	obj = pdf_dict_gets(pageobj, "Contents");
 	fz_try(ctx)
 	{
-		page->contents = pdf_load_page_contents(xref, obj);
+		page->contents = pdf_keep_obj(obj);
 
 		if (pdf_resources_use_blending(ctx, page->resources))
 			page->transparency = 1;
@@ -464,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page)
 	if (page->resources)
 		pdf_drop_obj(page->resources);
 	if (page->contents)
-		fz_drop_buffer(xref->ctx, page->contents);
+		pdf_drop_obj(page->contents);
 	if (page->links)
 		fz_drop_link(xref->ctx, page->links);
 	if (page->annots)
author	Robin Watts <robin.watts@artifex.com>	2012-05-07 11:30:05 +0100
committer	Robin Watts <robin.watts@artifex.com>	2012-05-08 15:14:57 +0100
commit	636652daee46a9cf9836746135e3f9678db796ec (patch)
tree	110e78a0ffcb4a873088c92864ff182d783fdbc3 /pdf/pdf_page.c
parent	2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (diff)
download	mupdf-636652daee46a9cf9836746135e3f9678db796ec.tar.xz