summaryrefslogtreecommitdiff
path: root/pdf/pdf_stream.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-05-07 11:30:05 +0100
committerRobin Watts <robin.watts@artifex.com>2012-05-08 15:14:57 +0100
commit636652daee46a9cf9836746135e3f9678db796ec (patch)
tree110e78a0ffcb4a873088c92864ff182d783fdbc3 /pdf/pdf_stream.c
parent2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (diff)
downloadmupdf-636652daee46a9cf9836746135e3f9678db796ec.tar.xz
Switch to reading content streams on the fly during interpretation.
Previously, before interpreting a pages content stream we would load it entirely into a buffer. Then we would interpret that buffer. This has a cost in memory use. Here, we update the code to read from a stream on the fly. This has required changes in various different parts of the code. Firstly, we have removed all use of the FILE lock - as stream reads can now safely be interrupted by resource (or object) reads from elsewhere in the file, the file lock becomes a very hard thing to maintain, and doesn't actually benefit us at all. The choices were to either use a recursive lock, or to remove it entirely; I opted for the latter. The file lock enum value remains as a placeholder for future use in extendable data streams. Secondly, we add a new 'concat' filter that concatenates a series of streams together into one, optionally putting whitespace between each stream (as the pdf parser requires this). Finally, we change page/xobject/pattern content streams to work on the fly, but we leave type3 glyphs using buffers (as presumably these will be run repeatedly).
Diffstat (limited to 'pdf/pdf_stream.c')
-rw-r--r--pdf/pdf_stream.c52
1 files changed, 47 insertions, 5 deletions
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 5338d81c..3086fbc9 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -273,7 +273,6 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num,
else if (pdf_array_len(filters) > 0)
chain = build_filter_chain(chain, xref, filters, params, num, gen, imparams);
- fz_lock_stream(chain);
return chain;
}
@@ -309,7 +308,6 @@ fz_stream *
pdf_open_raw_stream(pdf_document *xref, int num, int gen)
{
pdf_xref_entry *x;
- fz_stream *stm;
fz_var(x);
@@ -324,9 +322,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
if (x->stm_ofs == 0)
fz_throw(xref->ctx, "object is not a stream");
- stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs);
- fz_lock_stream(stm);
- return stm;
+ return pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs);
}
/*
@@ -506,3 +502,49 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
return buf;
}
+
+/* Open every stream object in 'list' and concatenate them into one
+ * logical fz_stream. The '1' passed to fz_open_concat requests a
+ * whitespace pad between parts (the PDF content parser needs a token
+ * boundary where one stream ends and the next begins).
+ * A part that fails to open is skipped with a warning rather than
+ * aborting the whole page — one broken content stream part should not
+ * make the page unrenderable.
+ * NOTE(review): ownership — the returned stream owns the pushed
+ * sub-streams; presumably the caller drops it when done. Confirm
+ * against fz_open_concat's contract. */
+static fz_stream *
+pdf_open_object_array(pdf_document *xref, pdf_obj *list)
+{
+ int i, n;
+ fz_context *ctx = xref->ctx;
+ fz_stream *stm;
+
+ n = pdf_array_len(list);
+ stm = fz_open_concat(ctx, n, 1);
+
+ fz_var(i); /* Workaround Mac compiler bug */
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *obj = pdf_array_get(list, i);
+ fz_try(ctx)
+ {
+ fz_concat_push(stm, pdf_open_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)));
+ }
+ fz_catch(ctx)
+ {
+ /* Skip this part; 'continue' is redundant as the last loop
+ * statement but makes the best-effort intent explicit. */
+ fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
+ continue;
+ }
+ }
+
+ return stm;
+}
+
+/* Open a content stream for on-the-fly interpretation.
+ * 'obj' is the page/xobject/pattern /Contents entry: either a single
+ * stream object or an array of stream objects (the array case is
+ * concatenated into one stream by pdf_open_object_array).
+ * Returns NULL, after warning, when 'obj' is neither an array nor a
+ * valid stream — callers must tolerate a NULL return. */
+fz_stream *
+pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj)
+{
+ fz_context *ctx = xref->ctx;
+
+ if (pdf_is_array(obj))
+ {
+ return pdf_open_object_array(xref, obj);
+ }
+ else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
+ {
+ /* NOTE(review): opens via pdf_open_image_stream with NULL image
+ * params — looks like this is just the generic filtered-stream
+ * path; confirm NULL params means "no image decoding". */
+ return pdf_open_image_stream(xref, pdf_to_num(obj), pdf_to_gen(obj), NULL);
+ }
+
+ fz_warn(ctx, "pdf object stream missing (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj));
+ return NULL;
+}