Update seeking behaviour of null streams.

In order to (hopefully) allow page content streams to be interpreted without having to preload them all into memory before we run them, we need to make the stream reading code cope with other users moving the stream pointer. For example, consider the case where we are midway through interpreting a contents stream and we hit an operator that requires something to be read from Resources. This will move the underlying stream file pointer, and cause the contents stream to read incorrectly when control returns to the interpreter. The solution to this seems to be fairly simple: whenever we create a filter out of the file stream, the existing code puts in a 'null' filter first, to enforce a length limit on the stream. This null filter already does most of the work we need it to: because it is there, the buffering of data is done in the null filter rather than in the underlying stream layer. All we need to do is to keep track of where in the underlying stream the null filter thinks it is, and ensure that it seeks there before each read (in case anyone else has moved it). We make the setting of the offset explicit in the pdf_open_filter call (and associated calls), rather than requiring fz_seeks elsewhere.
author: Robin Watts <robin.watts@artifex.com> 2012-05-04 15:10:56 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-05-08 15:14:56 +0100
commit: 2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (patch)
tree: 6b181530e563c9445116252cf5945804e934161a /pdf/pdf_stream.c
parent: 6c5d7cb79054f2bea8c109b143e9d453fb2ca1bc (diff)
download: mupdf-2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0.tar.xz
1 files changed, 8 insertions, 16 deletions
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 84f966ec..5338d81c 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -226,7 +226,7 @@ build_filter_chain(fz_stream *chain, pdf_document *xref, pdf_obj *fs, pdf_obj *p
  * stream length, followed by a decryption filter.
  */
 static fz_stream *
-pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen)
+pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset)
 {
 	int hascrypt;
 	int len;
@@ -236,7 +236,7 @@ pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int n
 	fz_keep_stream(chain);
 
 	len = pdf_to_int(pdf_dict_gets(stmobj, "Length"));
-	chain = fz_open_null(chain, len);
+	chain = fz_open_null(chain, len, offset);
 
 	fz_try(ctx)
 	{
@@ -258,7 +258,7 @@ pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int n
  * to stream length and decrypting.
  */
 static fz_stream *
-pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, pdf_image_params *imparams)
+pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset, pdf_image_params *imparams)
 {
 	pdf_obj *filters;
 	pdf_obj *params;
@@ -266,7 +266,7 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num,
 	filters = pdf_dict_getsa(stmobj, "Filter", "F");
 	params = pdf_dict_getsa(stmobj, "DecodeParms", "DP");
 
-	chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen);
+	chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen, offset);
 
 	if (pdf_is_name(filters))
 		chain = build_filter(chain, xref, filters, params, num, gen, imparams);
@@ -298,7 +298,7 @@ pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_strea
 	if (pdf_array_len(filters) > 0)
 		return build_filter_chain(chain, xref, filters, params, 0, 0, imparams);
 
-	return fz_open_null(chain, length);
+	return fz_open_null(chain, length, fz_tell(chain));
 }
 
 /*
@@ -324,9 +324,8 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
 	if (x->stm_ofs == 0)
 		fz_throw(xref->ctx, "object is not a stream");
 
-	stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen);
+	stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs);
 	fz_lock_stream(stm);
-	fz_seek(xref->file, x->stm_ofs, 0);
 	return stm;
 }
 
@@ -345,7 +344,6 @@ fz_stream *
 pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
 {
 	pdf_xref_entry *x;
-	fz_stream *stm;
 
 	if (num < 0 || num >= xref->len)
 		fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen);
@@ -358,9 +356,7 @@ pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
 	if (x->stm_ofs == 0)
 		fz_throw(xref->ctx, "object is not a stream");
 
-	stm = pdf_open_filter(xref->file, xref, x->obj, num, gen, params);
-	fz_seek(xref->file, x->stm_ofs, 0);
-	return stm;
+	return pdf_open_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs, params);
 }
 
 fz_stream *
@@ -410,14 +406,10 @@ pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *buffer, pdf_image_param
 fz_stream *
 pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, int stm_ofs)
 {
-	fz_stream *stm;
-
 	if (stm_ofs == 0)
 		fz_throw(xref->ctx, "object is not a stream");
 
-	stm = pdf_open_filter(xref->file, xref, dict, num, gen, NULL);
-	fz_seek(xref->file, stm_ofs, 0);
-	return stm;
+	return pdf_open_filter(xref->file, xref, dict, num, gen, stm_ofs, NULL);
 }
 
 /*
author	Robin Watts <robin.watts@artifex.com>	2012-05-04 15:10:56 +0100
committer	Robin Watts <robin.watts@artifex.com>	2012-05-08 15:14:56 +0100
commit	2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0 (patch)
tree	6b181530e563c9445116252cf5945804e934161a /pdf/pdf_stream.c
parent	6c5d7cb79054f2bea8c109b143e9d453fb2ca1bc (diff)
download	mupdf-2433a4d16d114a0576e6a4ff9ca61ae4f29fdda0.tar.xz