Fix operator buffering of inline images.

Previously the pdf_process buffer implementation (pdf-op-buffer) did not understand inline images. In order to make this work without needlessly duplicating complex code from within pdf-op-run, the parsing of inline images has been moved to happen in pdf-interpret.c. When the op_table entry for BI is called it now expects the inline image to be in csi->img and the dictionary object to be in csi->obj. To make this work, we have had to improve the handling of inline images in general. While non-inline images have been loaded and held in memory in their compressed form and only decoded when required, until now we have always loaded and decoded inline images immediately. This has been due to the difficulty in knowing how many bytes of data to read from the stream - we know the length of the stream once uncompressed, but relating this to the compressed length is hard. To cure this we introduce a new type of filter stream, a 'leecher'. We insert a leecher stream before we build the filters required to decode the image. We then read and discard the appropriate number of uncompressed bytes from the filters. This pulls the compressed data through the leecher stream, which stores it in an fz_buffer. Thus images are now always held in their compressed forms in memory. The pdf-op-run implementation is now trivial. The only real complexity in the pdf-op-buffer implementation is the need to ensure that the /Filter entry in the dictionary object matches the exact point at which we backstopped the decompression.
author: Robin Watts <robin.watts@artifex.com> 2014-03-14 20:01:32 +0000
committer: Robin Watts <robin.watts@artifex.com> 2014-03-18 17:48:40 +0000
commit: 4c2715a0bcecfed6ebdfee901920631b09364d7e (patch)
tree: b0e5aa723719159a779425329ddc6ce48b8af0ce /source/pdf/pdf-op-run.c
parent: 551de42088c58dc69fba06fb53e36c2ddb12367f (diff)
download: mupdf-4c2715a0bcecfed6ebdfee901920631b09364d7e.tar.xz
1 files changed, 1 insertions, 55 deletions
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 0f74667a..5575bb00 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -1735,62 +1735,8 @@ static void pdf_run_BDC(pdf_csi *csi, void *state)
 static void pdf_run_BI(pdf_csi *csi, void *state)
 {
 	pdf_run_state *pr = (pdf_run_state *)state;
-	fz_context *ctx = csi->doc->ctx;
-	pdf_obj *rdb = csi->rdb;
-	fz_stream *file = csi->file;
-	int ch;
-	fz_image *img;
-	pdf_obj *obj;
-	int found;
-
-	obj = pdf_parse_dict(csi->doc, file, &csi->doc->lexbuf.base);
-
-	/* read whitespace after ID keyword */
-	ch = fz_read_byte(file);
-	if (ch == '\r')
-		if (fz_peek_byte(file) == '\n')
-			fz_read_byte(file);
-
-	fz_try(ctx)
-	{
-		img = pdf_load_inline_image(csi->doc, rdb, obj, file);
-	}
-	fz_always(ctx)
-	{
-		pdf_drop_obj(obj);
-	}
-	fz_catch(ctx)
-	{
-		fz_rethrow(ctx);
-	}
-
-	pdf_show_image(csi, pr, img);
 
-	fz_drop_image(ctx, img);
-
-	/* find EI */
-	found = 0;
-	ch = fz_read_byte(file);
-	do
-	{
-		while (ch != 'E' && ch != EOF)
-			ch = fz_read_byte(file);
-		if (ch == 'E')
-		{
-			ch = fz_read_byte(file);
-			if (ch == 'I')
-			{
-				ch = fz_peek_byte(file);
-				if (ch == ' ' || ch <= 32 || ch == EOF || ch == '<' || ch == '/')
-				{
-					found = 1;
-					break;
-				}
-			}
-		}
-	} while (ch != EOF);
-	if (!found)
-		fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image");
+	pdf_show_image(csi, pr, csi->img);
 }
 
 static void pdf_run_B(pdf_csi *csi, void *state)
author	Robin Watts <robin.watts@artifex.com>	2014-03-14 20:01:32 +0000
committer	Robin Watts <robin.watts@artifex.com>	2014-03-18 17:48:40 +0000
commit	4c2715a0bcecfed6ebdfee901920631b09364d7e (patch)
tree	b0e5aa723719159a779425329ddc6ce48b8af0ce /source/pdf/pdf-op-run.c
parent	551de42088c58dc69fba06fb53e36c2ddb12367f (diff)
download	mupdf-4c2715a0bcecfed6ebdfee901920631b09364d7e.tar.xz