From 636652daee46a9cf9836746135e3f9678db796ec Mon Sep 17 00:00:00 2001
From: Robin Watts <robin.watts@artifex.com>
Date: Mon, 7 May 2012 11:30:05 +0100
Subject: Switch to reading content streams on the fly during interpretation.

Previously, before interpreting a page's content stream, we would
load it entirely into a buffer. Then we would interpret that
buffer. This has a cost in memory use.

Here, we update the code to read from a stream on the fly.

This has required changes in various different parts of the code.

Firstly, we have removed all use of the FILE lock - as stream
reads can now safely be interrupted by resource (or object) reads
from elsewhere in the file, the file lock becomes a very hard
thing to maintain, and doesn't actually benefit us at all. The
choices were to either use a recursive lock, or to remove it
entirely; I opted for the latter.

The file lock enum value remains as a placeholder for future use in
extendable data streams.

Secondly, we add a new 'concat' filter that concatenates a series of
streams together into one, optionally putting whitespace between each
stream (as the pdf parser requires this).

Finally, we change page/xobject/pattern content streams to work
on the fly, but we leave type3 glyphs using buffers (as presumably
these will be run repeatedly).
---
 fitz/filt_basic.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'fitz/filt_basic.c')

diff --git a/fitz/filt_basic.c b/fitz/filt_basic.c
index ac6a5903..7d504f29 100644
--- a/fitz/filt_basic.c
+++ b/fitz/filt_basic.c
@@ -62,6 +62,104 @@ fz_open_null(fz_stream *chain, int len, int offset)
 	return fz_new_stream(ctx, state, read_null, close_null);
 }
 
+/* Concat filter concatenates several streams into one */
+
+struct concat_filter
+{
+	int max; /* capacity of chain[], fixed when the filter is opened */
+	int count; /* number of streams pushed so far */
+	int current; /* index of the stream being read; == count once all are drained */
+	int pad; /* 1 if we should add whitespace padding between streams */
+	int ws; /* 1 if we should send a whitespace padding byte next */
+	fz_stream *chain[1]; /* pushed streams; room for max entries is allocated past the struct */
+};
+
+static int
+read_concat(fz_stream *stm, unsigned char *buf, int len)
+{
+	struct concat_filter *cat = (struct concat_filter *)stm->state;
+	unsigned char *out = buf;
+	unsigned char *end;
+	int got;
+
+	if (len <= 0)
+		return 0;
+	end = buf + len;
+	/* Fill the caller's buffer until it is full or every pushed stream
+	 * has been drained. Returns the number of bytes stored. */
+	while (out < end && cat->current != cat->count)
+	{
+		if (cat->ws)
+		{
+			/* The previous stream ended and padding is enabled: emit
+			 * the single separating space before reading on. */
+			cat->ws = 0;
+			*out++ = 32;
+			continue;
+		}
+		got = fz_read(cat->chain[cat->current], out, (int)(end - out));
+		if (got != 0)
+		{
+			out += got;
+			continue;
+		}
+
+		/* Nothing was read, so the current stream is exhausted. Close it,
+		 * step to the next one and arm the padding byte if requested. */
+		fz_close(cat->chain[cat->current]);
+		cat->current++;
+		cat->ws = cat->pad;
+	}
+
+	return (int)(out - buf);
+}
+
+static void
+close_concat(fz_context *ctx, void *state_)
+{
+	struct concat_filter *cat = (struct concat_filter *)state_;
+	int idx = cat->current;
+
+	/* Streams before 'current' were already closed by read_concat as it
+	 * moved past them, so only the remaining ones are closed here. */
+	while (idx < cat->count)
+		fz_close(cat->chain[idx++]);
+	fz_free(ctx, cat);
+}
+
+fz_stream *
+fz_open_concat(fz_context *ctx, int len, int pad)
+{
+	struct concat_filter *state;
+	int extra;
+
+	/* Room for 'len' sub-streams (see fz_concat_push); non-zero 'pad' puts
+	 * one space between them. chain[1] already supplies the first slot, so
+	 * len < 1 must not shrink the allocation below the struct itself. */
+	if (len < 0)
+		len = 0;
+	extra = (len > 1) ? len - 1 : 0;
+	state = fz_calloc(ctx, 1, sizeof(struct concat_filter) + extra * sizeof(fz_stream *));
+	state->max = len;
+	state->count = 0;
+	state->current = 0;
+	state->pad = pad;
+	state->ws = 0; /* We never send padding byte at the start */
+
+	return fz_new_stream(ctx, state, read_concat, close_concat);
+}
+
+void
+fz_concat_push(fz_stream *concat, fz_stream *chain)
+{
+	struct concat_filter *state = (struct concat_filter *)concat->state;
+	if (state->count == state->max) { /* no free slot: reject 'chain' */
+		fz_close(chain); /* the filter closes what it is given, so don't leak it */
+		fz_throw(concat->ctx, "Concat filter size exceeded");
+	}
+	state->chain[state->count++] = chain; /* closed later by read/close_concat */
+}
+
 /* ASCII Hex Decode */
 
 typedef struct fz_ahxd_s fz_ahxd;
-- 
cgit v1.2.3