From 636652daee46a9cf9836746135e3f9678db796ec Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Mon, 7 May 2012 11:30:05 +0100 Subject: Switch to reading content streams on the fly during interpretation. Previously, before interpreting a page's content stream we would load it entirely into a buffer. Then we would interpret that buffer. This has a cost in memory use. Here, we update the code to read from a stream on the fly. This has required changes in various different parts of the code. Firstly, we have removed all use of the FILE lock - as stream reads can now safely be interrupted by resource (or object) reads from elsewhere in the file, the file lock becomes a very hard thing to maintain, and doesn't actually benefit us at all. The choices were to either use a recursive lock, or to remove it entirely; I opted for the latter. The file lock enum value remains as a placeholder for future use in extendable data streams. Secondly, we add a new 'concat' filter that concatenates a series of streams together into one, optionally putting whitespace between each stream (as the pdf parser requires this). Finally, we change page/xobject/pattern content streams to work on the fly, but we leave type3 glyphs using buffers (as presumably these will be run repeatedly). 
--- fitz/filt_basic.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'fitz/filt_basic.c') diff --git a/fitz/filt_basic.c b/fitz/filt_basic.c index ac6a5903..7d504f29 100644 --- a/fitz/filt_basic.c +++ b/fitz/filt_basic.c @@ -62,6 +62,104 @@ fz_open_null(fz_stream *chain, int len, int offset) return fz_new_stream(ctx, state, read_null, close_null); } +/* Concat filter concatenates several streams into one */ + +struct concat_filter +{ + int max; + int count; + int current; + int pad; /* 1 if we should add whitespace padding between streams */ + int ws; /* 1 if we should send a whitespace padding byte next */ + fz_stream *chain[1]; +}; + +static int +read_concat(fz_stream *stm, unsigned char *buf, int len) +{ + struct concat_filter *state = (struct concat_filter *)stm->state; + int n; + int read = 0; + + if (len <= 0) + return 0; + + while (state->current != state->count && len > 0) + { + /* If we need to send a whitespace char, do that */ + if (state->ws) + { + *buf++ = 32; + read++; + len--; + state->ws = 0; + continue; + } + /* Otherwise, read as much data as will fit in the buffer */ + n = fz_read(state->chain[state->current], buf, len); + read += n; + buf += n; + len -= n; + /* If we didn't read any, then we must have hit the end of + * our buffer space. Move to the next stream, and remember to + * pad. 
*/ + if (n == 0) + { + fz_close(state->chain[state->current]); + state->current++; + state->ws = state->pad; + } + } + + return read; +} + +static void +close_concat(fz_context *ctx, void *state_) +{ + struct concat_filter *state = (struct concat_filter *)state_; + int i; + + for (i = state->current; i < state->count; i++) + { + fz_close(state->chain[i]); + } + fz_free(ctx, state); +} + +fz_stream * +fz_open_concat(fz_context *ctx, int len, int pad) +{ + struct concat_filter *state; + + fz_try(ctx) + { + state = fz_calloc(ctx, 1, sizeof(struct concat_filter) + (len-1)*sizeof(fz_stream *)); + state->max = len; + state->count = 0; + state->current = 0; + state->pad = pad; + state->ws = 0; /* We never send padding byte at the start */ + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return fz_new_stream(ctx, state, read_concat, close_concat); +} + +void +fz_concat_push(fz_stream *concat, fz_stream *chain) +{ + struct concat_filter *state = (struct concat_filter *)concat->state; + + if (state->count == state->max) + fz_throw(concat->ctx, "Concat filter size exceeded"); + + state->chain[state->count++] = chain; +} + /* ASCII Hex Decode */ typedef struct fz_ahxd_s fz_ahxd; -- cgit v1.2.3