From 09f2e173850e011e6390c49a4f761e87dd87ffba Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 10 Jul 2018 12:58:57 +0200 Subject: Clean up null/range/endstream filter. Use separate functions to keep the code simpler. Use memmem to simplify and optimize search for 'endstream' token. Do not look for 'endobj' since that could cause a false positives in compressed object streams that have duff lengths. --- source/fitz/filter-basic.c | 310 ++++++++++++++++++++++++++++----------------- source/fitz/untar.c | 2 +- source/fitz/unzip.c | 2 +- 3 files changed, 197 insertions(+), 117 deletions(-) (limited to 'source/fitz') diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c index 6928ffff..0713a62e 100644 --- a/source/fitz/filter-basic.c +++ b/source/fitz/filter-basic.c @@ -2,39 +2,90 @@ #include -/* Null filter copies a specified amount of data */ +/* The null filter reads a specified amount of data from the substream. */ struct null_filter +{ + fz_stream *chain; + size_t remain; + int64_t offset; + unsigned char buffer[4096]; +}; + +static int +next_null(fz_context *ctx, fz_stream *stm, size_t max) +{ + struct null_filter *state = stm->state; + size_t n; + + if (state->remain == 0) + return EOF; + + fz_seek(ctx, state->chain, state->offset, 0); + n = fz_available(ctx, state->chain, max); + if (n == 0) + return EOF; + if (n > state->remain) + n = state->remain; + if (n > sizeof(state->buffer)) + n = sizeof(state->buffer); + + memcpy(state->buffer, state->chain->rp, n); + stm->rp = state->buffer; + stm->wp = stm->rp + n; + state->chain->rp += n; + state->remain -= n; + state->offset += n; + stm->pos += n; + return *stm->rp++; +} + +static void +close_null(fz_context *ctx, void *state_) +{ + struct null_filter *state = (struct null_filter *)state_; + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state); +} + +fz_stream * +fz_open_null_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset) +{ + struct null_filter *state = fz_malloc_struct(ctx, struct null_filter); + state->chain = fz_keep_stream(ctx, chain); + state->remain = len; + state->offset = offset; + return fz_new_stream(ctx, state, next_null, close_null); +} + +/* The range filter copies data from specified ranges of the chained stream */ + +struct range_filter { fz_stream *chain; fz_range *ranges; - int look_for_endstream; int nranges; int next_range; size_t remain; - unsigned int extras; - unsigned int size; int64_t offset; unsigned char buffer[4096]; }; static int -next_null(fz_context *ctx, fz_stream *stm, size_t max) +next_range(fz_context *ctx, fz_stream *stm, size_t max) { - struct null_filter *state = stm->state; - size_t n, i, nbytes_in_buffer; - const char *rp; - unsigned int size; + struct range_filter *state = stm->state; + size_t n; while (state->remain == 0 && state->next_range < state->nranges) { fz_range *range = &state->ranges[state->next_range++]; - state->remain = range->len; + state->remain = range->length; state->offset = range->offset; } if (state->remain == 0) - goto maybe_ended; + return EOF; fz_seek(ctx, state->chain, state->offset, 0); n = fz_available(ctx, state->chain, max); if (n > state->remain) @@ -45,21 +96,107 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max) stm->rp = state->buffer; stm->wp = stm->rp + n; if (n == 0) - goto maybe_ended; + return EOF; state->chain->rp += n; state->remain -= n; - state->offset += (int64_t)n; - stm->pos += (int64_t)n; + state->offset += n; + stm->pos += n; return *stm->rp++; +} -maybe_ended: - if (state->look_for_endstream == 0) +static void +close_range(fz_context *ctx, void *state_) +{ + struct range_filter *state = (struct range_filter *)state_; + fz_drop_stream(ctx, state->chain); + fz_free(ctx, state->ranges); + fz_free(ctx, state); +} + +fz_stream * +fz_open_range_filter(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges) +{ + struct range_filter *state = NULL; + + state = fz_malloc_struct(ctx, struct range_filter); + fz_try(ctx) + { + if (nranges > 0) + { + state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges)); + memcpy(state->ranges, ranges, nranges * sizeof(*ranges)); + state->nranges = nranges; + state->next_range = 1; + state->remain = ranges[0].length; + state->offset = ranges[0].offset; + } + else + { + state->ranges = NULL; + state->nranges = 0; + state->next_range = 1; + state->remain = 0; + state->offset = 0; + } + state->chain = fz_keep_stream(ctx, chain); + } + fz_catch(ctx) + { + fz_free(ctx, state->ranges); + fz_free(ctx, state); + fz_rethrow(ctx); + } + + return fz_new_stream(ctx, state, next_range, close_range); +} + +/* + * The endstream filter reads a PDF substream, and starts to look for an 'endstream' token + * after the specified length. + */ + +#define END_CHECK_SIZE 32 + +struct endstream_filter +{ + fz_stream *chain; + size_t remain, extras, size; + int64_t offset; + int warned; + unsigned char buffer[4096]; +}; + +static int +next_endstream(fz_context *ctx, fz_stream *stm, size_t max) +{ + struct endstream_filter *state = stm->state; + size_t n, nbytes_in_buffer, size; + unsigned char *rp; + + if (state->remain == 0) + goto look_for_endstream; + + fz_seek(ctx, state->chain, state->offset, 0); + n = fz_available(ctx, state->chain, max); + if (n == 0) return EOF; + if (n > state->remain) + n = state->remain; + if (n > sizeof(state->buffer)) + n = sizeof(state->buffer); + memcpy(state->buffer, state->chain->rp, n); + stm->rp = state->buffer; + stm->wp = stm->rp + n; + state->chain->rp += n; + state->remain -= n; + state->offset += n; + stm->pos += n; + return *stm->rp++; +look_for_endstream: /* We should distrust the stream length, and check for end * marker before terminating the stream - this is to cope * with files with duff "Length" values. */ - fz_seek(ctx, state->chain, state->offset, 0); /* Move any data left over in our buffer down to the start. * Ordinarily, there won't be any, but this allows for the @@ -75,11 +212,13 @@ maybe_ended: * should only need (say) 32 bytes to be sure. For crap files * where we overread regularly, don't harm performance by * working in small chunks. */ - state->size *= 2; - if (state->size > sizeof(state->buffer)) - state->size = sizeof(state->buffer); -#define END_CHECK_SIZE 32 - size = state->size; + size = state->size * 2; + if (size > sizeof(state->buffer)) + size = sizeof(state->buffer); + state->size = size; + + /* Read enough data into our buffer to start looking for the 'endstream' token. */ + fz_seek(ctx, state->chain, state->offset, 0); while (nbytes_in_buffer < size) { n = fz_available(ctx, state->chain, size - nbytes_in_buffer); @@ -91,123 +230,64 @@ maybe_ended: stm->wp += n; state->chain->rp += n; nbytes_in_buffer += n; + state->offset += n; } - *stm->wp = 0; /* Be friendly to strcmp */ - rp = (char *)state->buffer; - n = 0; - /* If we don't have at least 11 bytes in the buffer, then we don't have - * enough bytes for the worst case terminator. Also, we're dangerously - * close to the end of the file. Don't risk overrunning the buffer. */ - if (nbytes_in_buffer >= 11) - for (i = 0; i < nbytes_in_buffer - 11; ) - { - n = i; - if (rp[i] == '\r') - i++; - if (rp[i] == '\n') - i++; - if (rp[i++] != 'e') - continue; - if (rp[i++] != 'n') - continue; - if (rp[i++] != 'd') - continue; - if (memcmp(&rp[i], "stream", 6) == 0 || (memcmp(&rp[i], "obj", 3) == 0)) - break; - i++; - } + /* Look for the 'endstream' token. */ + rp = fz_memmem(state->buffer, nbytes_in_buffer, "endstream", 9); + if (rp) + { + /* Include newline (CR|LF|CRLF) before 'endstream' token */ + if (rp > state->buffer && rp[-1] == '\n') --rp; + if (rp > state->buffer && rp[-1] == '\r') --rp; + n = rp - state->buffer; + stm->eof = 1; /* We're done, don't call us again! */ + } + else if (nbytes_in_buffer > 11) /* 11 covers enough data to detect "\r?\n?endstream" */ + n = nbytes_in_buffer - 11; /* no endstream, but there is more data */ + else + n = nbytes_in_buffer; /* no endstream, but at the end of the file */ /* We have at least n bytes before we hit an end marker */ - state->offset += (int64_t)nbytes_in_buffer - state->extras; state->extras = nbytes_in_buffer - n; stm->wp = stm->rp + n; stm->pos += n; if (n == 0) return EOF; + + if (!state->warned) + { + state->warned = 1; + fz_warn(ctx, "PDF stream Length incorrect"); + } return *stm->rp++; } static void -close_null(fz_context *ctx, void *state_) +close_endstream(fz_context *ctx, void *state_) { - struct null_filter *state = (struct null_filter *)state_; + struct endstream_filter *state = (struct endstream_filter *)state_; fz_drop_stream(ctx, state->chain); - fz_free(ctx, state->ranges); fz_free(ctx, state); } -static fz_stream * -fz_open_null_n_terminator(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges, int terminator) -{ - struct null_filter *state = NULL; - - state = fz_malloc_struct(ctx, struct null_filter); - fz_try(ctx) - { - if (nranges > 0) - { - state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges)); - memcpy(state->ranges, ranges, nranges * sizeof(*ranges)); - state->look_for_endstream = terminator; - state->nranges = nranges; - state->next_range = 1; - state->remain = ranges[0].len; - state->offset = ranges[0].offset; - state->extras = 0; - state->size = END_CHECK_SIZE>>1; - } - else - { - state->ranges = NULL; - state->nranges = 0; - state->next_range = 1; - state->remain = 0; - state->offset = 0; - } - state->chain = fz_keep_stream(ctx, chain); - } - fz_catch(ctx) - { - fz_free(ctx, state->ranges); - fz_free(ctx, state); - fz_rethrow(ctx); - } - - return fz_new_stream(ctx, state, next_null, close_null); -} - -fz_stream * -fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges) -{ - return fz_open_null_n_terminator(ctx, chain, ranges, nranges, 0); -} - fz_stream * -fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset) +fz_open_endstream_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset) { - fz_range range; + struct endstream_filter *state; if (len < 0) len = 0; - range.offset = offset; - range.len = len; - return fz_open_null_n_terminator(ctx, chain, &range, 1, 0); -} - -fz_stream * -fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset) -{ - fz_range range; - - if (len < 0) - len = 0; + state = fz_malloc_struct(ctx, struct endstream_filter); + state->chain = fz_keep_stream(ctx, chain); + state->remain = len; + state->offset = offset; + state->extras = 0; + state->size = END_CHECK_SIZE >> 1; /* size is doubled first thing when used */ - range.offset = offset; - range.len = len; - return fz_open_null_n_terminator(ctx, chain, &range, 1, 1); + return fz_new_stream(ctx, state, next_endstream, close_endstream); } /* Concat filter concatenates several streams into one */ @@ -238,7 +318,7 @@ next_concat(fz_context *ctx, fz_stream *stm, size_t max) { stm->rp = state->chain[state->current]->rp; stm->wp = state->chain[state->current]->wp; - stm->pos += (int64_t)n; + stm->pos += n; return *stm->rp++; } else @@ -675,7 +755,7 @@ next_arc4(fz_context *ctx, fz_stream *stm, size_t max) stm->wp = state->buffer + n; fz_arc4_encrypt(&state->arc4, stm->rp, state->chain->rp, n); state->chain->rp += n; - stm->pos += (int64_t)n; + stm->pos += n; return *stm->rp++; } diff --git a/source/fitz/untar.c b/source/fitz/untar.c index 1588a8ac..9aac222b 100644 --- a/source/fitz/untar.c +++ b/source/fitz/untar.c @@ -118,7 +118,7 @@ static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char * fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named tar archive entry"); fz_seek(ctx, file, ent->offset + 512, 0); - return fz_open_null(ctx, file, ent->size, fz_tell(ctx, file)); + return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file)); } static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) diff --git a/source/fitz/unzip.c b/source/fitz/unzip.c index 4eb90dda..dfe4fb6b 100644 --- a/source/fitz/unzip.c +++ b/source/fitz/unzip.c @@ -286,7 +286,7 @@ static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char * method = read_zip_entry_header(ctx, zip, ent); if (method == 0) - return fz_open_null(ctx, file, ent->usize, fz_tell(ctx, file)); + return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file)); if (method == 8) return fz_open_flated(ctx, file, -15); fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method); -- cgit v1.2.3