summary refs log tree commit diff
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2018-07-10 12:58:57 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2018-08-10 12:09:33 +0200
commit 09f2e173850e011e6390c49a4f761e87dd87ffba (patch)
tree 6026165eb2c859792e1c9c3578d470a4e6837f72
parent 62876a7025e31897e7ccb92ff8d461d3fef6ddb4 (diff)
download mupdf-09f2e173850e011e6390c49a4f761e87dd87ffba.tar.xz
Clean up null/range/endstream filter.
Use separate functions to keep the code simpler. Use memmem to simplify and optimize the search for the 'endstream' token. Do not look for 'endobj', since that could cause false positives in compressed object streams that have duff lengths.
-rw-r--r-- include/mupdf/fitz/filter.h 8
-rw-r--r-- source/fitz/filter-basic.c 310
-rw-r--r-- source/fitz/untar.c 2
-rw-r--r-- source/fitz/unzip.c 2
-rw-r--r-- source/pdf/pdf-form.c 4
-rw-r--r-- source/pdf/pdf-signature.c 4
-rw-r--r-- source/pdf/pdf-stream.c 4
7 files changed, 207 insertions, 127 deletions
diff --git a/include/mupdf/fitz/filter.h b/include/mupdf/fitz/filter.h
index bdce36e8..5dd136a1 100644
--- a/include/mupdf/fitz/filter.h
+++ b/include/mupdf/fitz/filter.h
@@ -12,12 +12,12 @@ typedef struct fz_jbig2_globals_s fz_jbig2_globals;
typedef struct
{
int64_t offset;
- int len;
+ size_t length;
} fz_range;
-fz_stream *fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges);
-fz_stream *fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
-fz_stream *fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
+fz_stream *fz_open_null_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
+fz_stream *fz_open_range_filter(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges);
+fz_stream *fz_open_endstream_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
fz_stream *fz_open_concat(fz_context *ctx, int max, int pad);
void fz_concat_push_drop(fz_context *ctx, fz_stream *concat, fz_stream *chain); /* Ownership of chain is passed in */
fz_stream *fz_open_arc4(fz_context *ctx, fz_stream *chain, unsigned char *key, unsigned keylen);
diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
index 6928ffff..0713a62e 100644
--- a/source/fitz/filter-basic.c
+++ b/source/fitz/filter-basic.c
@@ -2,39 +2,90 @@
#include <string.h>
-/* Null filter copies a specified amount of data */
+/* The null filter reads a specified amount of data from the substream. */
struct null_filter
{
fz_stream *chain;
+ size_t remain;
+ int64_t offset;
+ unsigned char buffer[4096];
+};
+
+static int
+next_null(fz_context *ctx, fz_stream *stm, size_t max)
+{
+ struct null_filter *state = stm->state;
+ size_t n;
+
+ if (state->remain == 0)
+ return EOF;
+
+ fz_seek(ctx, state->chain, state->offset, 0);
+ n = fz_available(ctx, state->chain, max);
+ if (n == 0)
+ return EOF;
+ if (n > state->remain)
+ n = state->remain;
+ if (n > sizeof(state->buffer))
+ n = sizeof(state->buffer);
+
+ memcpy(state->buffer, state->chain->rp, n);
+ stm->rp = state->buffer;
+ stm->wp = stm->rp + n;
+ state->chain->rp += n;
+ state->remain -= n;
+ state->offset += n;
+ stm->pos += n;
+ return *stm->rp++;
+}
+
+static void
+close_null(fz_context *ctx, void *state_)
+{
+ struct null_filter *state = (struct null_filter *)state_;
+ fz_drop_stream(ctx, state->chain);
+ fz_free(ctx, state);
+}
+
+fz_stream *
+fz_open_null_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
+{
+ struct null_filter *state = fz_malloc_struct(ctx, struct null_filter);
+ state->chain = fz_keep_stream(ctx, chain);
+ state->remain = len;
+ state->offset = offset;
+ return fz_new_stream(ctx, state, next_null, close_null);
+}
+
+/* The range filter copies data from specified ranges of the chained stream */
+
+struct range_filter
+{
+ fz_stream *chain;
fz_range *ranges;
- int look_for_endstream;
int nranges;
int next_range;
size_t remain;
- unsigned int extras;
- unsigned int size;
int64_t offset;
unsigned char buffer[4096];
};
static int
-next_null(fz_context *ctx, fz_stream *stm, size_t max)
+next_range(fz_context *ctx, fz_stream *stm, size_t max)
{
- struct null_filter *state = stm->state;
- size_t n, i, nbytes_in_buffer;
- const char *rp;
- unsigned int size;
+ struct range_filter *state = stm->state;
+ size_t n;
while (state->remain == 0 && state->next_range < state->nranges)
{
fz_range *range = &state->ranges[state->next_range++];
- state->remain = range->len;
+ state->remain = range->length;
state->offset = range->offset;
}
if (state->remain == 0)
- goto maybe_ended;
+ return EOF;
fz_seek(ctx, state->chain, state->offset, 0);
n = fz_available(ctx, state->chain, max);
if (n > state->remain)
@@ -45,21 +96,107 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max)
stm->rp = state->buffer;
stm->wp = stm->rp + n;
if (n == 0)
- goto maybe_ended;
+ return EOF;
state->chain->rp += n;
state->remain -= n;
- state->offset += (int64_t)n;
- stm->pos += (int64_t)n;
+ state->offset += n;
+ stm->pos += n;
return *stm->rp++;
+}
-maybe_ended:
- if (state->look_for_endstream == 0)
+static void
+close_range(fz_context *ctx, void *state_)
+{
+ struct range_filter *state = (struct range_filter *)state_;
+ fz_drop_stream(ctx, state->chain);
+ fz_free(ctx, state->ranges);
+ fz_free(ctx, state);
+}
+
+fz_stream *
+fz_open_range_filter(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
+{
+ struct range_filter *state = NULL;
+
+ state = fz_malloc_struct(ctx, struct range_filter);
+ fz_try(ctx)
+ {
+ if (nranges > 0)
+ {
+ state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges));
+ memcpy(state->ranges, ranges, nranges * sizeof(*ranges));
+ state->nranges = nranges;
+ state->next_range = 1;
+ state->remain = ranges[0].length;
+ state->offset = ranges[0].offset;
+ }
+ else
+ {
+ state->ranges = NULL;
+ state->nranges = 0;
+ state->next_range = 1;
+ state->remain = 0;
+ state->offset = 0;
+ }
+ state->chain = fz_keep_stream(ctx, chain);
+ }
+ fz_catch(ctx)
+ {
+ fz_free(ctx, state->ranges);
+ fz_free(ctx, state);
+ fz_rethrow(ctx);
+ }
+
+ return fz_new_stream(ctx, state, next_range, close_range);
+}
+
+/*
+ * The endstream filter reads a PDF substream, and starts to look for an 'endstream' token
+ * after the specified length.
+ */
+
+#define END_CHECK_SIZE 32
+
+struct endstream_filter
+{
+ fz_stream *chain;
+ size_t remain, extras, size;
+ int64_t offset;
+ int warned;
+ unsigned char buffer[4096];
+};
+
+static int
+next_endstream(fz_context *ctx, fz_stream *stm, size_t max)
+{
+ struct endstream_filter *state = stm->state;
+ size_t n, nbytes_in_buffer, size;
+ unsigned char *rp;
+
+ if (state->remain == 0)
+ goto look_for_endstream;
+
+ fz_seek(ctx, state->chain, state->offset, 0);
+ n = fz_available(ctx, state->chain, max);
+ if (n == 0)
return EOF;
+ if (n > state->remain)
+ n = state->remain;
+ if (n > sizeof(state->buffer))
+ n = sizeof(state->buffer);
+ memcpy(state->buffer, state->chain->rp, n);
+ stm->rp = state->buffer;
+ stm->wp = stm->rp + n;
+ state->chain->rp += n;
+ state->remain -= n;
+ state->offset += n;
+ stm->pos += n;
+ return *stm->rp++;
+look_for_endstream:
/* We should distrust the stream length, and check for end
* marker before terminating the stream - this is to cope
* with files with duff "Length" values. */
- fz_seek(ctx, state->chain, state->offset, 0);
/* Move any data left over in our buffer down to the start.
* Ordinarily, there won't be any, but this allows for the
@@ -75,11 +212,13 @@ maybe_ended:
* should only need (say) 32 bytes to be sure. For crap files
* where we overread regularly, don't harm performance by
* working in small chunks. */
- state->size *= 2;
- if (state->size > sizeof(state->buffer))
- state->size = sizeof(state->buffer);
-#define END_CHECK_SIZE 32
- size = state->size;
+ size = state->size * 2;
+ if (size > sizeof(state->buffer))
+ size = sizeof(state->buffer);
+ state->size = size;
+
+ /* Read enough data into our buffer to start looking for the 'endstream' token. */
+ fz_seek(ctx, state->chain, state->offset, 0);
while (nbytes_in_buffer < size)
{
n = fz_available(ctx, state->chain, size - nbytes_in_buffer);
@@ -91,123 +230,64 @@ maybe_ended:
stm->wp += n;
state->chain->rp += n;
nbytes_in_buffer += n;
+ state->offset += n;
}
- *stm->wp = 0; /* Be friendly to strcmp */
- rp = (char *)state->buffer;
- n = 0;
- /* If we don't have at least 11 bytes in the buffer, then we don't have
- * enough bytes for the worst case terminator. Also, we're dangerously
- * close to the end of the file. Don't risk overrunning the buffer. */
- if (nbytes_in_buffer >= 11)
- for (i = 0; i < nbytes_in_buffer - 11; )
- {
- n = i;
- if (rp[i] == '\r')
- i++;
- if (rp[i] == '\n')
- i++;
- if (rp[i++] != 'e')
- continue;
- if (rp[i++] != 'n')
- continue;
- if (rp[i++] != 'd')
- continue;
- if (memcmp(&rp[i], "stream", 6) == 0 || (memcmp(&rp[i], "obj", 3) == 0))
- break;
- i++;
- }
+ /* Look for the 'endstream' token. */
+ rp = fz_memmem(state->buffer, nbytes_in_buffer, "endstream", 9);
+ if (rp)
+ {
+ /* Include newline (CR|LF|CRLF) before 'endstream' token */
+ if (rp > state->buffer && rp[-1] == '\n') --rp;
+ if (rp > state->buffer && rp[-1] == '\r') --rp;
+ n = rp - state->buffer;
+ stm->eof = 1; /* We're done, don't call us again! */
+ }
+ else if (nbytes_in_buffer > 11) /* 11 covers enough data to detect "\r?\n?endstream" */
+ n = nbytes_in_buffer - 11; /* no endstream, but there is more data */
+ else
+ n = nbytes_in_buffer; /* no endstream, but at the end of the file */
/* We have at least n bytes before we hit an end marker */
- state->offset += (int64_t)nbytes_in_buffer - state->extras;
state->extras = nbytes_in_buffer - n;
stm->wp = stm->rp + n;
stm->pos += n;
if (n == 0)
return EOF;
+
+ if (!state->warned)
+ {
+ state->warned = 1;
+ fz_warn(ctx, "PDF stream Length incorrect");
+ }
return *stm->rp++;
}
static void
-close_null(fz_context *ctx, void *state_)
+close_endstream(fz_context *ctx, void *state_)
{
- struct null_filter *state = (struct null_filter *)state_;
+ struct endstream_filter *state = (struct endstream_filter *)state_;
fz_drop_stream(ctx, state->chain);
- fz_free(ctx, state->ranges);
fz_free(ctx, state);
}
-static fz_stream *
-fz_open_null_n_terminator(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges, int terminator)
-{
- struct null_filter *state = NULL;
-
- state = fz_malloc_struct(ctx, struct null_filter);
- fz_try(ctx)
- {
- if (nranges > 0)
- {
- state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges));
- memcpy(state->ranges, ranges, nranges * sizeof(*ranges));
- state->look_for_endstream = terminator;
- state->nranges = nranges;
- state->next_range = 1;
- state->remain = ranges[0].len;
- state->offset = ranges[0].offset;
- state->extras = 0;
- state->size = END_CHECK_SIZE>>1;
- }
- else
- {
- state->ranges = NULL;
- state->nranges = 0;
- state->next_range = 1;
- state->remain = 0;
- state->offset = 0;
- }
- state->chain = fz_keep_stream(ctx, chain);
- }
- fz_catch(ctx)
- {
- fz_free(ctx, state->ranges);
- fz_free(ctx, state);
- fz_rethrow(ctx);
- }
-
- return fz_new_stream(ctx, state, next_null, close_null);
-}
-
-fz_stream *
-fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
-{
- return fz_open_null_n_terminator(ctx, chain, ranges, nranges, 0);
-}
-
fz_stream *
-fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
+fz_open_endstream_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
{
- fz_range range;
+ struct endstream_filter *state;
if (len < 0)
len = 0;
- range.offset = offset;
- range.len = len;
- return fz_open_null_n_terminator(ctx, chain, &range, 1, 0);
-}
-
-fz_stream *
-fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
-{
- fz_range range;
-
- if (len < 0)
- len = 0;
+ state = fz_malloc_struct(ctx, struct endstream_filter);
+ state->chain = fz_keep_stream(ctx, chain);
+ state->remain = len;
+ state->offset = offset;
+ state->extras = 0;
+ state->size = END_CHECK_SIZE >> 1; /* size is doubled first thing when used */
- range.offset = offset;
- range.len = len;
- return fz_open_null_n_terminator(ctx, chain, &range, 1, 1);
+ return fz_new_stream(ctx, state, next_endstream, close_endstream);
}
/* Concat filter concatenates several streams into one */
@@ -238,7 +318,7 @@ next_concat(fz_context *ctx, fz_stream *stm, size_t max)
{
stm->rp = state->chain[state->current]->rp;
stm->wp = state->chain[state->current]->wp;
- stm->pos += (int64_t)n;
+ stm->pos += n;
return *stm->rp++;
}
else
@@ -675,7 +755,7 @@ next_arc4(fz_context *ctx, fz_stream *stm, size_t max)
stm->wp = state->buffer + n;
fz_arc4_encrypt(&state->arc4, stm->rp, state->chain->rp, n);
state->chain->rp += n;
- stm->pos += (int64_t)n;
+ stm->pos += n;
return *stm->rp++;
}
diff --git a/source/fitz/untar.c b/source/fitz/untar.c
index 1588a8ac..9aac222b 100644
--- a/source/fitz/untar.c
+++ b/source/fitz/untar.c
@@ -118,7 +118,7 @@ static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named tar archive entry");
fz_seek(ctx, file, ent->offset + 512, 0);
- return fz_open_null(ctx, file, ent->size, fz_tell(ctx, file));
+ return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file));
}
static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
diff --git a/source/fitz/unzip.c b/source/fitz/unzip.c
index 4eb90dda..dfe4fb6b 100644
--- a/source/fitz/unzip.c
+++ b/source/fitz/unzip.c
@@ -286,7 +286,7 @@ static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *
method = read_zip_entry_header(ctx, zip, ent);
if (method == 0)
- return fz_open_null(ctx, file, ent->usize, fz_tell(ctx, file));
+ return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file));
if (method == 8)
return fz_open_flated(ctx, file, -15);
fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method);
diff --git a/source/pdf/pdf-form.c b/source/pdf/pdf-form.c
index d6969ce7..a838b4b8 100644
--- a/source/pdf/pdf-form.c
+++ b/source/pdf/pdf-form.c
@@ -1245,7 +1245,7 @@ int pdf_signature_widget_byte_range(fz_context *ctx, pdf_document *doc, pdf_widg
for (i = 0; i < n; i++)
{
byte_range[i].offset = pdf_array_get_int(ctx, br, 2*i);
- byte_range[i].len = pdf_array_get_int(ctx, br, 2*i+1);
+ byte_range[i].length = pdf_array_get_int(ctx, br, 2*i+1);
}
}
@@ -1268,7 +1268,7 @@ fz_stream *pdf_signature_widget_hash_bytes(fz_context *ctx, pdf_document *doc, p
pdf_signature_widget_byte_range(ctx, doc, widget, byte_range);
}
- bytes = fz_open_null_n(ctx, doc->file, byte_range, byte_range_len);
+ bytes = fz_open_range_filter(ctx, doc->file, byte_range, byte_range_len);
}
fz_always(ctx)
{
diff --git a/source/pdf/pdf-signature.c b/source/pdf/pdf-signature.c
index 4d62e7f1..cb7a807b 100644
--- a/source/pdf/pdf-signature.c
+++ b/source/pdf/pdf-signature.c
@@ -29,11 +29,11 @@ void pdf_write_digest(fz_context *ctx, fz_output *out, pdf_obj *byte_range, int
for (i = 0; i < brange_len; i++)
{
brange[i].offset = pdf_array_get_int(ctx, byte_range, 2*i);
- brange[i].len = pdf_array_get_int(ctx, byte_range, 2*i+1);
+ brange[i].length = pdf_array_get_int(ctx, byte_range, 2*i+1);
}
stm = fz_stream_from_output(ctx, out);
- in = fz_open_null_n(ctx, stm, brange, brange_len);
+ in = fz_open_range_filter(ctx, stm, brange, brange_len);
digest_len = (hexdigest_length - 2) / 2;
digest = fz_malloc(ctx, digest_len);
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index 796e2490..54d3d561 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -298,7 +298,7 @@ pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf
hascrypt = pdf_stream_has_crypt(ctx, stmobj);
len = pdf_dict_get_int(ctx, stmobj, PDF_NAME(Length));
- null_stm = fz_open_pdf_stream(ctx, file_stm, len, offset);
+ null_stm = fz_open_endstream_filter(ctx, file_stm, len, offset);
if (doc->crypt && !hascrypt)
{
fz_try(ctx)
@@ -359,7 +359,7 @@ pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int
if (imparams)
imparams->type = FZ_IMAGE_RAW;
- return fz_open_null(ctx, file_stm, length, fz_tell(ctx, file_stm));
+ return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
}
void