summary | refs | log | tree | commit | diff
path: root/source
diff options
context:
space:
mode:
author: Robin Watts <Robin.Watts@artifex.com> 2018-05-31 15:59:53 +0100
committer: Robin Watts <robin.watts@artifex.com> 2018-07-06 18:00:03 +0100
commit: 75c457ddd28a629a9e1d6e1a8fa313ffef1457cb (patch)
tree: 96099a85afbfcaa5af457aa230dfcb89fbd99825 /source
parent: 533684eb51bb72df871a63eb17e589e2ec9bb547 (diff)
download: mupdf-75c457ddd28a629a9e1d6e1a8fa313ffef1457cb.tar.xz
Bug 699308: Fix stream reading logic to better cope with duff Lengths.
Always look for the "endstream" marker after a PDF stream to see if we've hit the end. Allow for "endobj" to cope with producers that omit endstream entirely. Avoid slowing down legal files by only checking for the end marker after the specified length has been read.
Diffstat (limited to 'source')
-rw-r--r-- source/fitz/filter-basic.c | 114
-rw-r--r-- source/pdf/pdf-stream.c | 2
2 files changed, 109 insertions, 7 deletions
diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
index 5e03a176..6928ffff 100644
--- a/source/fitz/filter-basic.c
+++ b/source/fitz/filter-basic.c
@@ -8,9 +8,12 @@ struct null_filter
{
fz_stream *chain;
fz_range *ranges;
+ int look_for_endstream;
int nranges;
int next_range;
size_t remain;
+ unsigned int extras;
+ unsigned int size;
int64_t offset;
unsigned char buffer[4096];
};
@@ -19,7 +22,9 @@ static int
next_null(fz_context *ctx, fz_stream *stm, size_t max)
{
struct null_filter *state = stm->state;
- size_t n;
+ size_t n, i, nbytes_in_buffer;
+ const char *rp;
+ unsigned int size;
while (state->remain == 0 && state->next_range < state->nranges)
{
@@ -29,7 +34,7 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max)
}
if (state->remain == 0)
- return EOF;
+ goto maybe_ended;
fz_seek(ctx, state->chain, state->offset, 0);
n = fz_available(ctx, state->chain, max);
if (n > state->remain)
@@ -40,12 +45,88 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max)
stm->rp = state->buffer;
stm->wp = stm->rp + n;
if (n == 0)
- return EOF;
+ goto maybe_ended;
state->chain->rp += n;
state->remain -= n;
state->offset += (int64_t)n;
stm->pos += (int64_t)n;
return *stm->rp++;
+
+maybe_ended:
+ if (state->look_for_endstream == 0)
+ return EOF;
+
+ /* We should distrust the stream length, and check for end
+ * marker before terminating the stream - this is to cope
+ * with files with duff "Length" values. */
+ fz_seek(ctx, state->chain, state->offset, 0);
+
+ /* Move any data left over in our buffer down to the start.
+ * Ordinarily, there won't be any, but this allows for the
+ * case where we were part way through matching a stream end
+ * marker when the buffer filled before. */
+ nbytes_in_buffer = state->extras;
+ if (nbytes_in_buffer)
+ memmove(state->buffer, stm->rp, nbytes_in_buffer);
+ stm->rp = state->buffer;
+ stm->wp = stm->rp + nbytes_in_buffer;
+
+ /* In most sane files, we'll get "\nendstream" instantly. We
+ * should only need (say) 32 bytes to be sure. For crap files
+ * where we overread regularly, don't harm performance by
+ * working in small chunks. */
+ state->size *= 2;
+ if (state->size > sizeof(state->buffer))
+ state->size = sizeof(state->buffer);
+#define END_CHECK_SIZE 32
+ size = state->size;
+ while (nbytes_in_buffer < size)
+ {
+ n = fz_available(ctx, state->chain, size - nbytes_in_buffer);
+ if (n == 0)
+ break;
+ if (n > size - nbytes_in_buffer)
+ n = size - nbytes_in_buffer;
+ memcpy(stm->wp, state->chain->rp, n);
+ stm->wp += n;
+ state->chain->rp += n;
+ nbytes_in_buffer += n;
+ }
+
+ *stm->wp = 0; /* Be friendly to strcmp */
+ rp = (char *)state->buffer;
+ n = 0;
+ /* If we don't have at least 11 bytes in the buffer, then we don't have
+ * enough bytes for the worst case terminator. Also, we're dangerously
+ * close to the end of the file. Don't risk overrunning the buffer. */
+ if (nbytes_in_buffer >= 11)
+ for (i = 0; i < nbytes_in_buffer - 11; )
+ {
+ n = i;
+ if (rp[i] == '\r')
+ i++;
+ if (rp[i] == '\n')
+ i++;
+ if (rp[i++] != 'e')
+ continue;
+ if (rp[i++] != 'n')
+ continue;
+ if (rp[i++] != 'd')
+ continue;
+ if (memcmp(&rp[i], "stream", 6) == 0 || (memcmp(&rp[i], "obj", 3) == 0))
+ break;
+ i++;
+ }
+
+ /* We have at least n bytes before we hit an end marker */
+ state->offset += (int64_t)nbytes_in_buffer - state->extras;
+ state->extras = nbytes_in_buffer - n;
+ stm->wp = stm->rp + n;
+ stm->pos += n;
+
+ if (n == 0)
+ return EOF;
+ return *stm->rp++;
}
static void
@@ -57,8 +138,8 @@ close_null(fz_context *ctx, void *state_)
fz_free(ctx, state);
}
-fz_stream *
-fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
+static fz_stream *
+fz_open_null_n_terminator(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges, int terminator)
{
struct null_filter *state = NULL;
@@ -69,10 +150,13 @@ fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
{
state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges));
memcpy(state->ranges, ranges, nranges * sizeof(*ranges));
+ state->look_for_endstream = terminator;
state->nranges = nranges;
state->next_range = 1;
state->remain = ranges[0].len;
state->offset = ranges[0].offset;
+ state->extras = 0;
+ state->size = END_CHECK_SIZE>>1;
}
else
{
@@ -95,6 +179,12 @@ fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
}
fz_stream *
+fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
+{
+ return fz_open_null_n_terminator(ctx, chain, ranges, nranges, 0);
+}
+
+fz_stream *
fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
{
fz_range range;
@@ -104,9 +194,21 @@ fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
range.offset = offset;
range.len = len;
- return fz_open_null_n(ctx, chain, &range, 1);
+ return fz_open_null_n_terminator(ctx, chain, &range, 1, 0);
}
+fz_stream *
+fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
+{
+ fz_range range;
+
+ if (len < 0)
+ len = 0;
+
+ range.offset = offset;
+ range.len = len;
+ return fz_open_null_n_terminator(ctx, chain, &range, 1, 1);
+}
/* Concat filter concatenates several streams into one */
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index 04f1ed46..796e2490 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -298,7 +298,7 @@ pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf
hascrypt = pdf_stream_has_crypt(ctx, stmobj);
len = pdf_dict_get_int(ctx, stmobj, PDF_NAME(Length));
- null_stm = fz_open_null(ctx, file_stm, len, offset);
+ null_stm = fz_open_pdf_stream(ctx, file_stm, len, offset);
if (doc->crypt && !hascrypt)
{
fz_try(ctx)