summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/fitz/filter.h1
-rw-r--r--source/fitz/filter-basic.c114
-rw-r--r--source/pdf/pdf-stream.c2
3 files changed, 110 insertions, 7 deletions
diff --git a/include/mupdf/fitz/filter.h b/include/mupdf/fitz/filter.h
index a570dd71..bdce36e8 100644
--- a/include/mupdf/fitz/filter.h
+++ b/include/mupdf/fitz/filter.h
@@ -17,6 +17,7 @@ typedef struct
fz_stream *fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges);
fz_stream *fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
+fz_stream *fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset);
fz_stream *fz_open_concat(fz_context *ctx, int max, int pad);
void fz_concat_push_drop(fz_context *ctx, fz_stream *concat, fz_stream *chain); /* Ownership of chain is passed in */
fz_stream *fz_open_arc4(fz_context *ctx, fz_stream *chain, unsigned char *key, unsigned keylen);
diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
index 5e03a176..6928ffff 100644
--- a/source/fitz/filter-basic.c
+++ b/source/fitz/filter-basic.c
@@ -8,9 +8,12 @@ struct null_filter
{
fz_stream *chain;
fz_range *ranges;
+ int look_for_endstream;
int nranges;
int next_range;
size_t remain;
+ unsigned int extras;
+ unsigned int size;
int64_t offset;
unsigned char buffer[4096];
};
@@ -19,7 +22,9 @@ static int
next_null(fz_context *ctx, fz_stream *stm, size_t max)
{
struct null_filter *state = stm->state;
- size_t n;
+ size_t n, i, nbytes_in_buffer;
+ const char *rp;
+ unsigned int size;
while (state->remain == 0 && state->next_range < state->nranges)
{
@@ -29,7 +34,7 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max)
}
if (state->remain == 0)
- return EOF;
+ goto maybe_ended;
fz_seek(ctx, state->chain, state->offset, 0);
n = fz_available(ctx, state->chain, max);
if (n > state->remain)
@@ -40,12 +45,88 @@ next_null(fz_context *ctx, fz_stream *stm, size_t max)
stm->rp = state->buffer;
stm->wp = stm->rp + n;
if (n == 0)
- return EOF;
+ goto maybe_ended;
state->chain->rp += n;
state->remain -= n;
state->offset += (int64_t)n;
stm->pos += (int64_t)n;
return *stm->rp++;
+
+maybe_ended:
+ if (state->look_for_endstream == 0)
+ return EOF;
+
+ /* We should distrust the stream length, and check for end
+ * marker before terminating the stream - this is to cope
+ * with files with duff "Length" values. */
+ fz_seek(ctx, state->chain, state->offset, 0);
+
+ /* Move any data left over in our buffer down to the start.
+ * Ordinarily, there won't be any, but this allows for the
+ * case where we were part way through matching a stream end
+ * marker when the buffer filled before. */
+ nbytes_in_buffer = state->extras;
+ if (nbytes_in_buffer)
+ memmove(state->buffer, stm->rp, nbytes_in_buffer);
+ stm->rp = state->buffer;
+ stm->wp = stm->rp + nbytes_in_buffer;
+
+ /* In most sane files, we'll get "\nendstream" instantly. We
+ * should only need (say) 32 bytes to be sure. For crap files
+ * where we overread regularly, don't harm performance by
+ * working in small chunks. */
+ state->size *= 2;
+ if (state->size > sizeof(state->buffer))
+ state->size = sizeof(state->buffer);
+#define END_CHECK_SIZE 32
+ size = state->size;
+ while (nbytes_in_buffer < size)
+ {
+ n = fz_available(ctx, state->chain, size - nbytes_in_buffer);
+ if (n == 0)
+ break;
+ if (n > size - nbytes_in_buffer)
+ n = size - nbytes_in_buffer;
+ memcpy(stm->wp, state->chain->rp, n);
+ stm->wp += n;
+ state->chain->rp += n;
+ nbytes_in_buffer += n;
+ }
+
+ *stm->wp = 0; /* Be friendly to strcmp */
+ rp = (char *)state->buffer;
+ n = 0;
+ /* If we don't have at least 11 bytes in the buffer, then we don't have
+ * enough bytes for the worst case terminator. Also, we're dangerously
+ * close to the end of the file. Don't risk overrunning the buffer. */
+ if (nbytes_in_buffer >= 11)
+ for (i = 0; i < nbytes_in_buffer - 11; )
+ {
+ n = i;
+ if (rp[i] == '\r')
+ i++;
+ if (rp[i] == '\n')
+ i++;
+ if (rp[i++] != 'e')
+ continue;
+ if (rp[i++] != 'n')
+ continue;
+ if (rp[i++] != 'd')
+ continue;
+ if (memcmp(&rp[i], "stream", 6) == 0 || (memcmp(&rp[i], "obj", 3) == 0))
+ break;
+ i++;
+ }
+
+ /* We have at least n bytes before we hit an end marker */
+ state->offset += (int64_t)nbytes_in_buffer - state->extras;
+ state->extras = nbytes_in_buffer - n;
+ stm->wp = stm->rp + n;
+ stm->pos += n;
+
+ if (n == 0)
+ return EOF;
+ return *stm->rp++;
}
static void
@@ -57,8 +138,8 @@ close_null(fz_context *ctx, void *state_)
fz_free(ctx, state);
}
-fz_stream *
-fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
+static fz_stream *
+fz_open_null_n_terminator(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges, int terminator)
{
struct null_filter *state = NULL;
@@ -69,10 +150,13 @@ fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
{
state->ranges = fz_calloc(ctx, nranges, sizeof(*ranges));
memcpy(state->ranges, ranges, nranges * sizeof(*ranges));
+ state->look_for_endstream = terminator;
state->nranges = nranges;
state->next_range = 1;
state->remain = ranges[0].len;
state->offset = ranges[0].offset;
+ state->extras = 0;
+ state->size = END_CHECK_SIZE>>1;
}
else
{
@@ -95,6 +179,12 @@ fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
}
fz_stream *
+fz_open_null_n(fz_context *ctx, fz_stream *chain, fz_range *ranges, int nranges)
+{
+ return fz_open_null_n_terminator(ctx, chain, ranges, nranges, 0);
+}
+
+fz_stream *
fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
{
fz_range range;
@@ -104,9 +194,21 @@ fz_open_null(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
range.offset = offset;
range.len = len;
- return fz_open_null_n(ctx, chain, &range, 1);
+ return fz_open_null_n_terminator(ctx, chain, &range, 1, 0);
}
+fz_stream *
+fz_open_pdf_stream(fz_context *ctx, fz_stream *chain, int len, int64_t offset)
+{
+ fz_range range;
+
+ if (len < 0)
+ len = 0;
+
+ range.offset = offset;
+ range.len = len;
+ return fz_open_null_n_terminator(ctx, chain, &range, 1, 1);
+}
/* Concat filter concatenates several streams into one */
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index 04f1ed46..796e2490 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -298,7 +298,7 @@ pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf
hascrypt = pdf_stream_has_crypt(ctx, stmobj);
len = pdf_dict_get_int(ctx, stmobj, PDF_NAME(Length));
- null_stm = fz_open_null(ctx, file_stm, len, offset);
+ null_stm = fz_open_pdf_stream(ctx, file_stm, len, offset);
if (doc->crypt && !hascrypt)
{
fz_try(ctx)