summaryrefslogtreecommitdiff
path: root/source/pdf/pdf-interpret.c
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2014-03-14 20:01:32 +0000
committer: Robin Watts <robin.watts@artifex.com> 2014-03-18 17:48:40 +0000
commit: 4c2715a0bcecfed6ebdfee901920631b09364d7e (patch)
tree: b0e5aa723719159a779425329ddc6ce48b8af0ce /source/pdf/pdf-interpret.c
parent: 551de42088c58dc69fba06fb53e36c2ddb12367f (diff)
download: mupdf-4c2715a0bcecfed6ebdfee901920631b09364d7e.tar.xz
Fix operator buffering of inline images.
Previously pdf_process buffer did not understand inline images. In order to make this work without needlessly duplicating complex code from within pdf-op-run, the parsing of inline images has been moved to happen in pdf-interpret.c. When the op_table entry for BI is called, it now expects the inline image to be in csi->img and the dictionary object to be in csi->obj.

To make this work, we have had to improve the handling of inline images in general. While non-inline images have been loaded and held in memory in their compressed form and only decoded when required, until now we have always loaded and decoded inline images immediately. This was due to the difficulty of knowing how many bytes of data to read from the stream: we know the length of the stream once uncompressed, but relating this to the compressed length is hard.

To cure this we introduce a new type of filter stream, a 'leecher'. We insert a leecher stream before we build the filters required to decode the image. We then read and discard the appropriate number of uncompressed bytes from the filters. This pulls the compressed data through the leecher stream, which stores it in an fz_buffer. Thus images are now always held in memory in their compressed form.

The pdf-op-run implementation is now trivial. The only real complexity in the pdf-op-buffer implementation is the need to ensure that the /Filter entry in the dictionary object matches the exact point at which we backstopped the decompression.
Diffstat (limited to 'source/pdf/pdf-interpret.c')
-rw-r--r--source/pdf/pdf-interpret.c67
1 files changed, 67 insertions, 0 deletions
diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c
index 3984d8e6..525d2ead 100644
--- a/source/pdf/pdf-interpret.c
+++ b/source/pdf/pdf-interpret.c
@@ -40,6 +40,9 @@ pdf_clear_stack(pdf_csi *csi)
{
int i;
+ fz_drop_image(csi->doc->ctx, csi->img);
+ csi->img = NULL;
+
pdf_drop_obj(csi->obj);
csi->obj = NULL;
@@ -64,6 +67,61 @@ pdf_free_csi(pdf_csi *csi)
#define B(a,b) (a | b << 8)
#define C(a,b,c) (a | b << 8 | c << 16)
+static void /* Parse the inline image that follows a BI operator: the dictionary goes to csi->obj, the image to csi->img. */
+parse_inline_image(pdf_csi *csi)
+{
+ fz_context *ctx = csi->doc->ctx;
+ pdf_obj *rdb = csi->rdb;
+ fz_stream *file = csi->file;
+ int ch, found;
+
+ fz_drop_image(ctx, csi->img); /* release any image and dictionary still held from an earlier operator */
+ csi->img = NULL;
+ pdf_drop_obj(csi->obj);
+ csi->obj = NULL;
+
+ csi->obj = pdf_parse_dict(csi->doc, file, &csi->doc->lexbuf.base); /* the inline image's dictionary */
+
+ /* read whitespace after ID keyword: always consume one byte; if it was CR and LF follows, consume the LF too */
+ ch = fz_read_byte(file);
+ if (ch == '\r')
+ if (fz_peek_byte(file) == '\n')
+ fz_read_byte(file);
+
+ fz_try(ctx)
+ {
+ csi->img = pdf_load_inline_image(csi->doc, rdb, csi->obj, file); /* reads the image data from the same stream */
+ }
+ fz_catch(ctx) /* NOTE(review): this handler only rethrows, so the try/catch looks redundant -- confirm */
+ {
+ fz_rethrow(ctx);
+ }
+
+ /* find EI: scan forward for 'E' then 'I', followed by a byte <= 32, EOF, '<' or '/' */
+ found = 0;
+ ch = fz_read_byte(file);
+ do
+ {
+ while (ch != 'E' && ch != EOF) /* skip everything up to the next candidate 'E' */
+ ch = fz_read_byte(file);
+ if (ch == 'E')
+ {
+ ch = fz_read_byte(file); /* if this is not 'I' it is re-examined by the outer loop, so "EEI" still matches */
+ if (ch == 'I')
+ {
+ ch = fz_peek_byte(file); /* only peek at the byte after "EI" to decide whether it ends the keyword */
+ if (ch == ' ' || ch <= 32 || ch == EOF || ch == '<' || ch == '/') /* ch <= 32 already covers ' ' and (negative) EOF */
+ {
+ found = 1;
+ break;
+ }
+ }
+ }
+ } while (ch != EOF);
+ if (!found) /* hit end of stream without seeing a terminated EI */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image");
+}
+
static int
pdf_run_keyword(pdf_csi *csi, char *buf)
{
@@ -177,6 +235,11 @@ pdf_run_keyword(pdf_csi *csi, char *buf)
return 0;
}
+ if (op == PDF_OP_BI)
+ {
+ parse_inline_image(csi);
+ }
+
if (op < PDF_OP_Do)
{
pdf_process_op(csi, op, &csi->process);
@@ -383,6 +446,10 @@ pdf_process_stream(pdf_csi *csi, pdf_lexbuf *buf)
}
while (tok != PDF_TOK_EOF);
}
+ fz_always(ctx)
+ {
+ pdf_clear_stack(csi);
+ }
fz_catch(ctx)
{
if (!csi->cookie)