diff options
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-image.c | 21 | ||||
-rw-r--r-- | source/pdf/pdf-interpret-imp.h | 1 | ||||
-rw-r--r-- | source/pdf/pdf-interpret.c | 67 | ||||
-rw-r--r-- | source/pdf/pdf-op-buffer.c | 121 | ||||
-rw-r--r-- | source/pdf/pdf-op-run.c | 56 | ||||
-rw-r--r-- | source/pdf/pdf-stream.c | 37 |
6 files changed, 224 insertions, 79 deletions
diff --git a/source/pdf/pdf-image.c b/source/pdf/pdf-image.c index c329c1a7..bb663ba2 100644 --- a/source/pdf/pdf-image.c +++ b/source/pdf/pdf-image.c @@ -22,6 +22,7 @@ pdf_load_image_imp(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cs int i; fz_context *ctx = doc->ctx; + fz_compressed_buffer *buffer; fz_var(stm); fz_var(mask); @@ -133,26 +134,24 @@ pdf_load_image_imp(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cs } } - /* Now, do we load a ref, or do we load the actual thing? */ + /* Do we load from a ref, or do we load an inline stream? */ if (cstm == NULL) { /* Just load the compressed image data now and we can * decode it on demand. */ int num = pdf_to_num(dict); int gen = pdf_to_gen(dict); - fz_compressed_buffer *buffer = pdf_load_compressed_stream(doc, num, gen); + buffer = pdf_load_compressed_stream(doc, num, gen); image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, buffer, mask); - break; /* Out of fz_try */ + } + else + { + /* Inline stream */ + stride = (w * n * bpc + 7) / 8; + image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, NULL, mask); + pdf_load_compressed_inline_image(doc, dict, stride * h, cstm, indexed, image); } - /* We need to decompress the image now */ - stride = (w * n * bpc + 7) / 8; - stm = pdf_open_inline_stream(doc, dict, stride * h, cstm, NULL); - - image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, NULL, mask); - colorspace = NULL; - mask = NULL; - image->tile = fz_decomp_image_from_stream(ctx, stm, image, indexed, 0, 0); } fz_catch(ctx) { diff --git a/source/pdf/pdf-interpret-imp.h b/source/pdf/pdf-interpret-imp.h index 6c3869cf..69a40147 100644 --- a/source/pdf/pdf-interpret-imp.h +++ b/source/pdf/pdf-interpret-imp.h @@ -123,6 +123,7 @@ struct pdf_csi_s int string_len; float stack[32]; int top; + fz_image *img; int xbalance; int in_text; diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c index 3984d8e6..525d2ead 100644 --- a/source/pdf/pdf-interpret.c +++ b/source/pdf/pdf-interpret.c @@ -40,6 +40,9 @@ pdf_clear_stack(pdf_csi *csi) { int i; + fz_drop_image(csi->doc->ctx, csi->img); + csi->img = NULL; + pdf_drop_obj(csi->obj); csi->obj = NULL; @@ -64,6 +67,61 @@ pdf_free_csi(pdf_csi *csi) #define B(a,b) (a | b << 8) #define C(a,b,c) (a | b << 8 | c << 16) +static void +parse_inline_image(pdf_csi *csi) +{ + fz_context *ctx = csi->doc->ctx; + pdf_obj *rdb = csi->rdb; + fz_stream *file = csi->file; + int ch, found; + + fz_drop_image(ctx, csi->img); + csi->img = NULL; + pdf_drop_obj(csi->obj); + csi->obj = NULL; + + csi->obj = pdf_parse_dict(csi->doc, file, &csi->doc->lexbuf.base); + + /* read whitespace after ID keyword */ + ch = fz_read_byte(file); + if (ch == '\r') + if (fz_peek_byte(file) == '\n') + fz_read_byte(file); + + fz_try(ctx) + { + csi->img = pdf_load_inline_image(csi->doc, rdb, csi->obj, file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + /* find EI */ + found = 0; + ch = fz_read_byte(file); + do + { + while (ch != 'E' && ch != EOF) + ch = fz_read_byte(file); + if (ch == 'E') + { + ch = fz_read_byte(file); + if (ch == 'I') + { + ch = fz_peek_byte(file); + if (ch == ' ' || ch <= 32 || ch == EOF || ch == '<' || ch == '/') + { + found = 1; + break; + } + } + } + } while (ch != EOF); + if (!found) + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image"); +} + static int pdf_run_keyword(pdf_csi *csi, char *buf) { @@ -177,6 +235,11 @@ pdf_run_keyword(pdf_csi *csi, char *buf) return 0; } + if (op == PDF_OP_BI) + { + parse_inline_image(csi); + } + if (op < PDF_OP_Do) { pdf_process_op(csi, op, &csi->process); @@ -383,6 +446,10 @@ pdf_process_stream(pdf_csi *csi, pdf_lexbuf *buf) } while (tok != PDF_TOK_EOF); } + fz_always(ctx) + { + pdf_clear_stack(csi); + } fz_catch(ctx) { if (!csi->cookie) diff --git a/source/pdf/pdf-op-buffer.c b/source/pdf/pdf-op-buffer.c index 7660b03e..22436ce5 100644 --- a/source/pdf/pdf-op-buffer.c +++ b/source/pdf/pdf-op-buffer.c @@ -113,23 +113,118 @@ static void pdf_buffer_BI(pdf_csi *csi, void *state_) { pdf_buffer_state *state = (pdf_buffer_state *)state_; - fz_stream *file = csi->file; - pdf_obj *obj; - int ch; + int len, i; + unsigned char *data; + fz_compressed_buffer *cbuf; + fz_buffer *buffer; + const char *match; + const char *match2; + pdf_obj *filter; + fz_context *ctx = csi->doc->ctx; - obj = pdf_parse_dict(csi->doc, csi->file, &csi->doc->lexbuf.base); + if (csi->img == NULL) + return; + cbuf = csi->img->buffer; + if (cbuf == NULL) + return; + buffer = cbuf->buffer; + if (buffer == NULL) + return; - /* read whitespace after ID keyword */ - ch = fz_read_byte(file); - if (ch == '\r') - if (fz_peek_byte(file) == '\n') - fz_read_byte(file); + /* Tweak the /Filter entry in csi->obj to match the buffer params */ + switch (cbuf->params.type) + { + case FZ_IMAGE_JPEG: + match = "DCTDecode"; + match2 = "DCT"; + break; + case FZ_IMAGE_FAX: + match = "CCITTFaxDecode"; + match2 = "CCF"; + break; + case FZ_IMAGE_RAW: + match = NULL; + match2 = NULL; + break; + case FZ_IMAGE_RLD: + match = "RunLengthDecode"; + match2 = "RL"; + break; + case FZ_IMAGE_FLATE: + match = "FlateDecode"; + match2 = "Fl"; + break; + case FZ_IMAGE_LZW: + match = "LZWDecode"; + match2 = "LZW"; + break; + default: + fz_warn(ctx, "Unsupported type (%d) of inline image", cbuf->params.type); + return; + } - fz_printf(state->out, "BI "); - pdf_output_obj(state->out, obj, 1); - fz_printf(state->out, " ID\n"); + filter = pdf_dict_gets(csi->obj, "Filter"); + if (filter == NULL) + filter = pdf_dict_gets(csi->obj, "F"); + if (match == NULL) + { + /* Remove any filter entry (e.g. Ascii85Decode) */ + if (filter) + { + pdf_dict_dels(csi->obj, "Filter"); + pdf_dict_dels(csi->obj, "F"); + } + pdf_dict_dels(csi->obj, "DecodeParms"); + pdf_dict_dels(csi->obj, "DP"); + } + else if (pdf_is_array(filter)) + { + int l = pdf_array_len(filter); + pdf_obj *o = (l == 0 ? NULL : pdf_array_get(filter, l-1)); + const char *fil = pdf_to_name(o); - /* FIXME */ + if (l == 0 || (strcmp(fil, match) && strcmp(fil, match2))) + { + fz_warn(ctx, "Unexpected Filter configuration in inline image"); + return; + } + pdf_dict_puts(csi->obj, "F", o); + + o = pdf_dict_gets(csi->obj, "DecodeParms"); + if (o == NULL) + o = pdf_dict_gets(csi->obj, "DP"); + if (o) + { + o = pdf_array_get(o, l-1); + if (o) + pdf_dict_puts(csi->obj, "DP", o); + else + pdf_dict_dels(csi->obj, "DP"); + pdf_dict_dels(csi->obj, "DecodeParms"); + } + } + else + { + /* It's a singleton. It must be correct */ + } + + fz_printf(state->out, "BI\n"); + + len = pdf_dict_len(csi->obj); + for (i = 0; i < len; i++) + { + pdf_output_obj(state->out, pdf_dict_get_key(csi->obj, i), 1); + pdf_output_obj(state->out, pdf_dict_get_val(csi->obj, i), 1); + } + fz_printf(state->out, "ID\n"); + + buffer = csi->img->buffer->buffer; + len = buffer->len; + data = buffer->data; + for (i = 0; i < len; i++) + { + fz_printf(state->out, "%c", data[i]); + } fz_printf(state->out, "\nEI\n"); } diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c index 0f74667a..5575bb00 100644 --- a/source/pdf/pdf-op-run.c +++ b/source/pdf/pdf-op-run.c @@ -1735,62 +1735,8 @@ static void pdf_run_BDC(pdf_csi *csi, void *state) static void pdf_run_BI(pdf_csi *csi, void *state) { pdf_run_state *pr = (pdf_run_state *)state; - fz_context *ctx = csi->doc->ctx; - pdf_obj *rdb = csi->rdb; - fz_stream *file = csi->file; - int ch; - fz_image *img; - pdf_obj *obj; - int found; - - obj = pdf_parse_dict(csi->doc, file, &csi->doc->lexbuf.base); - - /* read whitespace after ID keyword */ - ch = fz_read_byte(file); - if (ch == '\r') - if (fz_peek_byte(file) == '\n') - fz_read_byte(file); - - fz_try(ctx) - { - img = pdf_load_inline_image(csi->doc, rdb, obj, file); - } - fz_always(ctx) - { - pdf_drop_obj(obj); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } - - pdf_show_image(csi, pr, img); - fz_drop_image(ctx, img); - - /* find EI */ - found = 0; - ch = fz_read_byte(file); - do - { - while (ch != 'E' && ch != EOF) - ch = fz_read_byte(file); - if (ch == 'E') - { - ch = fz_read_byte(file); - if (ch == 'I') - { - ch = fz_peek_byte(file); - if (ch == ' ' || ch <= 32 || ch == EOF || ch == '<' || ch == '/') - { - found = 1; - break; - } - } - } - } while (ch != EOF); - if (!found) - fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image"); + pdf_show_image(csi, pr, csi->img); } static void pdf_run_B(pdf_csi *csi, void *state) diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c index e0b809c7..2c552e39 100644 --- a/source/pdf/pdf-stream.c +++ b/source/pdf/pdf-stream.c @@ -93,6 +93,9 @@ build_filter(fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int nu int colors = pdf_to_int(pdf_dict_gets(p, "Colors")); int bpc = pdf_to_int(pdf_dict_gets(p, "BitsPerComponent")); + if (params) + params->type = FZ_IMAGE_RAW; + if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) return fz_open_ahxd(chain); @@ -358,9 +361,43 @@ pdf_open_inline_stream(pdf_document *doc, pdf_obj *stmobj, int length, fz_stream if (pdf_array_len(filters) > 0) return build_filter_chain(chain, doc, filters, params, 0, 0, imparams); + if (imparams) + imparams->type = FZ_IMAGE_RAW; return fz_open_null(chain, length, fz_tell(chain)); } +void +pdf_load_compressed_inline_image(pdf_document *doc, pdf_obj *dict, int length, fz_stream *stm, int indexed, fz_image *image) +{ + fz_context *ctx = doc->ctx; + fz_compressed_buffer *bc = fz_malloc_struct(ctx, fz_compressed_buffer); + fz_stream *istm = NULL; + + fz_var(istm); + + fz_try(ctx) + { + int dummy_l2factor = 0; + bc->buffer = fz_new_buffer(ctx, 1024); + + stm = pdf_open_inline_stream(doc, dict, length, stm, &bc->params); + stm = fz_open_leecher(stm, bc->buffer); + istm = fz_open_image_decomp_stream(ctx, stm, &bc->params, &dummy_l2factor); + + image->tile = fz_decomp_image_from_stream(ctx, istm, image, indexed, 0, 0); + } + fz_always(ctx) + { + fz_close(istm); + } + fz_catch(ctx) + { + fz_free(ctx, bc); + fz_rethrow(ctx); + } + image->buffer = bc; +} + /* * Open a stream for reading the raw (compressed but decrypted) data. */ |