diff options
Diffstat (limited to 'source/pdf/pdf-parse.c')
-rw-r--r-- | source/pdf/pdf-parse.c | 267 |
1 files changed, 130 insertions, 137 deletions
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c index a2fd09db..48761374 100644 --- a/source/pdf/pdf-parse.c +++ b/source/pdf/pdf-parse.c @@ -3,10 +3,10 @@ fz_rect * pdf_to_rect(fz_context *ctx, pdf_obj *array, fz_rect *r) { - float a = pdf_to_real(pdf_array_get(array, 0)); - float b = pdf_to_real(pdf_array_get(array, 1)); - float c = pdf_to_real(pdf_array_get(array, 2)); - float d = pdf_to_real(pdf_array_get(array, 3)); + float a = pdf_to_real(ctx, pdf_array_get(ctx, array, 0)); + float b = pdf_to_real(ctx, pdf_array_get(ctx, array, 1)); + float c = pdf_to_real(ctx, pdf_array_get(ctx, array, 2)); + float d = pdf_to_real(ctx, pdf_array_get(ctx, array, 3)); r->x0 = fz_min(a, c); r->y0 = fz_min(b, d); r->x1 = fz_max(a, c); @@ -17,20 +17,19 @@ pdf_to_rect(fz_context *ctx, pdf_obj *array, fz_rect *r) fz_matrix * pdf_to_matrix(fz_context *ctx, pdf_obj *array, fz_matrix *m) { - m->a = pdf_to_real(pdf_array_get(array, 0)); - m->b = pdf_to_real(pdf_array_get(array, 1)); - m->c = pdf_to_real(pdf_array_get(array, 2)); - m->d = pdf_to_real(pdf_array_get(array, 3)); - m->e = pdf_to_real(pdf_array_get(array, 4)); - m->f = pdf_to_real(pdf_array_get(array, 5)); + m->a = pdf_to_real(ctx, pdf_array_get(ctx, array, 0)); + m->b = pdf_to_real(ctx, pdf_array_get(ctx, array, 1)); + m->c = pdf_to_real(ctx, pdf_array_get(ctx, array, 2)); + m->d = pdf_to_real(ctx, pdf_array_get(ctx, array, 3)); + m->e = pdf_to_real(ctx, pdf_array_get(ctx, array, 4)); + m->f = pdf_to_real(ctx, pdf_array_get(ctx, array, 5)); return m; } /* Convert Unicode/PdfDocEncoding string into utf-8 */ char * -pdf_to_utf8(pdf_document *doc, pdf_obj *src) +pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src) { - fz_context *ctx = doc->ctx; fz_buffer *strmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; @@ -42,14 +41,14 @@ pdf_to_utf8(pdf_document *doc, pdf_obj *src) fz_var(strmbuf); fz_try(ctx) { - if (pdf_is_string(src)) + if (pdf_is_string(ctx, src)) { - srcptr = (unsigned char *) pdf_to_str_buf(src); - srclen = pdf_to_str_len(src); + srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); + srclen = pdf_to_str_len(ctx, src); } - else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src))) + else if (pdf_is_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src))) { - strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src)); + strmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src)); srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); } else @@ -118,12 +117,11 @@ pdf_to_utf8(pdf_document *doc, pdf_obj *src) /* Convert Unicode/PdfDocEncoding string into ucs-2 */ unsigned short * -pdf_to_ucs2(pdf_document *doc, pdf_obj *src) +pdf_to_ucs2(fz_context *ctx, pdf_document *doc, pdf_obj *src) { - fz_context *ctx = doc->ctx; - unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); + unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); unsigned short *dstptr, *dst; - int srclen = pdf_to_str_len(src); + int srclen = pdf_to_str_len(ctx, src); int i; if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) @@ -152,11 +150,11 @@ pdf_to_ucs2(pdf_document *doc, pdf_obj *src) /* allow to convert to UCS-2 without the need for an fz_context */ /* (buffer must be at least (fz_to_str_len(src) + 1) * 2 bytes in size) */ void -pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src) +pdf_to_ucs2_buf(fz_context *ctx, unsigned short *buffer, pdf_obj *src) { - unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); + unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); unsigned short *dstptr = buffer; - int srclen = pdf_to_str_len(src); + int srclen = pdf_to_str_len(ctx, src); int i; if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) @@ -180,9 +178,8 @@ pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src) /* Convert UCS-2 string into PdfDocEncoding for authentication */ char * -pdf_from_ucs2(pdf_document *doc, unsigned short *src) +pdf_from_ucs2(fz_context *ctx, pdf_document *doc, unsigned short *src) { - fz_context *ctx = doc->ctx; int i, j, len; char *docstr; @@ -219,48 +216,47 @@ pdf_from_ucs2(pdf_document *doc, unsigned short *src) } pdf_obj * -pdf_to_utf8_name(pdf_document *doc, pdf_obj *src) +pdf_to_utf8_name(fz_context *ctx, pdf_document *doc, pdf_obj *src) { - char *buf = pdf_to_utf8(doc, src); - pdf_obj *dst = pdf_new_name(doc, buf); - fz_free(doc->ctx, buf); + char *buf = pdf_to_utf8(ctx, doc, src); + pdf_obj *dst = pdf_new_name(ctx, doc, buf); + fz_free(ctx, buf); return dst; } pdf_obj * -pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) +pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *ary = NULL; pdf_obj *obj = NULL; int a = 0, b = 0, n = 0; pdf_token tok; - fz_context *ctx = file->ctx; pdf_obj *op = NULL; fz_var(obj); - ary = pdf_new_array(doc, 4); + ary = pdf_new_array(ctx, doc, 4); fz_try(ctx) { while (1) { - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT && tok != PDF_TOK_R) { if (n > 0) { - obj = pdf_new_int(doc, a); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_int(ctx, doc, a); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; } if (n > 1) { - obj = pdf_new_int(doc, b); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_int(ctx, doc, b); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; } n = 0; @@ -268,9 +264,9 @@ pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) if (tok == PDF_TOK_INT && n == 2) { - obj = pdf_new_int(doc, a); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_int(ctx, doc, a); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; a = b; n --; @@ -293,61 +289,61 @@ pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) case PDF_TOK_R: if (n != 2) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse indirect reference in array"); - obj = pdf_new_indirect(doc, a, b); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_indirect(ctx, doc, a, b); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; n = 0; break; case PDF_TOK_OPEN_ARRAY: - obj = pdf_parse_array(doc, file, buf); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_parse_array(ctx, doc, file, buf); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_OPEN_DICT: - obj = pdf_parse_dict(doc, file, buf); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_parse_dict(ctx, doc, file, buf); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_NAME: - obj = pdf_new_name(doc, buf->scratch); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_name(ctx, doc, buf->scratch); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_REAL: - obj = pdf_new_real(doc, buf->f); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_real(ctx, doc, buf->f); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_STRING: - obj = pdf_new_string(doc, buf->scratch, buf->len); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_TRUE: - obj = pdf_new_bool(doc, 1); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_bool(ctx, doc, 1); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_FALSE: - obj = pdf_new_bool(doc, 0); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_bool(ctx, doc, 0); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; case PDF_TOK_NULL: - obj = pdf_new_null(doc); - pdf_array_push(ary, obj); - pdf_drop_obj(obj); + obj = pdf_new_null(ctx, doc); + pdf_array_push(ctx, ary, obj); + pdf_drop_obj(ctx, obj); obj = NULL; break; @@ -360,24 +356,23 @@ end: } fz_catch(ctx) { - pdf_drop_obj(obj); - pdf_drop_obj(ary); + pdf_drop_obj(ctx, obj); + pdf_drop_obj(ctx, ary); fz_rethrow_message(ctx, "cannot parse array"); } return op; } pdf_obj * -pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) +pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *dict; pdf_obj *key = NULL; pdf_obj *val = NULL; pdf_token tok; int a, b; - fz_context *ctx = file->ctx; - dict = pdf_new_dict(doc, 8); + dict = pdf_new_dict(ctx, doc, 8); fz_var(key); fz_var(val); @@ -386,7 +381,7 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { while (1) { - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); skip: if (tok == PDF_TOK_CLOSE_DICT) break; @@ -398,49 +393,49 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) if (tok != PDF_TOK_NAME) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict"); - key = pdf_new_name(doc, buf->scratch); + key = pdf_new_name(ctx, doc, buf->scratch); - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: - val = pdf_parse_array(doc, file, buf); + val = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: - val = pdf_parse_dict(doc, file, buf); + val = pdf_parse_dict(ctx, doc, file, buf); break; - case PDF_TOK_NAME: val = pdf_new_name(doc, buf->scratch); break; - case PDF_TOK_REAL: val = pdf_new_real(doc, buf->f); break; - case PDF_TOK_STRING: val = pdf_new_string(doc, buf->scratch, buf->len); break; - case PDF_TOK_TRUE: val = pdf_new_bool(doc, 1); break; - case PDF_TOK_FALSE: val = pdf_new_bool(doc, 0); break; - case PDF_TOK_NULL: val = pdf_new_null(doc); break; + case PDF_TOK_NAME: val = pdf_new_name(ctx, doc, buf->scratch); break; + case PDF_TOK_REAL: val = pdf_new_real(ctx, doc, buf->f); break; + case PDF_TOK_STRING: val = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: val = pdf_new_bool(ctx, doc, 1); break; + case PDF_TOK_FALSE: val = pdf_new_bool(ctx, doc, 0); break; + case PDF_TOK_NULL: val = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: /* 64-bit to allow for numbers > INT_MAX and overflow */ a = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) { - val = pdf_new_int(doc, a); - pdf_dict_put(dict, key, val); - pdf_drop_obj(val); + val = pdf_new_int(ctx, doc, a); + pdf_dict_put(ctx, dict, key, val); + pdf_drop_obj(ctx, val); val = NULL; - pdf_drop_obj(key); + pdf_drop_obj(ctx, key); key = NULL; goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { - val = pdf_new_indirect(doc, a, b); + val = pdf_new_indirect(ctx, doc, a, b); break; } } @@ -450,50 +445,49 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict"); } - pdf_dict_put(dict, key, val); - pdf_drop_obj(val); + pdf_dict_put(ctx, dict, key, val); + pdf_drop_obj(ctx, val); val = NULL; - pdf_drop_obj(key); + pdf_drop_obj(ctx, key); key = NULL; } } fz_catch(ctx) { - pdf_drop_obj(dict); - pdf_drop_obj(key); - pdf_drop_obj(val); + pdf_drop_obj(ctx, dict); + pdf_drop_obj(ctx, key); + pdf_drop_obj(ctx, val); fz_rethrow_message(ctx, "cannot parse dict"); } return dict; } pdf_obj * -pdf_parse_stm_obj(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) +pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_token tok; - fz_context *ctx = file->ctx; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: - return pdf_parse_array(doc, file, buf); + return pdf_parse_array(ctx, doc, file, buf); case PDF_TOK_OPEN_DICT: - return pdf_parse_dict(doc, file, buf); - case PDF_TOK_NAME: return pdf_new_name(doc, buf->scratch); break; - case PDF_TOK_REAL: return pdf_new_real(doc, buf->f); break; - case PDF_TOK_STRING: return pdf_new_string(doc, buf->scratch, buf->len); break; - case PDF_TOK_TRUE: return pdf_new_bool(doc, 1); break; - case PDF_TOK_FALSE: return pdf_new_bool(doc, 0); break; - case PDF_TOK_NULL: return pdf_new_null(doc); break; - case PDF_TOK_INT: return pdf_new_int(doc, buf->i); break; + return pdf_parse_dict(ctx, doc, file, buf); + case PDF_TOK_NAME: return pdf_new_name(ctx, doc, buf->scratch); break; + case PDF_TOK_REAL: return pdf_new_real(ctx, doc, buf->f); break; + case PDF_TOK_STRING: return pdf_new_string(ctx, doc, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: return pdf_new_bool(ctx, doc, 1); break; + case PDF_TOK_FALSE: return pdf_new_bool(ctx, doc, 0); break; + case PDF_TOK_NULL: return pdf_new_null(ctx, doc); break; + case PDF_TOK_INT: return pdf_new_int(ctx, doc, buf->i); break; default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in object stream"); } } pdf_obj * -pdf_parse_ind_obj(pdf_document *doc, +pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs, int *try_repair) { @@ -501,11 +495,10 @@ pdf_parse_ind_obj(pdf_document *doc, int num = 0, gen = 0, stm_ofs; pdf_token tok; int a, b; - fz_context *ctx = file->ctx; fz_var(obj); - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) @@ -514,7 +507,7 @@ pdf_parse_ind_obj(pdf_document *doc, } num = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) @@ -523,7 +516,7 @@ pdf_parse_ind_obj(pdf_document *doc, } gen = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_OBJ) { if (try_repair) @@ -531,48 +524,48 @@ pdf_parse_ind_obj(pdf_document *doc, fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); } - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: - obj = pdf_parse_array(doc, file, buf); + obj = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: - obj = pdf_parse_dict(doc, file, buf); + obj = pdf_parse_dict(ctx, doc, file, buf); break; - case PDF_TOK_NAME: obj = pdf_new_name(doc, buf->scratch); break; - case PDF_TOK_REAL: obj = pdf_new_real(doc, buf->f); break; - case PDF_TOK_STRING: obj = pdf_new_string(doc, buf->scratch, buf->len); break; - case PDF_TOK_TRUE: obj = pdf_new_bool(doc, 1); break; - case PDF_TOK_FALSE: obj = pdf_new_bool(doc, 0); break; - case PDF_TOK_NULL: obj = pdf_new_null(doc); break; + case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break; + case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break; + case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break; + case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break; + case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: a = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { - obj = pdf_new_int(doc, a); + obj = pdf_new_int(ctx, doc, a); goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { - obj = pdf_new_indirect(doc, a, b); + obj = pdf_new_indirect(ctx, doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); case PDF_TOK_ENDOBJ: - obj = pdf_new_null(doc); + obj = pdf_new_null(ctx, doc); goto skip; default: @@ -581,29 +574,29 @@ pdf_parse_ind_obj(pdf_document *doc, fz_try(ctx) { - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); } fz_catch(ctx) { - pdf_drop_obj(obj); + pdf_drop_obj(ctx, obj); fz_rethrow_message(ctx, "cannot parse indirect object (%d %d R)", num, gen); } skip: if (tok == PDF_TOK_STREAM) { - int c = fz_read_byte(file); + int c = fz_read_byte(ctx, file); while (c == ' ') - c = fz_read_byte(file); + c = fz_read_byte(ctx, file); if (c == '\r') { - c = fz_peek_byte(file); + c = fz_peek_byte(ctx, file); if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else - fz_read_byte(file); + fz_read_byte(ctx, file); } - stm_ofs = fz_tell(file); + stm_ofs = fz_tell(ctx, file); } else if (tok == PDF_TOK_ENDOBJ) { |