From 7eb8e51b6b889fa14aae39d282675b838aa43dc9 Mon Sep 17 00:00:00 2001 From: Paul Gardiner Date: Wed, 18 Jul 2012 14:45:28 +0100 Subject: Update pdf_to_utf8 to handle either a stream or a string. Also change first argument from fz_context to pdf_document in each of pdf_to_utf8, pdf_to_utf8_name, pdf_to_ucs2 and pdf_from_ucs2 --- pdf/mupdf.h | 8 ++-- pdf/pdf_annot.c | 8 ++-- pdf/pdf_nametree.c | 2 +- pdf/pdf_outline.c | 2 +- pdf/pdf_parse.c | 109 ++++++++++++++++++++++++++++++++++------------------- pdf/pdf_xref.c | 2 +- 6 files changed, 81 insertions(+), 50 deletions(-) diff --git a/pdf/mupdf.h b/pdf/mupdf.h index c2566c02..f3821efd 100644 --- a/pdf/mupdf.h +++ b/pdf/mupdf.h @@ -85,10 +85,10 @@ int pdf_fprint_obj(FILE *fp, pdf_obj *obj, int tight); void pdf_print_obj(pdf_obj *obj); void pdf_print_ref(pdf_obj *obj); -char *pdf_to_utf8(fz_context *ctx, pdf_obj *src); -unsigned short *pdf_to_ucs2(fz_context *ctx, pdf_obj *src); /* sumatrapdf */ -pdf_obj *pdf_to_utf8_name(fz_context *ctx, pdf_obj *src); -char *pdf_from_ucs2(fz_context *ctx, unsigned short *str); +char *pdf_to_utf8(pdf_document *xref, pdf_obj *src); +unsigned short *pdf_to_ucs2(pdf_document *xref, pdf_obj *src); /* sumatrapdf */ +pdf_obj *pdf_to_utf8_name(pdf_document *xref, pdf_obj *src); +char *pdf_from_ucs2(pdf_document *xref, unsigned short *str); fz_rect pdf_to_rect(fz_context *ctx, pdf_obj *array); fz_matrix pdf_to_matrix(fz_context *ctx, pdf_obj *array); diff --git a/pdf/pdf_annot.c b/pdf/pdf_annot.c index dc31498c..fc436409 100644 --- a/pdf/pdf_annot.c +++ b/pdf/pdf_annot.c @@ -213,7 +213,7 @@ pdf_parse_action(pdf_document *xref, pdf_obj *action) { ld.kind = FZ_LINK_URI; ld.ld.uri.is_map = pdf_to_bool(pdf_dict_gets(action, "IsMap")); - ld.ld.uri.uri = pdf_to_utf8(ctx, pdf_dict_gets(action, "URI")); + ld.ld.uri.uri = pdf_to_utf8(xref, pdf_dict_gets(action, "URI")); } else if (!strcmp(pdf_to_name(obj), "Launch")) { @@ -221,20 +221,20 @@ pdf_parse_action(pdf_document *xref, pdf_obj *action) 
ld.kind = FZ_LINK_LAUNCH; if (pdf_is_dict(dest)) dest = pdf_dict_gets(dest, "F"); - ld.ld.launch.file_spec = pdf_to_utf8(ctx, dest); + ld.ld.launch.file_spec = pdf_to_utf8(xref, dest); ld.ld.launch.new_window = pdf_to_int(pdf_dict_gets(action, "NewWindow")); } else if (!strcmp(pdf_to_name(obj), "Named")) { ld.kind = FZ_LINK_NAMED; - ld.ld.named.named = pdf_to_utf8(ctx, pdf_dict_gets(action, "N")); + ld.ld.named.named = pdf_to_utf8(xref, pdf_dict_gets(action, "N")); } else if (!strcmp(pdf_to_name(obj), "GoToR")) { dest = pdf_dict_gets(action, "D"); ld = pdf_parse_link_dest(xref, dest); ld.kind = FZ_LINK_GOTOR; - ld.ld.gotor.file_spec = pdf_to_utf8(ctx, pdf_dict_gets(action, "F")); + ld.ld.gotor.file_spec = pdf_to_utf8(xref, pdf_dict_gets(action, "F")); ld.ld.gotor.new_window = pdf_to_int(pdf_dict_gets(action, "NewWindow")); } return ld; diff --git a/pdf/pdf_nametree.c b/pdf/pdf_nametree.c index 7d8ac319..25fced52 100644 --- a/pdf/pdf_nametree.c +++ b/pdf/pdf_nametree.c @@ -135,7 +135,7 @@ pdf_load_name_tree_imp(pdf_obj *dict, pdf_document *xref, pdf_obj *node) pdf_obj *val = pdf_array_get(names, i + 1); if (pdf_is_string(key)) { - key = pdf_to_utf8_name(ctx, key); + key = pdf_to_utf8_name(xref, key); pdf_dict_put(dict, key, val); pdf_drop_obj(key); } diff --git a/pdf/pdf_outline.c b/pdf/pdf_outline.c index d4bea75a..48f3853c 100644 --- a/pdf/pdf_outline.c +++ b/pdf/pdf_outline.c @@ -29,7 +29,7 @@ pdf_load_outline_imp(pdf_document *xref, pdf_obj *dict) obj = pdf_dict_gets(dict, "Title"); if (obj) - node->title = pdf_to_utf8(ctx, obj); + node->title = pdf_to_utf8(xref, obj); if ((obj = pdf_dict_gets(dict, "Dest"))) node->dest = pdf_parse_link_dest(xref, obj); diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c index 0ba6b0a4..213c399e 100644 --- a/pdf/pdf_parse.c +++ b/pdf/pdf_parse.c @@ -31,60 +31,89 @@ pdf_to_matrix(fz_context *ctx, pdf_obj *array) /* Convert Unicode/PdfDocEncoding string into utf-8 */ char * -pdf_to_utf8(fz_context *ctx, pdf_obj *src) 
+pdf_to_utf8(pdf_document *xref, pdf_obj *src) { - unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); + fz_context *ctx = xref->ctx; + fz_buffer *strmbuf = NULL; + unsigned char *srcptr; char *dstptr, *dst; - int srclen = pdf_to_str_len(src); + int srclen; int dstlen = 0; int ucs; int i; - if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) + fz_var(strmbuf); + fz_try(ctx) { - for (i = 2; i + 1 < srclen; i += 2) + if (pdf_is_string(src)) { - ucs = srcptr[i] << 8 | srcptr[i+1]; - dstlen += fz_runelen(ucs); + srcptr = (unsigned char *) pdf_to_str_buf(src); + srclen = pdf_to_str_len(src); } - - dstptr = dst = fz_malloc(ctx, dstlen + 1); - - for (i = 2; i + 1 < srclen; i += 2) + else if (pdf_is_stream(xref, pdf_to_num(src), pdf_to_gen(src))) { - ucs = srcptr[i] << 8 | srcptr[i+1]; - dstptr += fz_runetochar(dstptr, ucs); + strmbuf = pdf_load_stream(xref, pdf_to_num(src), pdf_to_gen(src)); + srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); } - } - else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) - { - for (i = 2; i + 1 < srclen; i += 2) + else { - ucs = srcptr[i] | srcptr[i+1] << 8; - dstlen += fz_runelen(ucs); + srclen = 0; } - dstptr = dst = fz_malloc(ctx, dstlen + 1); - - for (i = 2; i + 1 < srclen; i += 2) + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { - ucs = srcptr[i] | srcptr[i+1] << 8; - dstptr += fz_runetochar(dstptr, ucs); + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] << 8 | srcptr[i+1]; + dstlen += fz_runelen(ucs); + } + + dstptr = dst = fz_malloc(ctx, dstlen + 1); + + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] << 8 | srcptr[i+1]; + dstptr += fz_runetochar(dstptr, ucs); + } } - } - else - { - for (i = 0; i < srclen; i++) - dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstlen += fz_runelen(ucs); + } - dstptr = dst = 
fz_malloc(ctx, dstlen + 1); + dstptr = dst = fz_malloc(ctx, dstlen + 1); - for (i = 0; i < srclen; i++) + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstptr += fz_runetochar(dstptr, ucs); + } + } + else { - ucs = pdf_doc_encoding[srcptr[i]]; - dstptr += fz_runetochar(dstptr, ucs); + for (i = 0; i < srclen; i++) + dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); + + dstptr = dst = fz_malloc(ctx, dstlen + 1); + + for (i = 0; i < srclen; i++) + { + ucs = pdf_doc_encoding[srcptr[i]]; + dstptr += fz_runetochar(dstptr, ucs); + } } } + fz_always(ctx) + { + fz_drop_buffer(ctx, strmbuf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } *dstptr = '\0'; return dst; @@ -92,8 +121,9 @@ pdf_to_utf8(fz_context *ctx, pdf_obj *src) /* Convert Unicode/PdfDocEncoding string into ucs-2 */ unsigned short * -pdf_to_ucs2(fz_context *ctx, pdf_obj *src) +pdf_to_ucs2(pdf_document *xref, pdf_obj *src) { + fz_context *ctx = xref->ctx; unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); unsigned short *dstptr, *dst; int srclen = pdf_to_str_len(src); @@ -124,8 +154,9 @@ pdf_to_ucs2(fz_context *ctx, pdf_obj *src) /* Convert UCS-2 string into PdfDocEncoding for authentication */ char * -pdf_from_ucs2(fz_context *ctx, unsigned short *src) +pdf_from_ucs2(pdf_document *xref, unsigned short *src) { + fz_context *ctx = xref->ctx; int i, j, len; char *docstr; @@ -162,11 +193,11 @@ pdf_from_ucs2(fz_context *ctx, unsigned short *src) } pdf_obj * -pdf_to_utf8_name(fz_context *ctx, pdf_obj *src) +pdf_to_utf8_name(pdf_document *xref, pdf_obj *src) { - char *buf = pdf_to_utf8(ctx, src); - pdf_obj *dst = fz_new_name(ctx, buf); - fz_free(ctx, buf); + char *buf = pdf_to_utf8(xref, src); + pdf_obj *dst = fz_new_name(xref->ctx, buf); + fz_free(xref->ctx, buf); return dst; } diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index a1a19f22..4a03751c 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -1222,7 +1222,7 @@ pdf_meta(pdf_document *doc, int key, void *ptr, int 
size) } if (info && ptr && size) { - char *utf8 = pdf_to_utf8(doc->ctx, info); + char *utf8 = pdf_to_utf8(doc, info); strncpy(ptr, utf8, size); ((char *)ptr)[size-1] = 0; fz_free(doc->ctx, utf8); -- cgit v1.2.3