diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2018-06-19 13:15:31 +0200 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2018-06-22 16:48:50 +0100 |
commit | f7ace61076d0ab3c71e1d2bd70967ddb0b86f220 (patch) | |
tree | 8bd8b4fe4d7414685d162f66911e9951ea3dce16 /source | |
parent | 148b0934370336fc8b260e8c3aef83daf4d80ba4 (diff) | |
download | mupdf-f7ace61076d0ab3c71e1d2bd70967ddb0b86f220.tar.xz |
Keep copy of decoded utf8 text string in pdf_obj.

Removes the need to alloc/free text strings in the API, allowing
for simple functions like pdf_dict_get_text_string.
Diffstat (limited to 'source')
-rw-r--r-- | source/pdf/pdf-annot.c | 12 | ||||
-rw-r--r-- | source/pdf/pdf-appearance.c | 11 | ||||
-rw-r--r-- | source/pdf/pdf-form.c | 21 | ||||
-rw-r--r-- | source/pdf/pdf-link.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-nametree.c | 10 | ||||
-rw-r--r-- | source/pdf/pdf-object.c | 53 | ||||
-rw-r--r-- | source/pdf/pdf-outline.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-parse.c | 36 | ||||
-rw-r--r-- | source/pdf/pdf-xref.c | 5 | ||||
-rw-r--r-- | source/tools/murun.c | 40 |
10 files changed, 101 insertions, 91 deletions
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c index ed6856de..ba92b7ef 100644 --- a/source/pdf/pdf-annot.c +++ b/source/pdf/pdf-annot.c @@ -392,10 +392,10 @@ pdf_set_annot_rect(fz_context *ctx, pdf_annot *annot, const fz_rect *rect) pdf_dirty_annot(ctx, annot); } -char * -pdf_copy_annot_contents(fz_context *ctx, pdf_annot *annot) +const char * +pdf_get_annot_contents(fz_context *ctx, pdf_annot *annot) { - return pdf_to_utf8(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(Contents))); + return pdf_dict_get_text_string(ctx, annot->obj, PDF_NAME(Contents)); } void @@ -1397,11 +1397,11 @@ pdf_annot_has_author(fz_context *ctx, pdf_annot *annot) return is_allowed_subtype(ctx, annot, PDF_NAME(T), markup_subtypes); } -char * -pdf_copy_annot_author(fz_context *ctx, pdf_annot *annot) +const char * +pdf_get_annot_author(fz_context *ctx, pdf_annot *annot) { check_allowed_subtypes(ctx, annot, PDF_NAME(T), markup_subtypes); - return pdf_to_utf8(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(T))); + return pdf_dict_get_text_string(ctx, annot->obj, PDF_NAME(T)); } void diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c index c3e94baa..9753be92 100644 --- a/source/pdf/pdf-appearance.c +++ b/source/pdf/pdf-appearance.c @@ -906,11 +906,12 @@ pdf_write_free_text_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf { const char *font; float size, color[3]; - char *text; + const char *text; float w, h, t, b; int q, r; /* /Rotate is an undocumented annotation property supported by Adobe */ + text = pdf_get_annot_contents(ctx, annot); r = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(Rotate)); q = pdf_annot_quadding(ctx, annot); pdf_annot_default_appearance(ctx, annot, &font, &size, color); @@ -933,13 +934,7 @@ pdf_write_free_text_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf fz_append_printf(ctx, buf, "%g %g %g %g re\nS\n", b/2, b/2, w-b, h-b); } - text = pdf_copy_annot_contents(ctx, annot); - fz_try(ctx) - write_variable_text(ctx, annot, 
buf, res, text, font, size, color, q, w, h, b*2, 1); - fz_always(ctx) - fz_free(ctx, text); - fz_catch(ctx) - fz_rethrow(ctx); + write_variable_text(ctx, annot, buf, res, text, font, size, color, q, w, h, b*2, 1); } static void diff --git a/source/pdf/pdf-form.c b/source/pdf/pdf-form.c index 54cb1472..93b88e2c 100644 --- a/source/pdf/pdf-form.c +++ b/source/pdf/pdf-form.c @@ -1102,7 +1102,7 @@ int pdf_text_widget_set_text(fz_context *ctx, pdf_document *doc, pdf_widget *tw, } /* Get either the listed value or the export value. */ -int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int exportval, char *opts[]) +int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int exportval, const char *opts[]) { pdf_annot *annot = (pdf_annot *)tw; pdf_obj *optarr; @@ -1122,11 +1122,11 @@ int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw /* If it is a two element array, the second item is the one that we want if we want the listing value. 
*/ if (m == 2) if (exportval) - opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, pdf_array_get(ctx, optarr, i), 0)); + opts[i] = pdf_array_get_text_string(ctx, pdf_array_get(ctx, optarr, i), 0); else - opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, pdf_array_get(ctx, optarr, i), 1)); + opts[i] = pdf_array_get_text_string(ctx, pdf_array_get(ctx, optarr, i), 1); else - opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, optarr, i)); + opts[i] = pdf_array_get_text_string(ctx, optarr, i); } } return n; @@ -1148,7 +1148,7 @@ int pdf_choice_widget_is_multiselect(fz_context *ctx, pdf_document *doc, pdf_wid } } -int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, char *opts[]) +int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, const char *opts[]) { pdf_annot *annot = (pdf_annot *)tw; pdf_obj *optarr; @@ -1162,32 +1162,27 @@ int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, if (pdf_is_string(ctx, optarr)) { if (opts) - opts[0] = pdf_to_utf8(ctx, optarr); - + opts[0] = pdf_to_text_string(ctx, optarr); return 1; } else { n = pdf_array_len(ctx, optarr); - if (opts) { for (i = 0; i < n; i++) { pdf_obj *elem = pdf_array_get(ctx, optarr, i); - if (pdf_is_array(ctx, elem)) elem = pdf_array_get(ctx, elem, 1); - - opts[i] = pdf_to_utf8(ctx, elem); + opts[i] = pdf_to_text_string(ctx, elem); } } - return n; } } -void pdf_choice_widget_set_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int n, char *opts[]) +void pdf_choice_widget_set_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int n, const char *opts[]) { pdf_annot *annot = (pdf_annot *)tw; pdf_obj *optarr = NULL, *opt; diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c index 56c11973..e9f1c36f 100644 --- a/source/pdf/pdf-link.c +++ b/source/pdf/pdf-link.c @@ -134,7 +134,7 @@ pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_ return NULL; } - path = pdf_to_utf8(ctx, filename); + path = 
fz_strdup(ctx, pdf_to_text_string(ctx, filename)); #ifdef _WIN32 if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL))) { diff --git a/source/pdf/pdf-nametree.c b/source/pdf/pdf-nametree.c index 30dd5974..4682024f 100644 --- a/source/pdf/pdf-nametree.c +++ b/source/pdf/pdf-nametree.c @@ -140,9 +140,13 @@ pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_ob pdf_obj *val = pdf_array_get(ctx, names, i + 1); if (pdf_is_string(ctx, key)) { - key = pdf_to_utf8_name(ctx, key); - pdf_dict_put(ctx, dict, key, val); - pdf_drop_obj(ctx, key); + key = pdf_new_name(ctx, pdf_to_text_string(ctx, key)); + fz_try(ctx) + pdf_dict_put(ctx, dict, key, val); + fz_always(ctx) + pdf_drop_obj(ctx, key); + fz_catch(ctx) + fz_rethrow(ctx); } else if (pdf_is_name(ctx, key)) { diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c index ec66376a..bc856644 100644 --- a/source/pdf/pdf-object.c +++ b/source/pdf/pdf-object.c @@ -59,6 +59,7 @@ typedef struct pdf_obj_num_s typedef struct pdf_obj_string_s { pdf_obj super; + char *text; /* utf8 encoded text string */ unsigned int len; char buf[1]; } pdf_obj_string; @@ -141,6 +142,7 @@ pdf_new_string(fz_context *ctx, const char *str, size_t len) obj->super.refs = 1; obj->super.kind = PDF_STRING; obj->super.flags = 0; + obj->text = NULL; obj->len = l; memcpy(obj->buf, str, len); obj->buf[len] = '\0'; @@ -336,6 +338,32 @@ int pdf_to_str_len(fz_context *ctx, pdf_obj *obj) return 0; } +const char *pdf_to_string(fz_context *ctx, pdf_obj *obj, size_t *sizep) +{ + RESOLVE(obj); + if (OBJ_IS_STRING(obj)) + { + if (sizep) + *sizep = STRING(obj)->len; + return STRING(obj)->buf; + } + if (sizep) + *sizep = 0; + return ""; +} + +const char *pdf_to_text_string(fz_context *ctx, pdf_obj *obj) +{ + RESOLVE(obj); + if (OBJ_IS_STRING(obj)) + { + if (!STRING(obj)->text) + STRING(obj)->text = pdf_new_utf8_from_pdf_string(ctx, STRING(obj)->buf, STRING(obj)->len); + return STRING(obj)->text; + } + return ""; +} 
+ void pdf_set_int(fz_context *ctx, pdf_obj *obj, int64_t i) { if (OBJ_IS_INT(obj)) @@ -1669,6 +1697,11 @@ pdf_drop_obj(fz_context *ctx, pdf_obj *obj) pdf_drop_array(ctx, obj); else if (obj->kind == PDF_DICT) pdf_drop_dict(ctx, obj); + else if (obj->kind == PDF_STRING) + { + fz_free(ctx, STRING(obj)->text); + fz_free(ctx, obj); + } else fz_free(ctx, obj); } @@ -2254,10 +2287,12 @@ const char *pdf_dict_get_name(fz_context *ctx, pdf_obj *dict, pdf_obj *key) const char *pdf_dict_get_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key, size_t *sizep) { - pdf_obj *val = pdf_dict_get(ctx, dict, key); - if (sizep) - *sizep = pdf_to_str_len(ctx, val); - return pdf_to_str_buf(ctx, val); + return pdf_to_string(ctx, pdf_dict_get(ctx, dict, key), sizep); +} + +const char *pdf_dict_get_text_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key) +{ + return pdf_to_text_string(ctx, pdf_dict_get(ctx, dict, key)); } int pdf_array_get_bool(fz_context *ctx, pdf_obj *array, int index) @@ -2274,3 +2309,13 @@ float pdf_array_get_real(fz_context *ctx, pdf_obj *array, int index) { return pdf_to_real(ctx, pdf_array_get(ctx, array, index)); } + +const char *pdf_array_get_string(fz_context *ctx, pdf_obj *array, int index, size_t *sizep) +{ + return pdf_to_string(ctx, pdf_array_get(ctx, array, index), sizep); +} + +const char *pdf_array_get_text_string(fz_context *ctx, pdf_obj *array, int index) +{ + return pdf_to_text_string(ctx, pdf_array_get(ctx, array, index)); +} diff --git a/source/pdf/pdf-outline.c b/source/pdf/pdf-outline.c index ea8ecbeb..b091cf4a 100644 --- a/source/pdf/pdf-outline.c +++ b/source/pdf/pdf-outline.c @@ -24,7 +24,7 @@ pdf_load_outline_imp(fz_context *ctx, pdf_document *doc, pdf_obj *dict) obj = pdf_dict_get(ctx, dict, PDF_NAME(Title)); if (obj) - node->title = pdf_to_utf8(ctx, obj); + node->title = fz_strdup(ctx, pdf_to_text_string(ctx, obj)); if ((obj = pdf_dict_get(ctx, dict, PDF_NAME(Dest))) != NULL) node->uri = pdf_parse_link_dest(ctx, doc, obj); diff --git 
a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c index f4075c5a..fb1cc595 100644 --- a/source/pdf/pdf-parse.c +++ b/source/pdf/pdf-parse.c @@ -80,9 +80,11 @@ skip_language_code_utf8(const unsigned char *s, size_t n, size_t i) return 0; } +/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * -pdf_to_utf8_imp(fz_context *ctx, const unsigned char *srcptr, size_t srclen) +pdf_new_utf8_from_pdf_string(fz_context *ctx, const char *ssrcptr, size_t srclen) { + const unsigned char *srcptr = (const unsigned char*)ssrcptr; char *dstptr, *dst; size_t dstlen = 0; int ucs; @@ -168,30 +170,29 @@ pdf_to_utf8_imp(fz_context *ctx, const unsigned char *srcptr, size_t srclen) return dst; } -/* Convert Unicode/PdfDocEncoding string into utf-8 */ +/* Convert text string object to UTF-8 */ char * -pdf_to_utf8(fz_context *ctx, pdf_obj *src) +pdf_new_utf8_from_pdf_string_obj(fz_context *ctx, pdf_obj *src) { - unsigned char *srcptr; + const char *srcptr; size_t srclen; - srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); - srclen = pdf_to_str_len(ctx, src); - return pdf_to_utf8_imp(ctx, srcptr, srclen); + srcptr = pdf_to_string(ctx, src, &srclen); + return pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen); } /* Load text stream and convert to UTF-8 */ char * -pdf_load_stream_as_utf8(fz_context *ctx, pdf_obj *src) +pdf_new_utf8_from_pdf_stream_obj(fz_context *ctx, pdf_obj *src) { fz_buffer *stmbuf; - unsigned char *srcptr; + char *srcptr; size_t srclen; char *dst = NULL; stmbuf = pdf_load_stream(ctx, src); - srclen = fz_buffer_storage(ctx, stmbuf, &srcptr); + srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr); fz_try(ctx) - dst = pdf_to_utf8_imp(ctx, srcptr, srclen); + dst = pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen); fz_always(ctx) fz_drop_buffer(ctx, stmbuf); fz_catch(ctx) @@ -204,17 +205,8 @@ char * pdf_load_stream_or_string_as_utf8(fz_context *ctx, pdf_obj *src) { if (pdf_is_stream(ctx, src)) - return pdf_load_stream_as_utf8(ctx, src); - return 
pdf_to_utf8(ctx, src); -} - -pdf_obj * -pdf_to_utf8_name(fz_context *ctx, pdf_obj *src) -{ - char *buf = pdf_to_utf8(ctx, src); - pdf_obj *dst = pdf_new_name(ctx, buf); - fz_free(ctx, buf); - return dst; + return pdf_new_utf8_from_pdf_stream_obj(ctx, src); + return pdf_new_utf8_from_pdf_string_obj(ctx, src); } static pdf_obj * diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 9d32ea5e..d60eb089 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -2220,7 +2220,7 @@ pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *b if (strstr(key, "info:") == key) { pdf_obj *info; - char *s; + const char *s; int n; info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)); @@ -2231,9 +2231,8 @@ pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *b if (!info) return -1; - s = pdf_to_utf8(ctx, info); + s = pdf_to_text_string(ctx, info); n = (int)fz_strlcpy(buf, s, size); - fz_free(ctx, s); return n; } diff --git a/source/tools/murun.c b/source/tools/murun.c index 3ed840fb..c74e4bbc 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -3837,19 +3837,14 @@ static void ffi_PDFObject_asString(js_State *J) { fz_context *ctx = js_getcontext(J); pdf_obj *obj = js_touserdata(J, 0, "pdf_obj"); - char *string = NULL; + const char *string = NULL; fz_try(ctx) - string = pdf_to_utf8(ctx, obj); + string = pdf_to_text_string(ctx, obj); fz_catch(ctx) rethrow(J); - if (js_try(J)) { - fz_free(ctx, string); - js_throw(J); - } js_pushstring(J, string); - fz_free(ctx, string); js_endtry(J); } @@ -3858,12 +3853,11 @@ static void ffi_PDFObject_asByteString(js_State *J) fz_context *ctx = js_getcontext(J); pdf_obj *obj = js_touserdata(J, 0, "pdf_obj"); const char *buf; - int i, len = 0; + size_t i, len = 0; - fz_try(ctx) { - buf = pdf_to_str_buf(ctx, obj); - len = pdf_to_str_len(ctx, obj); - } fz_catch(ctx) + fz_try(ctx) + buf = pdf_to_string(ctx, obj, &len); + fz_catch(ctx) rethrow(J); js_newarray(J); @@ 
-4076,21 +4070,14 @@ static void ffi_PDFAnnotation_getContents(js_State *J) { fz_context *ctx = js_getcontext(J); pdf_annot *annot = js_touserdata(J, 0, "pdf_annot"); - char *contents = NULL; + const char *contents = NULL; fz_try(ctx) - contents = pdf_copy_annot_contents(ctx, annot); + contents = pdf_get_annot_contents(ctx, annot); fz_catch(ctx) rethrow(J); - if (js_try(J)) { - fz_free(ctx, contents); - js_throw(J); - } js_pushstring(J, contents); - js_endtry(J); - - fz_free(ctx, contents); } static void ffi_PDFAnnotation_setContents(js_State *J) @@ -4375,21 +4362,14 @@ static void ffi_PDFAnnotation_getAuthor(js_State *J) { fz_context *ctx = js_getcontext(J); pdf_annot *annot = js_touserdata(J, 0, "pdf_annot"); - char *author = NULL; + const char *author = NULL; fz_try(ctx) - author = pdf_copy_annot_author(ctx, annot); + author = pdf_get_annot_author(ctx, annot); fz_catch(ctx) rethrow(J); - if (js_try(J)) { - fz_free(ctx, author); - js_throw(J); - } js_pushstring(J, author); - js_endtry(J); - - fz_free(ctx, author); } static void ffi_PDFAnnotation_setAuthor(js_State *J) |