summary | refs | log | tree | commit | diff
path: root/source/pdf
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2018-06-19 13:15:31 +0200
committer: Robin Watts <robin.watts@artifex.com> 2018-06-22 16:48:50 +0100
commit: f7ace61076d0ab3c71e1d2bd70967ddb0b86f220 (patch)
tree: 8bd8b4fe4d7414685d162f66911e9951ea3dce16 /source/pdf
parent: 148b0934370336fc8b260e8c3aef83daf4d80ba4 (diff)
download: mupdf-f7ace61076d0ab3c71e1d2bd70967ddb0b86f220.tar.xz
Keep copy of decoded utf8 text string in pdf_obj.
Removes the need to alloc/free text strings in the API, allowing for simple functions like pdf_dict_get_text_string.
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-annot.c 12
-rw-r--r-- source/pdf/pdf-appearance.c 11
-rw-r--r-- source/pdf/pdf-form.c 21
-rw-r--r-- source/pdf/pdf-link.c 2
-rw-r--r-- source/pdf/pdf-nametree.c 10
-rw-r--r-- source/pdf/pdf-object.c 53
-rw-r--r-- source/pdf/pdf-outline.c 2
-rw-r--r-- source/pdf/pdf-parse.c 36
-rw-r--r-- source/pdf/pdf-xref.c 5
9 files changed, 91 insertions, 61 deletions
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
index ed6856de..ba92b7ef 100644
--- a/source/pdf/pdf-annot.c
+++ b/source/pdf/pdf-annot.c
@@ -392,10 +392,10 @@ pdf_set_annot_rect(fz_context *ctx, pdf_annot *annot, const fz_rect *rect)
pdf_dirty_annot(ctx, annot);
}
-char *
-pdf_copy_annot_contents(fz_context *ctx, pdf_annot *annot)
+const char *
+pdf_get_annot_contents(fz_context *ctx, pdf_annot *annot)
{
- return pdf_to_utf8(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(Contents)));
+ return pdf_dict_get_text_string(ctx, annot->obj, PDF_NAME(Contents));
}
void
@@ -1397,11 +1397,11 @@ pdf_annot_has_author(fz_context *ctx, pdf_annot *annot)
return is_allowed_subtype(ctx, annot, PDF_NAME(T), markup_subtypes);
}
-char *
-pdf_copy_annot_author(fz_context *ctx, pdf_annot *annot)
+const char *
+pdf_get_annot_author(fz_context *ctx, pdf_annot *annot)
{
check_allowed_subtypes(ctx, annot, PDF_NAME(T), markup_subtypes);
- return pdf_to_utf8(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(T)));
+ return pdf_dict_get_text_string(ctx, annot->obj, PDF_NAME(T));
}
void
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index c3e94baa..9753be92 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -906,11 +906,12 @@ pdf_write_free_text_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf
{
const char *font;
float size, color[3];
- char *text;
+ const char *text;
float w, h, t, b;
int q, r;
/* /Rotate is an undocumented annotation property supported by Adobe */
+ text = pdf_get_annot_contents(ctx, annot);
r = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(Rotate));
q = pdf_annot_quadding(ctx, annot);
pdf_annot_default_appearance(ctx, annot, &font, &size, color);
@@ -933,13 +934,7 @@ pdf_write_free_text_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf
fz_append_printf(ctx, buf, "%g %g %g %g re\nS\n", b/2, b/2, w-b, h-b);
}
- text = pdf_copy_annot_contents(ctx, annot);
- fz_try(ctx)
- write_variable_text(ctx, annot, buf, res, text, font, size, color, q, w, h, b*2, 1);
- fz_always(ctx)
- fz_free(ctx, text);
- fz_catch(ctx)
- fz_rethrow(ctx);
+ write_variable_text(ctx, annot, buf, res, text, font, size, color, q, w, h, b*2, 1);
}
static void
diff --git a/source/pdf/pdf-form.c b/source/pdf/pdf-form.c
index 54cb1472..93b88e2c 100644
--- a/source/pdf/pdf-form.c
+++ b/source/pdf/pdf-form.c
@@ -1102,7 +1102,7 @@ int pdf_text_widget_set_text(fz_context *ctx, pdf_document *doc, pdf_widget *tw,
}
/* Get either the listed value or the export value. */
-int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int exportval, char *opts[])
+int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int exportval, const char *opts[])
{
pdf_annot *annot = (pdf_annot *)tw;
pdf_obj *optarr;
@@ -1122,11 +1122,11 @@ int pdf_choice_widget_options(fz_context *ctx, pdf_document *doc, pdf_widget *tw
/* If it is a two element array, the second item is the one that we want if we want the listing value. */
if (m == 2)
if (exportval)
- opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, pdf_array_get(ctx, optarr, i), 0));
+ opts[i] = pdf_array_get_text_string(ctx, pdf_array_get(ctx, optarr, i), 0);
else
- opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, pdf_array_get(ctx, optarr, i), 1));
+ opts[i] = pdf_array_get_text_string(ctx, pdf_array_get(ctx, optarr, i), 1);
else
- opts[i] = pdf_to_utf8(ctx, pdf_array_get(ctx, optarr, i));
+ opts[i] = pdf_array_get_text_string(ctx, optarr, i);
}
}
return n;
@@ -1148,7 +1148,7 @@ int pdf_choice_widget_is_multiselect(fz_context *ctx, pdf_document *doc, pdf_wid
}
}
-int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, char *opts[])
+int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, const char *opts[])
{
pdf_annot *annot = (pdf_annot *)tw;
pdf_obj *optarr;
@@ -1162,32 +1162,27 @@ int pdf_choice_widget_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw,
if (pdf_is_string(ctx, optarr))
{
if (opts)
- opts[0] = pdf_to_utf8(ctx, optarr);
-
+ opts[0] = pdf_to_text_string(ctx, optarr);
return 1;
}
else
{
n = pdf_array_len(ctx, optarr);
-
if (opts)
{
for (i = 0; i < n; i++)
{
pdf_obj *elem = pdf_array_get(ctx, optarr, i);
-
if (pdf_is_array(ctx, elem))
elem = pdf_array_get(ctx, elem, 1);
-
- opts[i] = pdf_to_utf8(ctx, elem);
+ opts[i] = pdf_to_text_string(ctx, elem);
}
}
-
return n;
}
}
-void pdf_choice_widget_set_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int n, char *opts[])
+void pdf_choice_widget_set_value(fz_context *ctx, pdf_document *doc, pdf_widget *tw, int n, const char *opts[])
{
pdf_annot *annot = (pdf_annot *)tw;
pdf_obj *optarr = NULL, *opt;
diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c
index 56c11973..e9f1c36f 100644
--- a/source/pdf/pdf-link.c
+++ b/source/pdf/pdf-link.c
@@ -134,7 +134,7 @@ pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_
return NULL;
}
- path = pdf_to_utf8(ctx, filename);
+ path = fz_strdup(ctx, pdf_to_text_string(ctx, filename));
#ifdef _WIN32
if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL)))
{
diff --git a/source/pdf/pdf-nametree.c b/source/pdf/pdf-nametree.c
index 30dd5974..4682024f 100644
--- a/source/pdf/pdf-nametree.c
+++ b/source/pdf/pdf-nametree.c
@@ -140,9 +140,13 @@ pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_ob
pdf_obj *val = pdf_array_get(ctx, names, i + 1);
if (pdf_is_string(ctx, key))
{
- key = pdf_to_utf8_name(ctx, key);
- pdf_dict_put(ctx, dict, key, val);
- pdf_drop_obj(ctx, key);
+ key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
+ fz_try(ctx)
+ pdf_dict_put(ctx, dict, key, val);
+ fz_always(ctx)
+ pdf_drop_obj(ctx, key);
+ fz_catch(ctx)
+ fz_rethrow(ctx);
}
else if (pdf_is_name(ctx, key))
{
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index ec66376a..bc856644 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -59,6 +59,7 @@ typedef struct pdf_obj_num_s
typedef struct pdf_obj_string_s
{
pdf_obj super;
+ char *text; /* utf8 encoded text string */
unsigned int len;
char buf[1];
} pdf_obj_string;
@@ -141,6 +142,7 @@ pdf_new_string(fz_context *ctx, const char *str, size_t len)
obj->super.refs = 1;
obj->super.kind = PDF_STRING;
obj->super.flags = 0;
+ obj->text = NULL;
obj->len = l;
memcpy(obj->buf, str, len);
obj->buf[len] = '\0';
@@ -336,6 +338,32 @@ int pdf_to_str_len(fz_context *ctx, pdf_obj *obj)
return 0;
}
+const char *pdf_to_string(fz_context *ctx, pdf_obj *obj, size_t *sizep)
+{
+ RESOLVE(obj);
+ if (OBJ_IS_STRING(obj))
+ {
+ if (sizep)
+ *sizep = STRING(obj)->len;
+ return STRING(obj)->buf;
+ }
+ if (sizep)
+ *sizep = 0;
+ return "";
+}
+
+const char *pdf_to_text_string(fz_context *ctx, pdf_obj *obj)
+{
+ RESOLVE(obj);
+ if (OBJ_IS_STRING(obj))
+ {
+ if (!STRING(obj)->text)
+ STRING(obj)->text = pdf_new_utf8_from_pdf_string(ctx, STRING(obj)->buf, STRING(obj)->len);
+ return STRING(obj)->text;
+ }
+ return "";
+}
+
void pdf_set_int(fz_context *ctx, pdf_obj *obj, int64_t i)
{
if (OBJ_IS_INT(obj))
@@ -1669,6 +1697,11 @@ pdf_drop_obj(fz_context *ctx, pdf_obj *obj)
pdf_drop_array(ctx, obj);
else if (obj->kind == PDF_DICT)
pdf_drop_dict(ctx, obj);
+ else if (obj->kind == PDF_STRING)
+ {
+ fz_free(ctx, STRING(obj)->text);
+ fz_free(ctx, obj);
+ }
else
fz_free(ctx, obj);
}
@@ -2254,10 +2287,12 @@ const char *pdf_dict_get_name(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
const char *pdf_dict_get_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key, size_t *sizep)
{
- pdf_obj *val = pdf_dict_get(ctx, dict, key);
- if (sizep)
- *sizep = pdf_to_str_len(ctx, val);
- return pdf_to_str_buf(ctx, val);
+ return pdf_to_string(ctx, pdf_dict_get(ctx, dict, key), sizep);
+}
+
+const char *pdf_dict_get_text_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
+{
+ return pdf_to_text_string(ctx, pdf_dict_get(ctx, dict, key));
}
int pdf_array_get_bool(fz_context *ctx, pdf_obj *array, int index)
@@ -2274,3 +2309,13 @@ float pdf_array_get_real(fz_context *ctx, pdf_obj *array, int index)
{
return pdf_to_real(ctx, pdf_array_get(ctx, array, index));
}
+
+const char *pdf_array_get_string(fz_context *ctx, pdf_obj *array, int index, size_t *sizep)
+{
+ return pdf_to_string(ctx, pdf_array_get(ctx, array, index), sizep);
+}
+
+const char *pdf_array_get_text_string(fz_context *ctx, pdf_obj *array, int index)
+{
+ return pdf_to_text_string(ctx, pdf_array_get(ctx, array, index));
+}
diff --git a/source/pdf/pdf-outline.c b/source/pdf/pdf-outline.c
index ea8ecbeb..b091cf4a 100644
--- a/source/pdf/pdf-outline.c
+++ b/source/pdf/pdf-outline.c
@@ -24,7 +24,7 @@ pdf_load_outline_imp(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
obj = pdf_dict_get(ctx, dict, PDF_NAME(Title));
if (obj)
- node->title = pdf_to_utf8(ctx, obj);
+ node->title = fz_strdup(ctx, pdf_to_text_string(ctx, obj));
if ((obj = pdf_dict_get(ctx, dict, PDF_NAME(Dest))) != NULL)
node->uri = pdf_parse_link_dest(ctx, doc, obj);
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index f4075c5a..fb1cc595 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -80,9 +80,11 @@ skip_language_code_utf8(const unsigned char *s, size_t n, size_t i)
return 0;
}
+/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
-pdf_to_utf8_imp(fz_context *ctx, const unsigned char *srcptr, size_t srclen)
+pdf_new_utf8_from_pdf_string(fz_context *ctx, const char *ssrcptr, size_t srclen)
{
+ const unsigned char *srcptr = (const unsigned char*)ssrcptr;
char *dstptr, *dst;
size_t dstlen = 0;
int ucs;
@@ -168,30 +170,29 @@ pdf_to_utf8_imp(fz_context *ctx, const unsigned char *srcptr, size_t srclen)
return dst;
}
-/* Convert Unicode/PdfDocEncoding string into utf-8 */
+/* Convert text string object to UTF-8 */
char *
-pdf_to_utf8(fz_context *ctx, pdf_obj *src)
+pdf_new_utf8_from_pdf_string_obj(fz_context *ctx, pdf_obj *src)
{
- unsigned char *srcptr;
+ const char *srcptr;
size_t srclen;
- srcptr = (unsigned char *) pdf_to_str_buf(ctx, src);
- srclen = pdf_to_str_len(ctx, src);
- return pdf_to_utf8_imp(ctx, srcptr, srclen);
+ srcptr = pdf_to_string(ctx, src, &srclen);
+ return pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen);
}
/* Load text stream and convert to UTF-8 */
char *
-pdf_load_stream_as_utf8(fz_context *ctx, pdf_obj *src)
+pdf_new_utf8_from_pdf_stream_obj(fz_context *ctx, pdf_obj *src)
{
fz_buffer *stmbuf;
- unsigned char *srcptr;
+ char *srcptr;
size_t srclen;
char *dst = NULL;
stmbuf = pdf_load_stream(ctx, src);
- srclen = fz_buffer_storage(ctx, stmbuf, &srcptr);
+ srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr);
fz_try(ctx)
- dst = pdf_to_utf8_imp(ctx, srcptr, srclen);
+ dst = pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen);
fz_always(ctx)
fz_drop_buffer(ctx, stmbuf);
fz_catch(ctx)
@@ -204,17 +205,8 @@ char *
pdf_load_stream_or_string_as_utf8(fz_context *ctx, pdf_obj *src)
{
if (pdf_is_stream(ctx, src))
- return pdf_load_stream_as_utf8(ctx, src);
- return pdf_to_utf8(ctx, src);
-}
-
-pdf_obj *
-pdf_to_utf8_name(fz_context *ctx, pdf_obj *src)
-{
- char *buf = pdf_to_utf8(ctx, src);
- pdf_obj *dst = pdf_new_name(ctx, buf);
- fz_free(ctx, buf);
- return dst;
+ return pdf_new_utf8_from_pdf_stream_obj(ctx, src);
+ return pdf_new_utf8_from_pdf_string_obj(ctx, src);
}
static pdf_obj *
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 9d32ea5e..d60eb089 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -2220,7 +2220,7 @@ pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *b
if (strstr(key, "info:") == key)
{
pdf_obj *info;
- char *s;
+ const char *s;
int n;
info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
@@ -2231,9 +2231,8 @@ pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *b
if (!info)
return -1;
- s = pdf_to_utf8(ctx, info);
+ s = pdf_to_text_string(ctx, info);
n = (int)fz_strlcpy(buf, s, size);
- fz_free(ctx, s);
return n;
}