summaryrefslogtreecommitdiff
path: root/source/pdf
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2017-11-20 18:24:36 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2017-11-22 23:09:51 +0100
commit: 6aed88ce02fa6fa77b4c9b4e91846a6160119335 (patch)
tree: ec7a55112a54888cfa34484d603d85e0f14df8e8 /source/pdf
parent: b13d7c7d2609ae32cb8fd4931eee12084e7b77b3 (diff)
download: mupdf-6aed88ce02fa6fa77b4c9b4e91846a6160119335.tar.xz
Add pdf_new_text_string utility function.
Create a PDF 'text string' type string from a UTF-8 input string. If the input is plain ASCII, keep it as is, otherwise re-encode it as UTF-16BE.
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-parse.c 40
1 files changed, 40 insertions, 0 deletions
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 797d4c56..451d9e1d 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -317,6 +317,46 @@ pdf_to_utf8_name(fz_context *ctx, pdf_document *doc, pdf_obj *src)
return dst;
}
+/*
+ * Encode a UTF-8 string as a PDF string object holding UTF-16BE text,
+ * prefixed with the byte order mark 0xFE 0xFF.
+ *
+ * Code points of 0x10000 and above do not fit in one 16-bit unit and are
+ * written as a surrogate pair (high unit first), so they are no longer
+ * truncated to their low 16 bits.
+ *
+ * The temporary byte buffer is freed whether or not pdf_new_string throws;
+ * any error is rethrown to the caller.
+ */
+static pdf_obj *
+pdf_new_text_string_utf16be(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	const char *t;
+	int c, hi, lo, i = 0, n = 0;
+	unsigned char *p;
+	pdf_obj *obj;
+
+	/* First pass: count 16-bit units (not runes) so the buffer fits exactly. */
+	for (t = s; *t; )
+	{
+		t += fz_chartorune(&c, t);
+		n += (c >= 0x10000) ? 2 : 1;
+	}
+
+	/* Two bytes per unit, plus two bytes for the byte order mark. */
+	p = fz_malloc(ctx, n * 2 + 2);
+	p[i++] = 254;
+	p[i++] = 255;
+
+	/* Second pass: emit each rune big-endian, splitting where necessary. */
+	while (*s)
+	{
+		s += fz_chartorune(&c, s);
+		if (c >= 0x10000)
+		{
+			/* Masking to 10 bits keeps each half inside its surrogate range. */
+			hi = 0xD800 | (((c - 0x10000) >> 10) & 0x3ff);
+			lo = 0xDC00 | ((c - 0x10000) & 0x3ff);
+			p[i++] = (hi>>8) & 0xff;
+			p[i++] = (hi) & 0xff;
+			p[i++] = (lo>>8) & 0xff;
+			p[i++] = (lo) & 0xff;
+		}
+		else
+		{
+			p[i++] = (c>>8) & 0xff;
+			p[i++] = (c) & 0xff;
+		}
+	}
+
+	fz_try(ctx)
+		obj = pdf_new_string(ctx, doc, (char*)p, i);
+	fz_always(ctx)
+		fz_free(ctx, p);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+	return obj;
+}
+
+/*
+ * Build a PDF 'text string' object from the NUL-terminated input s.
+ *
+ * If every byte of s is below 128 the bytes are stored unchanged;
+ * as soon as a byte with the high bit set is seen, the whole string is
+ * handed to pdf_new_text_string_utf16be instead.
+ * In theory, PDFDocEncoding could also be used.
+ */
+pdf_obj *
+pdf_new_text_string(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	const unsigned char *scan;
+
+	for (scan = (const unsigned char *)s; *scan != 0; ++scan)
+	{
+		/* A set high bit means the byte is outside the ASCII range. */
+		if (*scan & 0x80)
+			return pdf_new_text_string_utf16be(ctx, doc, s);
+	}
+
+	/* scan now rests on the terminator, so the distance is the byte length. */
+	return pdf_new_string(ctx, doc, s, (int)(scan - (const unsigned char *)s));
+}
+
pdf_obj *
pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{