Add pdf_new_text_string utility function.

Create a PDF 'text string' type string from a UTF-8 input string. If the input is plain ASCII, keep it as is, otherwise re-encode it as UTF-16BE.
author: Tor Andersson <tor.andersson@artifex.com> 2017-11-20 18:24:36 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2017-11-22 23:09:51 +0100
commit: 6aed88ce02fa6fa77b4c9b4e91846a6160119335 (patch)
tree: ec7a55112a54888cfa34484d603d85e0f14df8e8 /source/pdf
parent: b13d7c7d2609ae32cb8fd4931eee12084e7b77b3 (diff)
download: mupdf-6aed88ce02fa6fa77b4c9b4e91846a6160119335.tar.xz
1 files changed, 40 insertions, 0 deletions
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 797d4c56..451d9e1d 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -317,6 +317,75 @@ pdf_to_utf8_name(fz_context *ctx, pdf_document *doc, pdf_obj *src)
 	return dst;
 }
 
+/*
+ * Encode the UTF-8 string s as a UTF-16BE PDF string that starts with the
+ * byte order mark FE FF. Code points above U+FFFF do not fit in a single
+ * 16-bit unit, so they are written as a high/low surrogate pair.
+ */
+static pdf_obj *
+pdf_new_text_string_utf16be(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	const char *ss = s;
+	int c, hi, lo, i = 0, n = 0;
+	unsigned char *p;
+	pdf_obj *obj;
+
+	/* First pass: count 16-bit units so the buffer fits surrogate pairs too. */
+	while (*ss)
+	{
+		ss += fz_chartorune(&c, ss);
+		n += (c >= 0x10000) ? 2 : 1;
+	}
+
+	p = fz_malloc(ctx, n * 2 + 2);
+	p[i++] = 254;
+	p[i++] = 255;
+	while (*s)
+	{
+		s += fz_chartorune(&c, s);
+		if (c >= 0x10000)
+		{
+			/* Split the 20 bits of (c - 0x10000) into two 10-bit halves. */
+			hi = 0xD800 + (((c - 0x10000) >> 10) & 0x3ff);
+			lo = 0xDC00 + ((c - 0x10000) & 0x3ff);
+			p[i++] = (hi>>8) & 0xff;
+			p[i++] = (hi) & 0xff;
+			p[i++] = (lo>>8) & 0xff;
+			p[i++] = (lo) & 0xff;
+		}
+		else
+		{
+			p[i++] = (c>>8) & 0xff;
+			p[i++] = (c) & 0xff;
+		}
+	}
+	/* The scratch buffer is freed on both the success and the error path. */
+	fz_try(ctx)
+		obj = pdf_new_string(ctx, doc, (char*)p, i);
+	fz_always(ctx)
+		fz_free(ctx, p);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+	return obj;
+}
+
+/*
+ * Build a PDF 'text string' from s: 7-bit clean input is stored byte for byte,
+ * anything else is re-encoded as UTF-16BE. (PDFDocEncoding is not attempted.)
+ */
+pdf_obj *
+pdf_new_text_string(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	int len;
+
+	for (len = 0; s[len] != '\0'; ++len)
+		if ((unsigned char)s[len] > 127)
+			return pdf_new_text_string_utf16be(ctx, doc, s);
+
+	/* The loop reached the terminator, so every byte was below 128. */
+	return pdf_new_string(ctx, doc, s, len);
+}
+
 pdf_obj *
 pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
 {
author	Tor Andersson <tor.andersson@artifex.com>	2017-11-20 18:24:36 +0100
committer	Tor Andersson <tor.andersson@artifex.com>	2017-11-22 23:09:51 +0100
commit	6aed88ce02fa6fa77b4c9b4e91846a6160119335 (patch)
tree	ec7a55112a54888cfa34484d603d85e0f14df8e8 /source/pdf
parent	b13d7c7d2609ae32cb8fd4931eee12084e7b77b3 (diff)
download	mupdf-6aed88ce02fa6fa77b4c9b4e91846a6160119335.tar.xz