summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/pdf/object.h1
-rw-r--r--source/pdf/pdf-parse.c40
2 files changed, 41 insertions, 0 deletions
diff --git a/include/mupdf/pdf/object.h b/include/mupdf/pdf/object.h
index a1825eb2..21ed8595 100644
--- a/include/mupdf/pdf/object.h
+++ b/include/mupdf/pdf/object.h
@@ -17,6 +17,7 @@ pdf_obj *pdf_new_int(fz_context *ctx, pdf_document *doc, int64_t i);
pdf_obj *pdf_new_real(fz_context *ctx, pdf_document *doc, float f);
pdf_obj *pdf_new_name(fz_context *ctx, pdf_document *doc, const char *str);
pdf_obj *pdf_new_string(fz_context *ctx, pdf_document *doc, const char *str, size_t len);
+pdf_obj *pdf_new_text_string(fz_context *ctx, pdf_document *doc, const char *s);
pdf_obj *pdf_new_indirect(fz_context *ctx, pdf_document *doc, int num, int gen);
pdf_obj *pdf_new_array(fz_context *ctx, pdf_document *doc, int initialcap);
pdf_obj *pdf_new_dict(fz_context *ctx, pdf_document *doc, int initialcap);
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 797d4c56..451d9e1d 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -317,6 +317,46 @@ pdf_to_utf8_name(fz_context *ctx, pdf_document *doc, pdf_obj *src)
return dst;
}
+/*
+ * Build a PDF string object holding the UTF-8 input 's' re-encoded as
+ * UTF-16BE, prefixed with the byte order mark FE FF.
+ *
+ * Code points in U+10000..U+10FFFF are written as a surrogate pair (two
+ * 16-bit units); anything above U+10FFFF cannot be represented in UTF-16
+ * and is replaced by U+FFFD. All other code points are written as a single
+ * 16-bit unit.
+ *
+ * The temporary byte buffer is always freed, whether or not
+ * pdf_new_string() throws; any exception is rethrown to the caller.
+ */
+static pdf_obj *
+pdf_new_text_string_utf16be(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	const char *scan = s;
+	size_t units = 0, i = 0;
+	unsigned char *p;
+	pdf_obj *obj = NULL;
+	int c;
+
+	/* First pass: count 16-bit units so the buffer has room for surrogate pairs. */
+	while (*scan)
+	{
+		scan += fz_chartorune(&c, scan);
+		units += (c >= 0x10000 && c <= 0x10FFFF) ? 2 : 1;
+	}
+
+	/* Two bytes per unit, plus two bytes for the byte order mark. */
+	p = fz_malloc(ctx, units * 2 + 2);
+	p[i++] = 254;
+	p[i++] = 255;
+
+	/* Second pass: emit big-endian code units. */
+	while (*s)
+	{
+		s += fz_chartorune(&c, s);
+		if (c > 0x10FFFF)
+			c = 0xFFFD; /* out of UTF-16 range; counted as one unit above */
+		if (c >= 0x10000)
+		{
+			int hi = 0xD800 | (((c - 0x10000) >> 10) & 0x3ff);
+			int lo = 0xDC00 | ((c - 0x10000) & 0x3ff);
+			p[i++] = (hi >> 8) & 0xff;
+			p[i++] = hi & 0xff;
+			p[i++] = (lo >> 8) & 0xff;
+			p[i++] = lo & 0xff;
+		}
+		else
+		{
+			p[i++] = (c >> 8) & 0xff;
+			p[i++] = c & 0xff;
+		}
+	}
+
+	fz_try(ctx)
+		obj = pdf_new_string(ctx, doc, (char*)p, i);
+	fz_always(ctx)
+		fz_free(ctx, p);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+	return obj;
+}
+
+/*
+ * Create a PDF 'text string' from 's'.
+ *
+ * If every byte of the input is below 128 the bytes are stored as-is
+ * (plain ASCII); as soon as a byte >= 128 is seen the whole input is
+ * handed to the UTF-16BE encoder instead.
+ * PDFDocEncoding would be another possible target, but is not used here.
+ */
+pdf_obj *
+pdf_new_text_string(fz_context *ctx, pdf_document *doc, const char *s)
+{
+	const char *end;
+
+	for (end = s; *end != 0; ++end)
+	{
+		/* Compare as unsigned so that high bytes are detected even where char is signed. */
+		if ((unsigned char)*end >= 128)
+			return pdf_new_text_string_utf16be(ctx, doc, s);
+	}
+
+	/* Pure ASCII: 'end' now points at the terminator, so end - s is the length. */
+	return pdf_new_string(ctx, doc, s, (size_t)(end - s));
+}
+
pdf_obj *
pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{