summaryrefslogtreecommitdiff
path: root/source/pdf/pdf-unicode.c
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2013-06-19 15:29:44 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2013-06-20 16:45:35 +0200
commit: 0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch)
tree: 3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/pdf/pdf-unicode.c
parent: 1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff)
download: mupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz
Rearrange source files.
Diffstat (limited to 'source/pdf/pdf-unicode.c')
-rw-r--r-- source/pdf/pdf-unicode.c 77
1 files changed, 77 insertions, 0 deletions
diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c
new file mode 100644
index 00000000..694cbac6
--- /dev/null
+++ b/source/pdf/pdf-unicode.c
@@ -0,0 +1,77 @@
+#include "mupdf/pdf.h"
+
+/*
+ * Load or synthesize the ToUnicode map for a font.
+ *
+ * xref: document the font belongs to; its fz_context is used for all
+ *	allocation and error handling.
+ * font: font descriptor to fill in. font->encoding is consulted to map
+ *	character codes to CIDs; to_unicode, cid_to_ucs, cid_to_ucs_len
+ *	and size are updated.
+ * strings: optional table of 256 glyph names indexed by character code
+ *	(individual entries may be NULL), or NULL if there is no table.
+ * collection: optional character collection name such as "Adobe-Japan1",
+ *	or NULL.
+ * cmapstm: object that may be an embedded CMap stream.
+ *
+ * The mapping is taken from the following sources:
+ *  1. If cmapstm is a stream, it is loaded and combined with
+ *     font->encoding into font->to_unicode (keyed by CID).
+ *  2. Otherwise, if collection is given, the matching system
+ *     "<collection>-UCS2" cmap is used for the four known collections
+ *     and the function returns; unknown collections leave the font
+ *     untouched.
+ *  3. If strings is given (also after case 1), a 256 entry cid_to_ucs
+ *     table is built from the glyph names.
+ *
+ * Errors are reported by throwing through the fz_context.
+ */
+
+void
+pdf_load_to_unicode(pdf_document *xref, pdf_font_desc *font,
+	char **strings, char *collection, pdf_obj *cmapstm)
+{
+	pdf_cmap *cmap;
+	int cid;
+	int ucsbuf[8];
+	int ucslen;
+	int i;
+	fz_context *ctx = xref->ctx;
+
+	if (pdf_is_stream(xref, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
+	{
+		cmap = pdf_load_embedded_cmap(xref, cmapstm);
+
+		/* Building the remapped cmap can throw; make sure the
+		 * temporary embedded cmap is dropped on every exit path. */
+		fz_try(ctx)
+		{
+			font->to_unicode = pdf_new_cmap(ctx);
+
+			/* With a glyph name table only codes 0..255 are
+			 * scanned, otherwise all 65536 codes are. */
+			for (i = 0; i < (strings ? 256 : 65536); i++)
+			{
+				cid = pdf_lookup_cmap(font->encoding, i);
+				if (cid >= 0)
+				{
+					/* The embedded cmap is keyed by character
+					 * code; the result is keyed by CID. */
+					ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf);
+					if (ucslen == 1)
+						pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]);
+					if (ucslen > 1)
+						pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen);
+				}
+			}
+
+			pdf_sort_cmap(ctx, font->to_unicode);
+		}
+		fz_always(ctx)
+		{
+			pdf_drop_cmap(ctx, cmap);
+		}
+		fz_catch(ctx)
+		{
+			fz_rethrow(ctx);
+		}
+
+		font->size += pdf_cmap_size(ctx, font->to_unicode);
+	}
+
+	else if (collection)
+	{
+		if (!strcmp(collection, "Adobe-CNS1"))
+			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
+		else if (!strcmp(collection, "Adobe-GB1"))
+			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
+		else if (!strcmp(collection, "Adobe-Japan1"))
+			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
+		else if (!strcmp(collection, "Adobe-Korea1"))
+			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
+
+		return;
+	}
+
+	if (strings)
+	{
+		/* TODO one-to-many mappings */
+
+		/* Allocate first so that a failed allocation cannot leave a
+		 * non-zero length paired with a NULL table. */
+		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short));
+		font->cid_to_ucs_len = 256;
+		font->size += 256 * sizeof(unsigned short);
+
+		for (i = 0; i < 256; i++)
+		{
+			if (strings[i])
+				font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]);
+			else
+				font->cid_to_ucs[i] = '?';
+		}
+	}
+
+	if (!font->to_unicode && !font->cid_to_ucs)
+	{
+		/* TODO: synthesize a ToUnicode if it's a freetype font with
+		 * cmap and/or post tables or if it has glyph names. */
+	}
+}