summaryrefslogtreecommitdiff
path: root/pdf/pdf_unicode.c
blob: 6c2d6372693b0641740ea4b1feb24a8e090b681b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include "fitz.h"
#include "mupdf.h"

/* Load or synthesize ToUnicode map for fonts */

fz_error
pdf_loadtounicode(pdf_fontdesc *font, pdf_xref *xref,
	char **strings, char *collection, fz_obj *cmapstm)
{
	fz_error error = fz_okay;
	pdf_cmap *cmap;
	int cid;
	int ucsbuf[8];
	int ucslen;
	int i;

	if (pdf_isstream(xref, fz_tonum(cmapstm), fz_togen(cmapstm)))
	{
		pdf_logfont("tounicode embedded cmap\n");

		error = pdf_loadembeddedcmap(&cmap, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_tonum(cmapstm), fz_togen(cmapstm));

		font->tounicode = pdf_newcmap();

		for (i = 0; i < (strings ? 256 : 65536); i++)
		{
			cid = pdf_lookupcmap(font->encoding, i);
			if (cid >= 0)
			{
				ucslen = pdf_lookupcmapfull(cmap, i, ucsbuf);
				if (ucslen == 1)
					pdf_maprangetorange(font->tounicode, cid, cid, ucsbuf[0]);
				if (ucslen > 1)
					pdf_maponetomany(font->tounicode, cid, ucsbuf, ucslen);
			}
		}

		pdf_sortcmap(font->tounicode);

		pdf_dropcmap(cmap);
	}

	else if (collection)
	{
		pdf_logfont("tounicode cid collection (%s)\n", collection);

		error = fz_okay;

		if (!strcmp(collection, "Adobe-CNS1"))
			error = pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2");
		else if (!strcmp(collection, "Adobe-GB1"))
			error = pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2");
		else if (!strcmp(collection, "Adobe-Japan1"))
			error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2");
		else if (!strcmp(collection, "Adobe-Japan2"))
			error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2"); /* where's this? */
		else if (!strcmp(collection, "Adobe-Korea1"))
			error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2");

		if (error)
			return fz_rethrow(error, "cannot load tounicode system cmap %s-UCS2", collection);
	}

	if (strings)
	{
		pdf_logfont("tounicode strings\n");

		/* TODO one-to-many mappings */

		font->ncidtoucs = 256;
		font->cidtoucs = fz_calloc(256, sizeof(unsigned short));

		for (i = 0; i < 256; i++)
		{
			if (strings[i])
				font->cidtoucs[i] = pdf_lookupagl(strings[i]);
			else
				font->cidtoucs[i] = '?';
		}
	}

	if (!font->tounicode && !font->cidtoucs)
	{
		pdf_logfont("tounicode could not be loaded\n");
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		* cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}