1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
#include "mupdf/pdf.h"
/* Load or synthesize ToUnicode map for fonts */
/*
 * Compute the smallest and largest source code point covered by the
 * mapping tables of a cmap.
 *
 * cmap: the cmap whose ranges, xranges and mranges tables are scanned.
 * minp: receives the smallest 'low' value found in any of the tables.
 * maxp: receives the largest upper bound found: the 'high' value for
 *	ranges and xranges entries, the 'low' value for mranges entries.
 *
 * If all three tables are empty, *minp is UINT_MAX and *maxp is 0, so
 * callers can detect the "nothing mapped" case as *minp > *maxp.
 */
static void find_min_max_cpt(pdf_cmap *cmap, unsigned int *minp, unsigned int *maxp)
{
	unsigned int lowest = UINT_MAX;
	unsigned int highest = 0;
	int k;

	/* Entries of the 'ranges' table. */
	k = 0;
	while (k < cmap->rlen)
	{
		lowest = cmap->ranges[k].low < lowest ? cmap->ranges[k].low : lowest;
		highest = cmap->ranges[k].high > highest ? cmap->ranges[k].high : highest;
		++k;
	}

	/* Entries of the 'xranges' table. */
	k = 0;
	while (k < cmap->xlen)
	{
		lowest = cmap->xranges[k].low < lowest ? cmap->xranges[k].low : lowest;
		highest = cmap->xranges[k].high > highest ? cmap->xranges[k].high : highest;
		++k;
	}

	/* Entries of the 'mranges' table; only 'low' is consulted here, so
	 * it serves as both the lower and the upper bound of the entry. */
	k = 0;
	while (k < cmap->mlen)
	{
		lowest = cmap->mranges[k].low < lowest ? cmap->mranges[k].low : lowest;
		highest = cmap->mranges[k].low > highest ? cmap->mranges[k].low : highest;
		++k;
	}

	*minp = lowest;
	*maxp = highest;
}
/*
 * Populate a font's Unicode lookup data (font->to_unicode and/or
 * font->cid_to_ucs).
 *
 * doc: document the font belongs to; supplies the fz_context.
 * font: font descriptor to update. font->encoding is used to translate
 *	code points when an embedded ToUnicode stream is remapped.
 * strings: optional (may be NULL) table that is read at indices 0..255,
 *	so it must have at least 256 entries. Non-NULL entries are passed
 *	to pdf_lookup_agl; NULL entries become '?'.
 * collection: optional (may be NULL) character collection name. Only
 *	"Adobe-CNS1", "Adobe-GB1", "Adobe-Japan1" and "Adobe-Korea1" select
 *	a system cmap; other names leave font->to_unicode untouched.
 * cmapstm: candidate embedded ToUnicode cmap stream object.
 *
 * Order of processing:
 *  1. If cmapstm is a stream, it is loaded and re-keyed through
 *     font->encoding into a newly created font->to_unicode, then the
 *     'strings' table (if any) is processed as well.
 *  2. Otherwise, if collection is given, the matching system cmap is
 *     loaded and the function returns without looking at 'strings'.
 *  3. Otherwise only the 'strings' table (if any) is processed.
 */
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
	char **strings, char *collection, pdf_obj *cmapstm)
{
	unsigned int cpt, min, max;
	int gid;
	int ucsbuf[8];
	int ucslen;
	int i;
	fz_context *ctx = doc->ctx;

	if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
	{
		pdf_cmap *gid_from_cpt = font->encoding;
		pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(doc, cmapstm);

		/* NOTE(review): ucs_from_cpt is only dropped at the end of this
		 * branch. If any call in between can throw, it would not be
		 * released -- confirm, and wrap in fz_try/fz_always if so. */

		font->to_unicode = pdf_new_cmap(ctx);

		/* in case the code space range is much larger than the actual number of characters */
		find_min_max_cpt(gid_from_cpt, &min, &max);

		/* min > max is how find_min_max_cpt reports that no table entry
		 * exists (UINT_MAX / 0). Clamping against those values would
		 * force every code space range to [UINT_MAX, UINT_MAX], so there
		 * is nothing sensible to walk; skip the remap entirely. */
		if (min <= max)
		{
			for (i = 0; i < gid_from_cpt->codespace_len; ++i)
			{
				unsigned int l = gid_from_cpt->codespace[i].low;
				unsigned int h = gid_from_cpt->codespace[i].high;

				/* Clamp both ends of the code space range into [min, max]. */
				l = l < min ? min : l > max ? max : l;
				h = h < min ? min : h > max ? max : h;

				for (cpt = l; cpt <= h; ++cpt)
				{
					gid = pdf_lookup_cmap(gid_from_cpt, cpt);
					if (gid >= 0)
					{
						ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt, ucsbuf);
						if (ucslen == 1)
							pdf_map_range_to_range(ctx, font->to_unicode, gid, gid, ucsbuf[0]);
						if (ucslen > 1)
							pdf_map_one_to_many(ctx, font->to_unicode, gid, ucsbuf, ucslen);
					}

					/* Stop explicitly at the upper bound: when h is
					 * UINT_MAX, ++cpt wraps to 0 and "cpt <= h" would
					 * stay true forever. */
					if (cpt == h)
						break;
				}
			}
		}

		pdf_sort_cmap(ctx, font->to_unicode);

		pdf_drop_cmap(ctx, ucs_from_cpt);
		font->size += pdf_cmap_size(ctx, font->to_unicode);
	}
	else if (collection)
	{
		if (!strcmp(collection, "Adobe-CNS1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
		else if (!strcmp(collection, "Adobe-GB1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
		else if (!strcmp(collection, "Adobe-Japan1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
		else if (!strcmp(collection, "Adobe-Korea1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");

		/* A collection was supplied: the 'strings' table is not used. */
		return;
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		/* Fixed 256-entry table indexed by code point. */
		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof *font->cid_to_ucs);
		font->size += 256 * sizeof *font->cid_to_ucs;

		for (cpt = 0; cpt < 256; cpt++)
		{
			if (strings[cpt])
				font->cid_to_ucs[cpt] = pdf_lookup_agl(strings[cpt]);
			else
				font->cid_to_ucs[cpt] = '?';
		}
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}
}
|