summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2011-04-04 02:27:31 +0200
committerTor Andersson <tor.andersson@artifex.com>2011-04-04 02:27:31 +0200
commitf0c292559fd1d484b0c71fb75d49f2c820234424 (patch)
tree1d141e873cffb920dd51341cb60bd8b91402d94c
parenta287d03f33f05a842fdd725f1ab95dbeafa9fe5b (diff)
downloadmupdf-f0c292559fd1d484b0c71fb75d49f2c820234424.tar.xz
pdf: Add workaround for S22PDF lying about encodings.
A popular pdf creator from china has produced many files without embedded fonts, which state that they use WinAnsiEncoding, but in fact use cp936. This workaround detects some of these cases. Thanks to SumatraPDF for the patch.
-rw-r--r--mupdf/pdf_font.c72
1 files changed, 40 insertions, 32 deletions
diff --git a/mupdf/pdf_font.c b/mupdf/pdf_font.c
index 67af2b38..75bc141f 100644
--- a/mupdf/pdf_font.c
+++ b/mupdf/pdf_font.c
@@ -253,7 +253,6 @@ loadsimplefont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict)
fz_obj *widths;
unsigned short *etable = nil;
pdf_fontdesc *fontdesc;
- fz_bbox bbox;
FT_Face face;
FT_CharMap cmap;
int kind;
@@ -284,25 +283,49 @@ loadsimplefont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict)
if (error)
goto cleanup;
- face = fontdesc->font->ftface;
- kind = ftkind(face);
-
- pdf_logfont("ft name '%s' '%s'\n", face->family_name, face->style_name);
-
- bbox.x0 = (face->bbox.xMin * 1000) / face->units_per_EM;
- bbox.y0 = (face->bbox.yMin * 1000) / face->units_per_EM;
- bbox.x1 = (face->bbox.xMax * 1000) / face->units_per_EM;
- bbox.y1 = (face->bbox.yMax * 1000) / face->units_per_EM;
+ /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
+ if (!*fontdesc->font->name &&
+ !fz_dictgets(dict, "ToUnicode") &&
+ !strcmp(fz_toname(fz_dictgets(dict, "Encoding")), "WinAnsiEncoding") &&
+ fz_toint(fz_dictgets(descriptor, "Flags")) == 4)
+ {
+ /* note: without the comma, pdf_loadfontdescriptor would prefer /FontName over /BaseFont */
+ char *cp936fonts[] = {
+ "\xCB\xCE\xCC\xE5", "SimSun,Regular",
+ "\xBA\xDA\xCC\xE5", "SimHei,Regular",
+ "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
+ "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
+ "\xC1\xA5\xCA\xE9", "SimLi,Regular",
+ NULL
+ };
+ for (i = 0; cp936fonts[i]; i += 2)
+ if (!strcmp(basefont, cp936fonts[i]))
+ break;
+ if (cp936fonts[i])
+ {
+ fz_warn("workaround for S22PDF lying about chinese font encodings");
+ pdf_dropfont(fontdesc);
+ fontdesc = pdf_newfontdesc();
+ error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]);
+ error |= pdf_loadsystemcmap(&fontdesc->encoding, "GBK-EUC-H");
+ error |= pdf_loadsystemcmap(&fontdesc->tounicode, "Adobe-GB1-UCS2");
+ error |= pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-GB1-UCS2");
+ if (error)
+ return fz_rethrow(error, "cannot load font");
- pdf_logfont("ft bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
+ face = fontdesc->font->ftface;
+ kind = ftkind(face);
+ goto skip_encoding;
+ }
+ }
- if (bbox.x0 == bbox.x1)
- fz_setfontbbox(fontdesc->font, -1000, -1000, 2000, 2000);
- else
- fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);
+ face = fontdesc->font->ftface;
+ kind = ftkind(face);
/* Encoding */
+ pdf_logfont("ft name '%s' '%s'\n", face->family_name, face->style_name);
+
symbolic = fontdesc->flags & 4;
if (face->num_charmaps > 0)
@@ -490,6 +513,8 @@ loadsimplefont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict)
if (error)
fz_catch(error, "cannot load tounicode");
+skip_encoding:
+
/* Widths */
pdf_setdefaulthmtx(fontdesc, fontdesc->missingwidth);
@@ -548,7 +573,6 @@ loadcidfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *enco
fz_obj *descriptor;
pdf_fontdesc *fontdesc;
FT_Face face;
- fz_bbox bbox;
int kind;
char collection[256];
char *basefont;
@@ -603,18 +627,6 @@ loadcidfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *enco
face = fontdesc->font->ftface;
kind = ftkind(face);
- bbox.x0 = (face->bbox.xMin * 1000) / face->units_per_EM;
- bbox.y0 = (face->bbox.yMin * 1000) / face->units_per_EM;
- bbox.x1 = (face->bbox.xMax * 1000) / face->units_per_EM;
- bbox.y1 = (face->bbox.yMax * 1000) / face->units_per_EM;
-
- pdf_logfont("ft bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
-
- if (bbox.x0 == bbox.x1)
- fz_setfontbbox(fontdesc->font, -1000, -1000, 2000, 2000);
- else
- fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);
-
/* Check for DynaLab fonts that must use hinting */
if (kind == TRUETYPE)
{
@@ -855,7 +867,6 @@ pdf_loadfontdescriptor(pdf_fontdesc *fontdesc, pdf_xref *xref, fz_obj *dict, cha
{
fz_error error;
fz_obj *obj1, *obj2, *obj3, *obj;
- fz_rect bbox;
char *fontname;
char *origname;
@@ -877,9 +888,6 @@ pdf_loadfontdescriptor(pdf_fontdesc *fontdesc, pdf_xref *xref, fz_obj *dict, cha
fontdesc->xheight = fz_toreal(fz_dictgets(dict, "XHeight"));
fontdesc->missingwidth = fz_toreal(fz_dictgets(dict, "MissingWidth"));
- bbox = pdf_torect(fz_dictgets(dict, "FontBBox"));
- pdf_logfont("bbox [%g %g %g %g]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
-
pdf_logfont("flags %d\n", fontdesc->flags);
obj1 = fz_dictgets(dict, "FontFile");