diff options
-rw-r--r-- | Jamfile | 4 | ||||
-rw-r--r-- | include/mupdf/rsrc.h | 15 | ||||
-rw-r--r-- | mupdf/font.c | 58 | ||||
-rw-r--r-- | mupdf/fontagl.c (renamed from mupdf/fontagl.h) | 41 | ||||
-rw-r--r-- | mupdf/fontenc.c (renamed from mupdf/fontenc.h) | 34 | ||||
-rw-r--r-- | mupdf/interpret.c | 2 | ||||
-rw-r--r-- | mupdf/type3.c | 48 | ||||
-rw-r--r-- | object/simple.c | 4 | ||||
-rw-r--r-- | test/pdfrip.c | 4 |
9 files changed, 114 insertions, 96 deletions
@@ -110,8 +110,10 @@ Library libmupdf : mupdf/function.c mupdf/cmap.c - mupdf/font.c + mupdf/fontagl.c + mupdf/fontenc.c mupdf/fontfile.c + mupdf/font.c mupdf/type3.c mupdf/colorspace.c mupdf/image.c diff --git a/include/mupdf/rsrc.h b/include/mupdf/rsrc.h index 30a059b1..bb29dc8a 100644 --- a/include/mupdf/rsrc.h +++ b/include/mupdf/rsrc.h @@ -103,6 +103,21 @@ fz_error *pdf_loadimage(pdf_image **imgp, pdf_xref *xref, fz_obj *obj, fz_obj *s * CMap and Font */ +struct pdf_aglpair { char *name; unsigned short code; }; +extern struct pdf_aglpair pdf_adobeglyphlist[]; +extern int pdf_adobeglyphlen; + +void pdf_loadencoding(char **estrings, char *encoding); +int pdf_lookupagl(char *name); + +extern char *pdf_macroman[256]; +extern char *pdf_macexpert[256]; +extern char *pdf_winansi[256]; +extern char *pdf_standard[256]; +extern char *pdf_expert[256]; +extern char *pdf_symbol[256]; +extern char *pdf_zapfdingbats[256]; + typedef struct pdf_font_s pdf_font; struct pdf_font_s diff --git a/mupdf/font.c b/mupdf/font.c index 231cfc1f..d9683596 100644 --- a/mupdf/font.c +++ b/mupdf/font.c @@ -10,9 +10,6 @@ #include FT_FREETYPE_H #include <freetype/internal/ftobjs.h> -#include "fontenc.h" -#include "fontagl.h" - static char *basefontnames[14][7] = { { "Courier", "CourierNew", "CourierNewPSMT", 0 }, @@ -163,54 +160,11 @@ static char *cleanfontname(char *fontname) return fontname; } -static void loadencoding(char **estrings, char *encoding) -{ - char **bstrings = nil; - int i; - - if (!strcmp(encoding, "MacRomanEncoding")) - bstrings = macroman; - if (!strcmp(encoding, "MacExpertEncoding")) - bstrings = macexpert; - if (!strcmp(encoding, "WinAnsiEncoding")) - bstrings = winansi; - - if (bstrings) - for (i = 0; i < 256; i++) - estrings[i] = bstrings[i]; -} - -static int aglcode(char *name) -{ - int l = 0; - int r = adobeglyphlen; - - while (l <= r) - { - int m = (l + r) >> 1; - int c = strcmp(name, adobeglyphlist[m].name); - if (c < 0) - r = m - 1; - else if (c > 0) - l = m + 1; - else - return adobeglyphlist[m].code; - } - - if (strstr(name, "uni") == name) - return strtol(name + 3, 0, 16); - - if (strstr(name, "u") == name) - return strtol(name + 1, 0, 16); - - return -1; -} - static int mrecode(char *name) { int i; for (i = 0; i < 256; i++) - if (macroman[i] && !strcmp(name, macroman[i])) + if (pdf_macroman[i] && !strcmp(name, pdf_macroman[i])) return i; return -1; } @@ -376,7 +330,7 @@ printf("loading simple font %s\n", basefont); for (i = 0; i < 256; i++) { - estrings[i] = _notdef; + estrings[i] = nil; etable[i] = 0; } @@ -388,7 +342,7 @@ printf("loading simple font %s\n", basefont); goto cleanup; if (fz_isname(encoding)) - loadencoding(estrings, fz_toname(encoding)); + pdf_loadencoding(estrings, fz_toname(encoding)); if (fz_isdict(encoding)) { @@ -396,7 +350,7 @@ printf("loading simple font %s\n", basefont); base = fz_dictgets(encoding, "BaseEncoding"); if (fz_isname(base)) - loadencoding(estrings, fz_toname(base)); + pdf_loadencoding(estrings, fz_toname(base)); diff = fz_dictgets(encoding, "Differences"); if (fz_isarray(diff)) @@ -434,7 +388,7 @@ printf(" winansi cmap\n"); for (i = 0; i < 256; i++) if (estrings[i]) { - k = aglcode(estrings[i]); + k = pdf_lookupagl(estrings[i]); if (k == -1) etable[i] = FT_Get_Name_Index(face, estrings[i]); else @@ -497,7 +451,7 @@ printf(" builtin encoding\n"); for (i = 0; i < 256; i++) if (estrings[i]) - utable[i] = aglcode(estrings[i]); + utable[i] = pdf_lookupagl(estrings[i]); else utable[i] = i; diff --git a/mupdf/fontagl.h b/mupdf/fontagl.c index 847c0a21..2b083313 100644 --- a/mupdf/fontagl.h +++ b/mupdf/fontagl.c @@ -1,19 +1,44 @@ -/* Name: Adobe Glyph List +/* Name: Adobe Glyph List # Table version: 2.0 - # Date: September 20, 2002 + # Date: September 20, 2002 # # See http:partners.adobe.com/asn/developer/typeforum/unicodegn.html # # Format: Semicolon-delimited fields: - # (1) glyph name - # (2) Unicode scalar value + # (1) glyph name + # (2) Unicode scalar value */ -struct aglpair { char *name; int code; }; +#include <fitz.h> +#include <mupdf.h> -#define adobeglyphlen (sizeof(adobeglyphlist) / sizeof(struct aglpair)) +int pdf_lookupagl(char *name) +{ + int l = 0; + int r = pdf_adobeglyphlen; + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(name, pdf_adobeglyphlist[m].name); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return pdf_adobeglyphlist[m].code; + } + + if (strstr(name, "uni") == name) + return strtol(name + 3, 0, 16); -static struct aglpair adobeglyphlist[] = + if (strstr(name, "u") == name) + return strtol(name + 1, 0, 16); + + return -1; +} + +struct pdf_aglpair pdf_adobeglyphlist[] = { {"A",0x0041}, @@ -4303,3 +4328,5 @@ static struct aglpair adobeglyphlist[] = */ }; +int pdf_adobeglyphlen = (sizeof(pdf_adobeglyphlist) / sizeof(struct pdf_aglpair)); + diff --git a/mupdf/fontenc.h b/mupdf/fontenc.c index d64e3e49..cdf082d7 100644 --- a/mupdf/fontenc.h +++ b/mupdf/fontenc.c @@ -2,9 +2,29 @@ * Built-in font tables */ +#include <fitz.h> +#include <mupdf.h> + #define _notdef 0 -static char *macroman[256] = { _notdef, _notdef, +void pdf_loadencoding(char **estrings, char *encoding) +{ + char **bstrings = nil; + int i; + + if (!strcmp(encoding, "MacRomanEncoding")) + bstrings = pdf_macroman; + if (!strcmp(encoding, "MacExpertEncoding")) + bstrings = pdf_macexpert; + if (!strcmp(encoding, "WinAnsiEncoding")) + bstrings = pdf_winansi; + + if (bstrings) + for (i = 0; i < 256; i++) + estrings[i] = bstrings[i]; +} + +char *pdf_macroman[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -44,7 +64,7 @@ static char *macroman[256] = { _notdef, _notdef, "Ugrave", "dotlessi", "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron" }; -static char *macexpert[256] = { _notdef, _notdef, +char *pdf_macexpert[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -97,7 +117,7 @@ static char *macexpert[256] = { _notdef, _notdef, "msuperior", "commasuperior", "periodsuperior", "Dotaccentsmall", "Ringsmall", _notdef, _notdef, _notdef, _notdef }; -static char *winansi[256] = { _notdef, _notdef, _notdef, +char *pdf_winansi[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -140,7 +160,7 @@ static char *winansi[256] = { _notdef, _notdef, _notdef, #if 0 -static char *standard[256] = { _notdef, _notdef, +char *pdf_standard[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -180,7 +200,7 @@ static char *standard[256] = { _notdef, _notdef, _notdef, "dotlessi", _notdef, _notdef, "lslash", "oslash", "oe", "germandbls", _notdef, _notdef, _notdef, _notdef }; -static char *expert[256] = { _notdef, _notdef, _notdef, +char *pdf_expert[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -233,7 +253,7 @@ static char *expert[256] = { _notdef, _notdef, _notdef, "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall", "Thornsmall", "Ydieresissmall" }; -static char *symbol[256] = { _notdef, _notdef, _notdef, +char *pdf_symbol[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, @@ -280,7 +300,7 @@ static char *symbol[256] = { _notdef, _notdef, _notdef, "bracketrightex", "bracketrightbt", "bracerighttp", "bracerightmid", "bracerightbt", _notdef }; -static char *zapfdingbats[256] = { _notdef, _notdef, +char *pdf_zapfdingbats[256] = { _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, diff --git a/mupdf/interpret.c b/mupdf/interpret.c index 5146bb2b..e377c5b6 100644 --- a/mupdf/interpret.c +++ b/mupdf/interpret.c @@ -82,8 +82,6 @@ runxobject(pdf_csi *csi, pdf_xref *xref, pdf_xobject *xobj) fz_node *transform; fz_file *file; -puts("run xobject"); - /* gsave */ if (csi->gtop == 31) return fz_throw("gstate overflow in content stream"); diff --git a/mupdf/type3.c b/mupdf/type3.c index eb403db4..60d7c38c 100644 --- a/mupdf/type3.c +++ b/mupdf/type3.c @@ -5,27 +5,6 @@ extern pdf_font *pdf_newfont(char *name); -/* TODO: factor out loadencoding which is common with simple fonts */ - -#include "fontenc.h" - -static void loadencoding(char **estrings, char *encoding) -{ - char **bstrings = nil; - int i; - - if (!strcmp(encoding, "MacRomanEncoding")) - bstrings = macroman; - if (!strcmp(encoding, "MacExpertEncoding")) - bstrings = macexpert; - if (!strcmp(encoding, "WinAnsiEncoding")) - bstrings = winansi; - - if (bstrings) - for (i = 0; i < 256; i++) - estrings[i] = bstrings[i]; -} - static void t3dropfont(fz_font *font) { @@ -124,6 +103,7 @@ pdf_loadtype3font(pdf_font **fontp, pdf_xref *xref, fz_obj *dict) { fz_error *error; char buf[256]; + unsigned short *utable; char *estrings[256]; pdf_font *font; fz_obj *encoding; @@ -188,7 +168,7 @@ printf(" matrix [%g %g %g %g %g %g]\n", goto cleanup; if (fz_isname(obj)) - loadencoding(estrings, fz_toname(encoding)); + pdf_loadencoding(estrings, fz_toname(encoding)); if (fz_isdict(encoding)) { @@ -196,7 +176,7 @@ printf(" matrix [%g %g %g %g %g %g]\n", base = fz_dictgets(encoding, "BaseEncoding"); if (fz_isname(base)) - loadencoding(estrings, fz_toname(base)); + pdf_loadencoding(estrings, fz_toname(base)); diff = fz_dictgets(encoding, "Differences"); if (fz_isarray(diff)) @@ -223,6 +203,28 @@ printf(" matrix [%g %g %g %g %g %g]\n", goto cleanup; /* + * ToUnicode + */ + + utable = fz_malloc(sizeof(unsigned short) * 256); + if (!utable) + goto cleanup; + + for (i = 0; i < 256; i++) + if (estrings[i]) + utable[i] = pdf_lookupagl(estrings[i]); + else + utable[i] = i; + + if (fz_dictgets(dict, "ToUnicode")) + { +printf(" load tounicode cmap for type3 font\n"); + } + + font->ncidtoucs = 256; + font->cidtoucs = utable; + + /* * Widths */ diff --git a/object/simple.c b/object/simple.c index aa736b8e..629fdf85 100644 --- a/object/simple.c +++ b/object/simple.c @@ -189,7 +189,7 @@ fz_toname(fz_obj *obj) { if (fz_isname(obj)) return obj->u.n; - return nil; + return ""; } char * @@ -197,7 +197,7 @@ fz_tostringbuf(fz_obj *obj) { if (fz_isstring(obj)) return obj->u.s.buf; - return nil; + return ""; } int diff --git a/test/pdfrip.c b/test/pdfrip.c index 45577b82..0837e969 100644 --- a/test/pdfrip.c +++ b/test/pdfrip.c @@ -83,9 +83,9 @@ void dumptext(fz_node *node) if (fabs(dy) > 1.6) puts("\n"); - else if (fabs(dy) > 0.1) + else if (fabs(dy) > 0.2) putchar('\n'); - else if (fabs(dx) > 0.1) + else if (fabs(dx) > 0.2) putchar(' '); h = fz_gethmtx(text->font, cid); |