diff options
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-encoding.c | 38 | ||||
-rw-r--r-- | source/pdf/pdf-encodings.h | 104 | ||||
-rw-r--r-- | source/pdf/pdf-font.c | 51 |
3 files changed, 189 insertions, 4 deletions
Patch summary: adds Unicode -> KOI8-U and Unicode -> ISO 8859-7 code lookups
(binary search over tables sorted by code point), the matching 128-entry
glyph-name tables for codes 128..255, and an 'encoding' argument to
pdf_add_simple_font() that selects the /Encoding entry written for the font.
Note: tab indentation and the breaks between context lines were reconstructed
from a whitespace-collapsed copy; hunk line counts match the @@ headers.

diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c
index 0b2cdfca..04874457 100644
--- a/source/pdf/pdf-encoding.c
+++ b/source/pdf/pdf-encoding.c
@@ -85,3 +85,41 @@ pdf_lookup_agl_duplicates(int ucs)
 	}
 	return empty_dup_list;
 }
+
+int pdf_cyrillic_from_unicode(int u)
+{
+	int l = 0;
+	int r = nelem(koi8u_from_unicode) - 1;
+	if (u < 128)
+		return u;
+	while (l <= r)
+	{
+		int m = (l + r) >> 1;
+		if (u < koi8u_from_unicode[m].u)
+			r = m - 1;
+		else if (u > koi8u_from_unicode[m].u)
+			l = m + 1;
+		else
+			return koi8u_from_unicode[m].c;
+	}
+	return -1;
+}
+
+int pdf_greek_from_unicode(int u)
+{
+	int l = 0;
+	int r = nelem(iso8859_7_from_unicode) - 1;
+	if (u < 128)
+		return u;
+	while (l <= r)
+	{
+		int m = (l + r) >> 1;
+		if (u < iso8859_7_from_unicode[m].u)
+			r = m - 1;
+		else if (u > iso8859_7_from_unicode[m].u)
+			l = m + 1;
+		else
+			return iso8859_7_from_unicode[m].c;
+	}
+	return -1;
+}
diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h
index 025e9d03..a5d2e7ec 100644
--- a/source/pdf/pdf-encodings.h
+++ b/source/pdf/pdf-encodings.h
@@ -213,3 +213,107 @@ const char * const pdf_win_ansi[256] = {
 	_notdef, _notdef, _notdef, "divide", "oslash", "ugrave", "uacute",
 	"ucircumflex", "udieresis", "yacute", "thorn", "ydieresis"
 };
+
+const char * const pdf_glyph_name_from_koi8u[128] = {
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	_notdef, _notdef, _notdef, "integraltp", _notdef, "bulletoperator",
+	"radical", "approxequal", "lessequal", "greaterequal",
+	"nonbreakingspace", "integralbt", "degree", "twosuperior",
+	"periodcentered", "divide", _notdef, _notdef, _notdef, "iocyrillic",
+	"ecyrillic", _notdef, "icyrillic", "yicyrillic", _notdef, _notdef,
+	_notdef, _notdef, _notdef, "gheupturncyrillic", _notdef, _notdef,
+	_notdef, _notdef, _notdef, "afii10023", "afii10053", _notdef,
+	"afii10055", "afii10056", _notdef, _notdef, _notdef, _notdef, _notdef,
+	"afii10050", _notdef, "copyright", "iucyrillic", "afii10065",
+	"becyrillic", "tsecyrillic", "decyrillic", "iecyrillic", "efcyrillic",
+	"gecyrillic", "khacyrillic", "iicyrillic", "iishortcyrillic",
+	"kacyrillic", "elcyrillic", "emcyrillic", "encyrillic", "ocyrillic",
+	"pecyrillic", "iacyrillic", "ercyrillic", "escyrillic", "tecyrillic",
+	"ucyrillic", "zhecyrillic", "vecyrillic", "softsigncyrillic",
+	"yericyrillic", "zecyrillic", "shacyrillic", "ereversedcyrillic",
+	"shchacyrillic", "checyrillic", "hardsigncyrillic", "afii10048",
+	"afii10017", "afii10018", "afii10040", "afii10021", "afii10022",
+	"afii10038", "afii10020", "afii10039", "afii10026", "afii10027",
+	"afii10028", "afii10029", "afii10030", "afii10031", "afii10032",
+	"afii10033", "afii10049", "afii10034", "afii10035", "afii10036",
+	"afii10037", "afii10024", "afii10019", "afii10046", "afii10045",
+	"afii10025", "afii10042", "afii10047", "afii10043", "afii10041",
+	"afii10044",
+};
+
+static const struct { unsigned short u, c; } koi8u_from_unicode[] = {
+	{0x00a0,154}, {0x00a9,191}, {0x00b0,156}, {0x00b2,157}, {0x00b7,158},
+	{0x00f7,159}, {0x0401,179}, {0x0404,180}, {0x0406,182}, {0x0407,183},
+	{0x0410,225}, {0x0411,226}, {0x0412,247}, {0x0413,231}, {0x0414,228},
+	{0x0415,229}, {0x0416,246}, {0x0417,250}, {0x0418,233}, {0x0419,234},
+	{0x041a,235}, {0x041b,236}, {0x041c,237}, {0x041d,238}, {0x041e,239},
+	{0x041f,240}, {0x0420,242}, {0x0421,243}, {0x0422,244}, {0x0423,245},
+	{0x0424,230}, {0x0425,232}, {0x0426,227}, {0x0427,254}, {0x0428,251},
+	{0x0429,253}, {0x042a,255}, {0x042b,249}, {0x042c,248}, {0x042d,252},
+	{0x042e,224}, {0x042f,241}, {0x0430,193}, {0x0431,194}, {0x0432,215},
+	{0x0433,199}, {0x0434,196}, {0x0435,197}, {0x0436,214}, {0x0437,218},
+	{0x0438,201}, {0x0439,202}, {0x043a,203}, {0x043b,204}, {0x043c,205},
+	{0x043d,206}, {0x043e,207}, {0x043f,208}, {0x0440,210}, {0x0441,211},
+	{0x0442,212}, {0x0443,213}, {0x0444,198}, {0x0445,200}, {0x0446,195},
+	{0x0447,222}, {0x0448,219}, {0x0449,221}, {0x044a,223}, {0x044b,217},
+	{0x044c,216}, {0x044d,220}, {0x044e,192}, {0x044f,209}, {0x0451,163},
+	{0x0454,164}, {0x0456,166}, {0x0457,167}, {0x0490,189}, {0x0491,173},
+	{0x2219,149}, {0x221a,150}, {0x2248,151}, {0x2264,152}, {0x2265,153},
+	{0x2320,147}, {0x2321,155}, {0x2500,128}, {0x2502,129}, {0x250c,130},
+	{0x2510,131}, {0x2514,132}, {0x2518,133}, {0x251c,134}, {0x2524,135},
+	{0x252c,136}, {0x2534,137}, {0x253c,138}, {0x2550,160}, {0x2551,161},
+	{0x2552,162}, {0x2554,165}, {0x2557,168}, {0x2558,169}, {0x2559,170},
+	{0x255a,171}, {0x255b,172}, {0x255d,174}, {0x255e,175}, {0x255f,176},
+	{0x2560,177}, {0x2561,178}, {0x2563,181}, {0x2566,184}, {0x2567,185},
+	{0x2568,186}, {0x2569,187}, {0x256a,188}, {0x256c,190}, {0x2580,139},
+	{0x2584,140}, {0x2588,141}, {0x258c,142}, {0x2590,143}, {0x2591,144},
+	{0x2592,145}, {0x2593,146}, {0x25a0,148}
+};
+
+const char * const pdf_glyph_name_from_iso8859_7[128] = {
+	/* the block drawing characters have been omitted */
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	_notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+	"nonbreakingspace", "quoteleft", "quoteright", "sterling", "euro",
+	_notdef, "brokenbar", "section", "dieresis", "copyright",
+	"ypogegrammeni", "guillemotleft", "logicalnot", "softhyphen", _notdef,
+	"horizontalbar", "degree", "plusminus", "twosuperior", "threesuperior",
+	"tonos", "dieresistonos", "Alphatonos", "periodcentered",
+	"Epsilontonos", "Etatonos", "Iotatonos", "guillemotright",
+	"Omicrontonos", "onehalf", "Upsilontonos", "Omegatonos",
+	"iotadieresistonos", "Alpha", "Beta", "Gamma", "Deltagreek", "Epsilon",
+	"Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi",
+	"Omicron", "Pi", "Rho", _notdef, "Sigma", "Tau", "Upsilon", "Phi",
+	"Chi", "Psi", "Omegagreek", "Iotadieresis", "Upsilondieresis",
+	"alphatonos", "epsilontonos", "etatonos", "iotatonos",
+	"upsilondieresistonos", "alpha", "beta", "gamma", "delta", "epsilon",
+	"zeta", "eta", "theta", "iota", "kappa", "lambda", "mugreek", "nu",
+	"xi", "omicron", "pi", "rho", "sigmafinal", "sigma", "tau", "upsilon",
+	"phi", "chi", "psi", "omega", "iotadieresis", "upsilondieresis",
+	"omicrontonos", "upsilontonos", "omegatonos", _notdef,
+};
+
+static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = {
+	{0x00a0,160}, {0x00a3,163}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168},
+	{0x00a9,169}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00b0,176},
+	{0x00b1,177}, {0x00b2,178}, {0x00b3,179}, {0x00b7,183}, {0x00bb,187},
+	{0x00bd,189}, {0x037a,170}, {0x0384,180}, {0x0385,181}, {0x0386,182},
+	{0x0388,184}, {0x0389,185}, {0x038a,186}, {0x038c,188}, {0x038e,190},
+	{0x038f,191}, {0x0390,192}, {0x0391,193}, {0x0392,194}, {0x0393,195},
+	{0x0394,196}, {0x0395,197}, {0x0396,198}, {0x0397,199}, {0x0398,200},
+	{0x0399,201}, {0x039a,202}, {0x039b,203}, {0x039c,204}, {0x039d,205},
+	{0x039e,206}, {0x039f,207}, {0x03a0,208}, {0x03a1,209}, {0x03a3,211},
+	{0x03a4,212}, {0x03a5,213}, {0x03a6,214}, {0x03a7,215}, {0x03a8,216},
+	{0x03a9,217}, {0x03aa,218}, {0x03ab,219}, {0x03ac,220}, {0x03ad,221},
+	{0x03ae,222}, {0x03af,223}, {0x03b0,224}, {0x03b1,225}, {0x03b2,226},
+	{0x03b3,227}, {0x03b4,228}, {0x03b5,229}, {0x03b6,230}, {0x03b7,231},
+	{0x03b8,232}, {0x03b9,233}, {0x03ba,234}, {0x03bb,235}, {0x03bc,236},
+	{0x03bd,237}, {0x03be,238}, {0x03bf,239}, {0x03c0,240}, {0x03c1,241},
+	{0x03c2,242}, {0x03c3,243}, {0x03c4,244}, {0x03c5,245}, {0x03c6,246},
+	{0x03c7,247}, {0x03c8,248}, {0x03c9,249}, {0x03ca,250}, {0x03cb,251},
+	{0x03cc,252}, {0x03cd,253}, {0x03ce,254}, {0x2015,175}, {0x2018,161},
+	{0x2019,162}, {0x20ac,164},
+};
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 6be72250..69044c53 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -2147,9 +2147,51 @@ pdf_add_cid_font(fz_context *ctx, pdf_document *doc, fz_font *font)
 	return fref;
 }
 
-/* Creates simple font */
+/* Create simple (8-bit encoding) fonts */
+
+static void
+pdf_add_simple_font_encoding_imp(fz_context *ctx, pdf_document *doc, pdf_obj *font, const char * const glyph_names[])
+{
+	pdf_obj *enc, *diff;
+	int i, last;
+
+	pdf_dict_put_drop(ctx, font, PDF_NAME_Encoding, enc = pdf_new_dict(ctx, doc, 3));
+	pdf_dict_put(ctx, enc, PDF_NAME_BaseEncoding, PDF_NAME_WinAnsiEncoding);
+	pdf_dict_put_drop(ctx, enc, PDF_NAME_Differences, diff = pdf_new_array(ctx, doc, 129));
+	last = 0;
+	for (i = 128; i < 256; ++i)
+	{
+		const char *glyph = glyph_names[i-128];
+		if (glyph)
+		{
+			if (last != i-1)
+				pdf_array_push_int(ctx, diff, i);
+			last = i;
+			pdf_array_push_name(ctx, diff, glyph);
+		}
+	}
+}
+
+static void
+pdf_add_simple_font_encoding(fz_context *ctx, pdf_document *doc, pdf_obj *fobj, int encoding)
+{
+	switch (encoding)
+	{
+	default:
+	case PDF_SIMPLE_ENCODING_LATIN:
+		pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding);
+		break;
+	case PDF_SIMPLE_ENCODING_GREEK:
+		pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_iso8859_7);
+		break;
+	case PDF_SIMPLE_ENCODING_CYRILLIC:
+		pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_koi8u);
+		break;
+	}
+}
+
 pdf_obj *
-pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
+pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font, int encoding)
 {
 	pdf_obj *fobj = NULL;
 	pdf_obj *fref = NULL;
@@ -2174,7 +2216,7 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
 		/* Before we add this font as a resource check if the same font
 		 * already exists in our resources for this doc. If yes, then
 		 * hand back that reference */
-		fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, 0, font->buffer, digest);
+		fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, encoding, font->buffer, digest);
 		if (fref == NULL)
 		{
 			fobj = pdf_new_dict(ctx, doc, 10);
@@ -2184,7 +2226,6 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
 			case TYPE1: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_Type1); break;
 			case TRUETYPE: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_TrueType); break;
 			}
-			pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding);
 
 			if (!is_builtin_font(ctx, font))
 			{
@@ -2213,6 +2254,8 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
 				pdf_dict_put_name(ctx, fobj, PDF_NAME_BaseFont, clean_font_name(font->name));
 			}
 
+			pdf_add_simple_font_encoding(ctx, doc, fobj, encoding);
+
 			fref = pdf_add_object(ctx, doc, fobj);
 
 			/* Add ref to our font resource hash table. */