summary | refs | log | tree | commit | diff
path: root/source/pdf
diff options
context:
space:
mode:
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-encoding.c 38
-rw-r--r-- source/pdf/pdf-encodings.h 104
-rw-r--r-- source/pdf/pdf-font.c 51
3 files changed, 189 insertions, 4 deletions
diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c
index 0b2cdfca..04874457 100644
--- a/source/pdf/pdf-encoding.c
+++ b/source/pdf/pdf-encoding.c
@@ -85,3 +85,41 @@ pdf_lookup_agl_duplicates(int ucs)
}
return empty_dup_list;
}
+
+/* Convert a Unicode code point to the 8-bit code listed for it in
+ * koi8u_from_unicode. Values below 128 are returned as they are.
+ * Returns -1 if the table has no entry for u.
+ */
+int pdf_cyrillic_from_unicode(int u)
+{
+	int lo, hi;
+
+	if (u < 128)
+		return u;
+
+	/* Binary search; the table is ordered by its u field. */
+	lo = 0;
+	hi = nelem(koi8u_from_unicode) - 1;
+	while (lo <= hi)
+	{
+		int mid = (lo + hi) / 2;
+		int key = koi8u_from_unicode[mid].u;
+
+		if (u == key)
+			return koi8u_from_unicode[mid].c;
+		if (u < key)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	return -1;
+}
+
+/* Convert a Unicode code point to the 8-bit code listed for it in
+ * iso8859_7_from_unicode. Values below 128 are returned as they are.
+ * Returns -1 if the table has no entry for u.
+ */
+int pdf_greek_from_unicode(int u)
+{
+	int lo, hi;
+
+	if (u < 128)
+		return u;
+
+	/* Binary search; the table is ordered by its u field. */
+	lo = 0;
+	hi = nelem(iso8859_7_from_unicode) - 1;
+	while (lo <= hi)
+	{
+		int mid = (lo + hi) / 2;
+		int key = iso8859_7_from_unicode[mid].u;
+
+		if (u == key)
+			return iso8859_7_from_unicode[mid].c;
+		if (u < key)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	return -1;
+}
diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h
index 025e9d03..a5d2e7ec 100644
--- a/source/pdf/pdf-encodings.h
+++ b/source/pdf/pdf-encodings.h
@@ -213,3 +213,107 @@ const char * const pdf_win_ansi[256] = { _notdef, _notdef, _notdef,
"divide", "oslash", "ugrave", "uacute", "ucircumflex", "udieresis",
"yacute", "thorn", "ydieresis"
};
+/* Glyph names for the upper half (codes 128..255) of the KOI8-U encoding;
+ * entry [code - 128] names the glyph for that code.
+ * The box-drawing and block-element codes carry no name here (they are
+ * _notdef), although koi8u_from_unicode still maps their code points.
+ */
+const char * const pdf_glyph_name_from_koi8u[128] = {
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, "integraltp", _notdef, "bulletoperator",
+ "radical", "approxequal", "lessequal", "greaterequal",
+ "nonbreakingspace", "integralbt", "degree", "twosuperior",
+ "periodcentered", "divide", _notdef, _notdef, _notdef, "iocyrillic",
+ "ecyrillic", _notdef, "icyrillic", "yicyrillic", _notdef, _notdef,
+ _notdef, _notdef, _notdef, "gheupturncyrillic", _notdef, _notdef,
+ _notdef, _notdef, _notdef, "afii10023", "afii10053", _notdef,
+ "afii10055", "afii10056", _notdef, _notdef, _notdef, _notdef, _notdef,
+ "afii10050", _notdef, "copyright", "iucyrillic", "afii10065",
+ "becyrillic", "tsecyrillic", "decyrillic", "iecyrillic", "efcyrillic",
+ "gecyrillic", "khacyrillic", "iicyrillic", "iishortcyrillic",
+ "kacyrillic", "elcyrillic", "emcyrillic", "encyrillic", "ocyrillic",
+ "pecyrillic", "iacyrillic", "ercyrillic", "escyrillic", "tecyrillic",
+ "ucyrillic", "zhecyrillic", "vecyrillic", "softsigncyrillic",
+ "yericyrillic", "zecyrillic", "shacyrillic", "ereversedcyrillic",
+ "shchacyrillic", "checyrillic", "hardsigncyrillic", "afii10048",
+ "afii10017", "afii10018", "afii10040", "afii10021", "afii10022",
+ "afii10038", "afii10020", "afii10039", "afii10026", "afii10027",
+ "afii10028", "afii10029", "afii10030", "afii10031", "afii10032",
+ "afii10033", "afii10049", "afii10034", "afii10035", "afii10036",
+ "afii10037", "afii10024", "afii10019", "afii10046", "afii10045",
+ "afii10025", "afii10042", "afii10047", "afii10043", "afii10041",
+ "afii10044",
+};
+/* Pairs of Unicode code point (u) and KOI8-U code (c) for codes 128..255.
+ * Entries are in ascending order of u; the binary search in
+ * pdf_cyrillic_from_unicode depends on that ordering.
+ */
+static const struct { unsigned short u, c; } koi8u_from_unicode[] = {
+ {0x00a0,154}, {0x00a9,191}, {0x00b0,156}, {0x00b2,157}, {0x00b7,158},
+ {0x00f7,159}, {0x0401,179}, {0x0404,180}, {0x0406,182}, {0x0407,183},
+ {0x0410,225}, {0x0411,226}, {0x0412,247}, {0x0413,231}, {0x0414,228},
+ {0x0415,229}, {0x0416,246}, {0x0417,250}, {0x0418,233}, {0x0419,234},
+ {0x041a,235}, {0x041b,236}, {0x041c,237}, {0x041d,238}, {0x041e,239},
+ {0x041f,240}, {0x0420,242}, {0x0421,243}, {0x0422,244}, {0x0423,245},
+ {0x0424,230}, {0x0425,232}, {0x0426,227}, {0x0427,254}, {0x0428,251},
+ {0x0429,253}, {0x042a,255}, {0x042b,249}, {0x042c,248}, {0x042d,252},
+ {0x042e,224}, {0x042f,241}, {0x0430,193}, {0x0431,194}, {0x0432,215},
+ {0x0433,199}, {0x0434,196}, {0x0435,197}, {0x0436,214}, {0x0437,218},
+ {0x0438,201}, {0x0439,202}, {0x043a,203}, {0x043b,204}, {0x043c,205},
+ {0x043d,206}, {0x043e,207}, {0x043f,208}, {0x0440,210}, {0x0441,211},
+ {0x0442,212}, {0x0443,213}, {0x0444,198}, {0x0445,200}, {0x0446,195},
+ {0x0447,222}, {0x0448,219}, {0x0449,221}, {0x044a,223}, {0x044b,217},
+ {0x044c,216}, {0x044d,220}, {0x044e,192}, {0x044f,209}, {0x0451,163},
+ {0x0454,164}, {0x0456,166}, {0x0457,167}, {0x0490,189}, {0x0491,173},
+ {0x2219,149}, {0x221a,150}, {0x2248,151}, {0x2264,152}, {0x2265,153},
+ {0x2320,147}, {0x2321,155}, {0x2500,128}, {0x2502,129}, {0x250c,130},
+ {0x2510,131}, {0x2514,132}, {0x2518,133}, {0x251c,134}, {0x2524,135},
+ {0x252c,136}, {0x2534,137}, {0x253c,138}, {0x2550,160}, {0x2551,161},
+ {0x2552,162}, {0x2554,165}, {0x2557,168}, {0x2558,169}, {0x2559,170},
+ {0x255a,171}, {0x255b,172}, {0x255d,174}, {0x255e,175}, {0x255f,176},
+ {0x2560,177}, {0x2561,178}, {0x2563,181}, {0x2566,184}, {0x2567,185},
+ {0x2568,186}, {0x2569,187}, {0x256a,188}, {0x256c,190}, {0x2580,139},
+ {0x2584,140}, {0x2588,141}, {0x258c,142}, {0x2590,143}, {0x2591,144},
+ {0x2592,145}, {0x2593,146}, {0x25a0,148}
+};
+/* Glyph names for the upper half (codes 128..255) of ISO 8859-7;
+ * entry [code - 128] names the glyph for that code, _notdef where none
+ * is given.
+ */
+const char * const pdf_glyph_name_from_iso8859_7[128] = {
+ /* codes 128..159 have no glyph names in this table */
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ "nonbreakingspace", "quoteleft", "quoteright", "sterling", "euro",
+ _notdef, "brokenbar", "section", "dieresis", "copyright",
+ "ypogegrammeni", "guillemotleft", "logicalnot", "softhyphen", _notdef,
+ "horizontalbar", "degree", "plusminus", "twosuperior", "threesuperior",
+ "tonos", "dieresistonos", "Alphatonos", "periodcentered",
+ "Epsilontonos", "Etatonos", "Iotatonos", "guillemotright",
+ "Omicrontonos", "onehalf", "Upsilontonos", "Omegatonos",
+ "iotadieresistonos", "Alpha", "Beta", "Gamma", "Deltagreek", "Epsilon",
+ "Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi",
+ "Omicron", "Pi", "Rho", _notdef, "Sigma", "Tau", "Upsilon", "Phi",
+ "Chi", "Psi", "Omegagreek", "Iotadieresis", "Upsilondieresis",
+ "alphatonos", "epsilontonos", "etatonos", "iotatonos",
+ "upsilondieresistonos", "alpha", "beta", "gamma", "delta", "epsilon",
+ "zeta", "eta", "theta", "iota", "kappa", "lambda", "mugreek", "nu",
+ "xi", "omicron", "pi", "rho", "sigmafinal", "sigma", "tau", "upsilon",
+ "phi", "chi", "psi", "omega", "iotadieresis", "upsilondieresis",
+ "omicrontonos", "upsilontonos", "omegatonos", _notdef,
+};
+/* Pairs of Unicode code point (u) and ISO 8859-7 code (c) for codes 160..254.
+ * Entries are in ascending order of u; the binary search in
+ * pdf_greek_from_unicode depends on that ordering.
+ */
+static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = {
+ {0x00a0,160}, {0x00a3,163}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168},
+ {0x00a9,169}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00b0,176},
+ {0x00b1,177}, {0x00b2,178}, {0x00b3,179}, {0x00b7,183}, {0x00bb,187},
+ {0x00bd,189}, {0x037a,170}, {0x0384,180}, {0x0385,181}, {0x0386,182},
+ {0x0388,184}, {0x0389,185}, {0x038a,186}, {0x038c,188}, {0x038e,190},
+ {0x038f,191}, {0x0390,192}, {0x0391,193}, {0x0392,194}, {0x0393,195},
+ {0x0394,196}, {0x0395,197}, {0x0396,198}, {0x0397,199}, {0x0398,200},
+ {0x0399,201}, {0x039a,202}, {0x039b,203}, {0x039c,204}, {0x039d,205},
+ {0x039e,206}, {0x039f,207}, {0x03a0,208}, {0x03a1,209}, {0x03a3,211},
+ {0x03a4,212}, {0x03a5,213}, {0x03a6,214}, {0x03a7,215}, {0x03a8,216},
+ {0x03a9,217}, {0x03aa,218}, {0x03ab,219}, {0x03ac,220}, {0x03ad,221},
+ {0x03ae,222}, {0x03af,223}, {0x03b0,224}, {0x03b1,225}, {0x03b2,226},
+ {0x03b3,227}, {0x03b4,228}, {0x03b5,229}, {0x03b6,230}, {0x03b7,231},
+ {0x03b8,232}, {0x03b9,233}, {0x03ba,234}, {0x03bb,235}, {0x03bc,236},
+ {0x03bd,237}, {0x03be,238}, {0x03bf,239}, {0x03c0,240}, {0x03c1,241},
+ {0x03c2,242}, {0x03c3,243}, {0x03c4,244}, {0x03c5,245}, {0x03c6,246},
+ {0x03c7,247}, {0x03c8,248}, {0x03c9,249}, {0x03ca,250}, {0x03cb,251},
+ {0x03cc,252}, {0x03cd,253}, {0x03ce,254}, {0x2015,175}, {0x2018,161},
+ {0x2019,162}, {0x20ac,164},
+};
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 6be72250..69044c53 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -2147,9 +2147,51 @@ pdf_add_cid_font(fz_context *ctx, pdf_document *doc, fz_font *font)
return fref;
}
-/* Creates simple font */
+/* Create simple (8-bit encoding) fonts */
+
+/* Store an /Encoding dictionary in the font object: /BaseEncoding is
+ * WinAnsiEncoding and /Differences is built from glyph_names, which is read
+ * at indices 0..127 for character codes 128..255. Entries that test false
+ * are left out of the array.
+ */
+static void
+pdf_add_simple_font_encoding_imp(fz_context *ctx, pdf_document *doc, pdf_obj *font, const char * const glyph_names[])
+{
+	pdf_obj *encoding_dict;
+	pdf_obj *differences;
+	int prev_code = 0;
+	int code;
+
+	encoding_dict = pdf_new_dict(ctx, doc, 3);
+	pdf_dict_put_drop(ctx, font, PDF_NAME_Encoding, encoding_dict);
+	pdf_dict_put(ctx, encoding_dict, PDF_NAME_BaseEncoding, PDF_NAME_WinAnsiEncoding);
+
+	differences = pdf_new_array(ctx, doc, 129);
+	pdf_dict_put_drop(ctx, encoding_dict, PDF_NAME_Differences, differences);
+
+	for (code = 128; code < 256; ++code)
+	{
+		const char *glyph = glyph_names[code - 128];
+
+		if (!glyph)
+			continue;
+
+		/* A code number is written only where a run of consecutive
+		 * named codes begins; the names that follow it are implicitly
+		 * numbered from there. */
+		if (prev_code != code - 1)
+			pdf_array_push_int(ctx, differences, code);
+		pdf_array_push_name(ctx, differences, glyph);
+		prev_code = code;
+	}
+}
+
+/* Write the /Encoding entry of a simple font object for the requested
+ * encoding. Greek and Cyrillic get an encoding dictionary built from their
+ * glyph name tables; Latin, and any value not recognised here, gets the
+ * WinAnsiEncoding name.
+ */
+static void
+pdf_add_simple_font_encoding(fz_context *ctx, pdf_document *doc, pdf_obj *fobj, int encoding)
+{
+	if (encoding == PDF_SIMPLE_ENCODING_GREEK)
+	{
+		pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_iso8859_7);
+	}
+	else if (encoding == PDF_SIMPLE_ENCODING_CYRILLIC)
+	{
+		pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_koi8u);
+	}
+	else
+	{
+		/* PDF_SIMPLE_ENCODING_LATIN and every other value */
+		pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding);
+	}
+}
+
pdf_obj *
-pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
+pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font, int encoding)
{
pdf_obj *fobj = NULL;
pdf_obj *fref = NULL;
@@ -2174,7 +2216,7 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
/* Before we add this font as a resource check if the same font
* already exists in our resources for this doc. If yes, then
* hand back that reference */
- fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, 0, font->buffer, digest);
+ fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, encoding, font->buffer, digest);
if (fref == NULL)
{
fobj = pdf_new_dict(ctx, doc, 10);
@@ -2184,7 +2226,6 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
case TYPE1: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_Type1); break;
case TRUETYPE: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_TrueType); break;
}
- pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding);
if (!is_builtin_font(ctx, font))
{
@@ -2213,6 +2254,8 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
pdf_dict_put_name(ctx, fobj, PDF_NAME_BaseFont, clean_font_name(font->name));
}
+ pdf_add_simple_font_encoding(ctx, doc, fobj, encoding);
+
fref = pdf_add_object(ctx, doc, fobj);
/* Add ref to our font resource hash table. */