summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2018-03-15 11:55:06 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2018-03-16 14:51:41 +0100
commit: 1cd2046b3cce264a3d13481652868fd1c31537fc (patch)
tree: 8ba181113cc22f2ffe4ba1a3ed549926b747bc8c /source
parent: 2612c20b725319833caeef36ccf4240f34e0e24b (diff)
download: mupdf-1cd2046b3cce264a3d13481652868fd1c31537fc.tar.xz
Add simple fonts with 8-bit greek and cyrillic encodings.

Use KOI8-U for Cyrillic, and ISO 8859-7 for Greek.

Use with 'mutool create' using an extra argument to the %%Font directive:

    %%Font TmRmC Times-Roman Cyrillic
    BT
    /TmRmC 16 Tf
    10 10 Td
    <fa c4 d2 c1 d7 d3 d4 d7 d5 ca d4 c5 21> Tj
    ET

The alternatives are "Latin", "Greek", and "Cyrillic".

Diffstat (limited to 'source')
-rw-r--r-- source/pdf/pdf-encoding.c 38
-rw-r--r-- source/pdf/pdf-encodings.h 104
-rw-r--r-- source/pdf/pdf-font.c 51
-rw-r--r-- source/tools/murun.c 10
-rw-r--r-- source/tools/pdfcreate.c 19
-rw-r--r-- source/tools/pdfportfolio.c 2
6 files changed, 212 insertions, 12 deletions
diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c
index 0b2cdfca..04874457 100644
--- a/source/pdf/pdf-encoding.c
+++ b/source/pdf/pdf-encoding.c
@@ -85,3 +85,41 @@ pdf_lookup_agl_duplicates(int ucs)
}
return empty_dup_list;
}
+
+int pdf_cyrillic_from_unicode(int u) /* Map Unicode value u to its KOI8-U code; returns -1 when koi8u_from_unicode has no entry for it. */
+{
+ int l = 0; /* inclusive lower bound of the search window */
+ int r = nelem(koi8u_from_unicode) - 1; /* inclusive upper bound of the search window */
+ if (u < 128)
+ return u; /* values below 128 are returned unchanged, without consulting the table */
+ while (l <= r) /* binary search: relies on koi8u_from_unicode being sorted by ascending .u */
+ {
+ int m = (l + r) >> 1; /* midpoint of the current window */
+ if (u < koi8u_from_unicode[m].u)
+ r = m - 1;
+ else if (u > koi8u_from_unicode[m].u)
+ l = m + 1;
+ else
+ return koi8u_from_unicode[m].c; /* exact match: .c is the 8-bit code paired with this Unicode value */
+ }
+ return -1; /* window emptied without finding u */
+}
+
+int pdf_greek_from_unicode(int u) /* Map Unicode value u to its ISO 8859-7 code; returns -1 when iso8859_7_from_unicode has no entry for it. */
+{
+ int l = 0; /* inclusive lower bound of the search window */
+ int r = nelem(iso8859_7_from_unicode) - 1; /* inclusive upper bound of the search window */
+ if (u < 128)
+ return u; /* values below 128 are returned unchanged, without consulting the table */
+ while (l <= r) /* binary search: relies on iso8859_7_from_unicode being sorted by ascending .u */
+ {
+ int m = (l + r) >> 1; /* midpoint of the current window */
+ if (u < iso8859_7_from_unicode[m].u)
+ r = m - 1;
+ else if (u > iso8859_7_from_unicode[m].u)
+ l = m + 1;
+ else
+ return iso8859_7_from_unicode[m].c; /* exact match: .c is the 8-bit code paired with this Unicode value */
+ }
+ return -1; /* window emptied without finding u */
+}
diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h
index 025e9d03..a5d2e7ec 100644
--- a/source/pdf/pdf-encodings.h
+++ b/source/pdf/pdf-encodings.h
@@ -213,3 +213,107 @@ const char * const pdf_win_ansi[256] = { _notdef, _notdef, _notdef,
"divide", "oslash", "ugrave", "uacute", "ucircumflex", "udieresis",
"yacute", "thorn", "ydieresis"
};
+
+const char * const pdf_glyph_name_from_koi8u[128] = { /* Glyph names for KOI8-U codes 128..255, indexed by (code - 128). Codes that koi8u_from_unicode assigns to U+25xx box/block drawing characters are left as _notdef here. */
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, "integraltp", _notdef, "bulletoperator",
+ "radical", "approxequal", "lessequal", "greaterequal",
+ "nonbreakingspace", "integralbt", "degree", "twosuperior",
+ "periodcentered", "divide", _notdef, _notdef, _notdef, "iocyrillic",
+ "ecyrillic", _notdef, "icyrillic", "yicyrillic", _notdef, _notdef,
+ _notdef, _notdef, _notdef, "gheupturncyrillic", _notdef, _notdef,
+ _notdef, _notdef, _notdef, "afii10023", "afii10053", _notdef,
+ "afii10055", "afii10056", _notdef, _notdef, _notdef, _notdef, _notdef,
+ "afii10050", _notdef, "copyright", "iucyrillic", "afii10065",
+ "becyrillic", "tsecyrillic", "decyrillic", "iecyrillic", "efcyrillic",
+ "gecyrillic", "khacyrillic", "iicyrillic", "iishortcyrillic",
+ "kacyrillic", "elcyrillic", "emcyrillic", "encyrillic", "ocyrillic",
+ "pecyrillic", "iacyrillic", "ercyrillic", "escyrillic", "tecyrillic",
+ "ucyrillic", "zhecyrillic", "vecyrillic", "softsigncyrillic",
+ "yericyrillic", "zecyrillic", "shacyrillic", "ereversedcyrillic",
+ "shchacyrillic", "checyrillic", "hardsigncyrillic", "afii10048",
+ "afii10017", "afii10018", "afii10040", "afii10021", "afii10022",
+ "afii10038", "afii10020", "afii10039", "afii10026", "afii10027",
+ "afii10028", "afii10029", "afii10030", "afii10031", "afii10032",
+ "afii10033", "afii10049", "afii10034", "afii10035", "afii10036",
+ "afii10037", "afii10024", "afii10019", "afii10046", "afii10045",
+ "afii10025", "afii10042", "afii10047", "afii10043", "afii10041",
+ "afii10044",
+};
+
+static const struct { unsigned short u, c; } koi8u_from_unicode[] = { /* Pairs of {Unicode value u, KOI8-U code c}. Must stay sorted by ascending u: pdf_cyrillic_from_unicode binary-searches this table. */
+ {0x00a0,154}, {0x00a9,191}, {0x00b0,156}, {0x00b2,157}, {0x00b7,158},
+ {0x00f7,159}, {0x0401,179}, {0x0404,180}, {0x0406,182}, {0x0407,183},
+ {0x0410,225}, {0x0411,226}, {0x0412,247}, {0x0413,231}, {0x0414,228},
+ {0x0415,229}, {0x0416,246}, {0x0417,250}, {0x0418,233}, {0x0419,234},
+ {0x041a,235}, {0x041b,236}, {0x041c,237}, {0x041d,238}, {0x041e,239},
+ {0x041f,240}, {0x0420,242}, {0x0421,243}, {0x0422,244}, {0x0423,245},
+ {0x0424,230}, {0x0425,232}, {0x0426,227}, {0x0427,254}, {0x0428,251},
+ {0x0429,253}, {0x042a,255}, {0x042b,249}, {0x042c,248}, {0x042d,252},
+ {0x042e,224}, {0x042f,241}, {0x0430,193}, {0x0431,194}, {0x0432,215},
+ {0x0433,199}, {0x0434,196}, {0x0435,197}, {0x0436,214}, {0x0437,218},
+ {0x0438,201}, {0x0439,202}, {0x043a,203}, {0x043b,204}, {0x043c,205},
+ {0x043d,206}, {0x043e,207}, {0x043f,208}, {0x0440,210}, {0x0441,211},
+ {0x0442,212}, {0x0443,213}, {0x0444,198}, {0x0445,200}, {0x0446,195},
+ {0x0447,222}, {0x0448,219}, {0x0449,221}, {0x044a,223}, {0x044b,217},
+ {0x044c,216}, {0x044d,220}, {0x044e,192}, {0x044f,209}, {0x0451,163},
+ {0x0454,164}, {0x0456,166}, {0x0457,167}, {0x0490,189}, {0x0491,173},
+ {0x2219,149}, {0x221a,150}, {0x2248,151}, {0x2264,152}, {0x2265,153},
+ {0x2320,147}, {0x2321,155}, {0x2500,128}, {0x2502,129}, {0x250c,130},
+ {0x2510,131}, {0x2514,132}, {0x2518,133}, {0x251c,134}, {0x2524,135},
+ {0x252c,136}, {0x2534,137}, {0x253c,138}, {0x2550,160}, {0x2551,161},
+ {0x2552,162}, {0x2554,165}, {0x2557,168}, {0x2558,169}, {0x2559,170},
+ {0x255a,171}, {0x255b,172}, {0x255d,174}, {0x255e,175}, {0x255f,176},
+ {0x2560,177}, {0x2561,178}, {0x2563,181}, {0x2566,184}, {0x2567,185},
+ {0x2568,186}, {0x2569,187}, {0x256a,188}, {0x256c,190}, {0x2580,139},
+ {0x2584,140}, {0x2588,141}, {0x258c,142}, {0x2590,143}, {0x2591,144},
+ {0x2592,145}, {0x2593,146}, {0x25a0,148}
+};
+
+const char * const pdf_glyph_name_from_iso8859_7[128] = { /* Glyph names for ISO 8859-7 codes 128..255, indexed by (code - 128). */
+ /* The first 32 entries (codes 128..159) are _notdef; iso8859_7_from_unicode maps nothing to them. NOTE(review): the previous note here about omitted block drawing characters matches the KOI8-U table, not this one. */
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
+ "nonbreakingspace", "quoteleft", "quoteright", "sterling", "euro",
+ _notdef, "brokenbar", "section", "dieresis", "copyright",
+ "ypogegrammeni", "guillemotleft", "logicalnot", "softhyphen", _notdef,
+ "horizontalbar", "degree", "plusminus", "twosuperior", "threesuperior",
+ "tonos", "dieresistonos", "Alphatonos", "periodcentered",
+ "Epsilontonos", "Etatonos", "Iotatonos", "guillemotright",
+ "Omicrontonos", "onehalf", "Upsilontonos", "Omegatonos",
+ "iotadieresistonos", "Alpha", "Beta", "Gamma", "Deltagreek", "Epsilon",
+ "Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi",
+ "Omicron", "Pi", "Rho", _notdef, "Sigma", "Tau", "Upsilon", "Phi",
+ "Chi", "Psi", "Omegagreek", "Iotadieresis", "Upsilondieresis",
+ "alphatonos", "epsilontonos", "etatonos", "iotatonos",
+ "upsilondieresistonos", "alpha", "beta", "gamma", "delta", "epsilon",
+ "zeta", "eta", "theta", "iota", "kappa", "lambda", "mugreek", "nu",
+ "xi", "omicron", "pi", "rho", "sigmafinal", "sigma", "tau", "upsilon",
+ "phi", "chi", "psi", "omega", "iotadieresis", "upsilondieresis",
+ "omicrontonos", "upsilontonos", "omegatonos", _notdef,
+};
+
+static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = { /* Pairs of {Unicode value u, ISO 8859-7 code c}. Must stay sorted by ascending u: pdf_greek_from_unicode binary-searches this table. */
+ {0x00a0,160}, {0x00a3,163}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168},
+ {0x00a9,169}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00b0,176},
+ {0x00b1,177}, {0x00b2,178}, {0x00b3,179}, {0x00b7,183}, {0x00bb,187},
+ {0x00bd,189}, {0x037a,170}, {0x0384,180}, {0x0385,181}, {0x0386,182},
+ {0x0388,184}, {0x0389,185}, {0x038a,186}, {0x038c,188}, {0x038e,190},
+ {0x038f,191}, {0x0390,192}, {0x0391,193}, {0x0392,194}, {0x0393,195},
+ {0x0394,196}, {0x0395,197}, {0x0396,198}, {0x0397,199}, {0x0398,200},
+ {0x0399,201}, {0x039a,202}, {0x039b,203}, {0x039c,204}, {0x039d,205},
+ {0x039e,206}, {0x039f,207}, {0x03a0,208}, {0x03a1,209}, {0x03a3,211},
+ {0x03a4,212}, {0x03a5,213}, {0x03a6,214}, {0x03a7,215}, {0x03a8,216},
+ {0x03a9,217}, {0x03aa,218}, {0x03ab,219}, {0x03ac,220}, {0x03ad,221},
+ {0x03ae,222}, {0x03af,223}, {0x03b0,224}, {0x03b1,225}, {0x03b2,226},
+ {0x03b3,227}, {0x03b4,228}, {0x03b5,229}, {0x03b6,230}, {0x03b7,231},
+ {0x03b8,232}, {0x03b9,233}, {0x03ba,234}, {0x03bb,235}, {0x03bc,236},
+ {0x03bd,237}, {0x03be,238}, {0x03bf,239}, {0x03c0,240}, {0x03c1,241},
+ {0x03c2,242}, {0x03c3,243}, {0x03c4,244}, {0x03c5,245}, {0x03c6,246},
+ {0x03c7,247}, {0x03c8,248}, {0x03c9,249}, {0x03ca,250}, {0x03cb,251},
+ {0x03cc,252}, {0x03cd,253}, {0x03ce,254}, {0x2015,175}, {0x2018,161},
+ {0x2019,162}, {0x20ac,164},
+};
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 6be72250..69044c53 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -2147,9 +2147,51 @@ pdf_add_cid_font(fz_context *ctx, pdf_document *doc, fz_font *font)
return fref;
}
-/* Creates simple font */
+/* Create simple (8-bit encoding) fonts */
+
+static void
+pdf_add_simple_font_encoding_imp(fz_context *ctx, pdf_document *doc, pdf_obj *font, const char * const glyph_names[]) /* Give 'font' an Encoding dictionary: BaseEncoding WinAnsiEncoding plus a Differences array built from glyph_names, which is indexed by (code - 128) for codes 128..255. */
+{
+ pdf_obj *enc, *diff;
+ int i, last;
+
+ pdf_dict_put_drop(ctx, font, PDF_NAME_Encoding, enc = pdf_new_dict(ctx, doc, 3));
+ pdf_dict_put(ctx, enc, PDF_NAME_BaseEncoding, PDF_NAME_WinAnsiEncoding);
+ pdf_dict_put_drop(ctx, enc, PDF_NAME_Differences, diff = pdf_new_array(ctx, doc, 129));
+ last = 0; /* code of the most recently pushed name; 0 guarantees a code number is pushed before the first name */
+ for (i = 128; i < 256; ++i)
+ {
+ const char *glyph = glyph_names[i-128];
+ if (glyph) /* null entries (presumably what _notdef expands to) get no Differences item. NOTE(review): such codes would then keep their BaseEncoding glyph - confirm that is intended. */
+ {
+ if (last != i-1) /* previous code was skipped, so start a new run with an explicit code number */
+ pdf_array_push_int(ctx, diff, i);
+ last = i;
+ pdf_array_push_name(ctx, diff, glyph); /* names that follow a code number apply to consecutive codes */
+ }
+ }
+}
+
+static void
+pdf_add_simple_font_encoding(fz_context *ctx, pdf_document *doc, pdf_obj *fobj, int encoding) /* Set the Encoding entry of font dictionary fobj according to one of the PDF_SIMPLE_ENCODING_* values. */
+{
+ switch (encoding)
+ {
+ default: /* unrecognised values fall through and are treated as Latin */
+ case PDF_SIMPLE_ENCODING_LATIN:
+ pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding); /* plain name, no Differences array */
+ break;
+ case PDF_SIMPLE_ENCODING_GREEK:
+ pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_iso8859_7); /* Encoding dictionary built from the ISO 8859-7 glyph names */
+ break;
+ case PDF_SIMPLE_ENCODING_CYRILLIC:
+ pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_koi8u); /* Encoding dictionary built from the KOI8-U glyph names */
+ break;
+ }
+}
+
pdf_obj *
-pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
+pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font, int encoding)
{
pdf_obj *fobj = NULL;
pdf_obj *fref = NULL;
@@ -2174,7 +2216,7 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
/* Before we add this font as a resource check if the same font
* already exists in our resources for this doc. If yes, then
* hand back that reference */
- fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, 0, font->buffer, digest);
+ fref = pdf_find_font_resource(ctx, doc, PDF_SIMPLE_FONT_RESOURCE, encoding, font->buffer, digest);
if (fref == NULL)
{
fobj = pdf_new_dict(ctx, doc, 10);
@@ -2184,7 +2226,6 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
case TYPE1: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_Type1); break;
case TRUETYPE: pdf_dict_put(ctx, fobj, PDF_NAME_Subtype, PDF_NAME_TrueType); break;
}
- pdf_dict_put(ctx, fobj, PDF_NAME_Encoding, PDF_NAME_WinAnsiEncoding);
if (!is_builtin_font(ctx, font))
{
@@ -2213,6 +2254,8 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font)
pdf_dict_put_name(ctx, fobj, PDF_NAME_BaseFont, clean_font_name(font->name));
}
+ pdf_add_simple_font_encoding(ctx, doc, fobj, encoding);
+
fref = pdf_add_object(ctx, doc, fobj);
/* Add ref to our font resource hash table. */
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 911fcf35..8b9cd29d 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -3251,10 +3251,16 @@ static void ffi_PDFDocument_addSimpleFont(js_State *J)
fz_context *ctx = js_getcontext(J);
pdf_document *pdf = js_touserdata(J, 0, "pdf_document");
fz_font *font = js_touserdata(J, 1, "fz_font");
+ const char *encname = js_tostring(J, 2);
pdf_obj *ind = NULL;
+ int enc = PDF_SIMPLE_ENCODING_LATIN;
+
+ if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN;
+ else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK;
+ else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC;
fz_try(ctx)
- ind = pdf_add_simple_font(ctx, pdf, font);
+ ind = pdf_add_simple_font(ctx, pdf, font, enc);
fz_catch(ctx)
rethrow(J);
@@ -4697,7 +4703,7 @@ int murun_main(int argc, char **argv)
jsB_propfun(J, "PDFDocument.addObject", ffi_PDFDocument_addObject, 1);
jsB_propfun(J, "PDFDocument.addStream", ffi_PDFDocument_addStream, 2);
jsB_propfun(J, "PDFDocument.addRawStream", ffi_PDFDocument_addRawStream, 2);
- jsB_propfun(J, "PDFDocument.addSimpleFont", ffi_PDFDocument_addSimpleFont, 1);
+ jsB_propfun(J, "PDFDocument.addSimpleFont", ffi_PDFDocument_addSimpleFont, 2);
jsB_propfun(J, "PDFDocument.addCJKFont", ffi_PDFDocument_addCJKFont, 2);
jsB_propfun(J, "PDFDocument.addFont", ffi_PDFDocument_addFont, 1);
jsB_propfun(J, "PDFDocument.addImage", ffi_PDFDocument_addImage, 1);
diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c
index 93dff5fb..032f32d5 100644
--- a/source/tools/pdfcreate.c
+++ b/source/tools/pdfcreate.c
@@ -32,10 +32,10 @@ static void usage(void)
static fz_context *ctx = NULL;
static pdf_document *doc = NULL;
-static void add_font_res(pdf_obj *resources, char *name, char *path)
+static void add_font_res(pdf_obj *resources, char *name, char *path, char *encname)
{
const unsigned char *data;
- int size;
+ int size, enc;
fz_font *font;
pdf_obj *subres, *ref;
@@ -52,7 +52,15 @@ static void add_font_res(pdf_obj *resources, char *name, char *path)
pdf_dict_put_drop(ctx, resources, PDF_NAME_Font, subres);
}
- ref = pdf_add_simple_font(ctx, doc, font);
+ enc = PDF_SIMPLE_ENCODING_LATIN;
+ if (encname)
+ {
+ if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN;
+ else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK;
+ else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC;
+ }
+
+ ref = pdf_add_simple_font(ctx, doc, font, enc);
pdf_dict_puts(ctx, subres, name, ref);
pdf_drop_obj(ctx, ref);
@@ -125,7 +133,7 @@ static void create_page(char *input)
int rotate = 0;
char line[4096];
- char *s, *p;
+ char *s, *t, *p;
fz_stream *stm;
fz_buffer *contents;
@@ -156,7 +164,8 @@ static void create_page(char *input)
else if (!strcmp(s, "%%Font"))
{
s = fz_strsep(&p, " ");
- add_font_res(resources, s, p);
+ t = fz_strsep(&p, " ");
+ add_font_res(resources, s, t, p);
}
else if (!strcmp(s, "%%CJKFont"))
{
diff --git a/source/tools/pdfportfolio.c b/source/tools/pdfportfolio.c
index 9a0efc01..8e252d43 100644
--- a/source/tools/pdfportfolio.c
+++ b/source/tools/pdfportfolio.c
@@ -255,7 +255,7 @@ int pdfportfolio_main(int argc, char **argv)
data = fz_lookup_base14_font(ctx, "Times-Roman", &size);
font = fz_new_font_from_memory(ctx, "Times-Roman", data, size, 0, 0);
- font_obj = pdf_add_simple_font(ctx, doc, font);
+ font_obj = pdf_add_simple_font(ctx, doc, font, PDF_SIMPLE_ENCODING_LATIN);
fz_drop_font(ctx, font);
resources = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1));