summaryrefslogtreecommitdiff
path: root/pdf/pdf_cmap_parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/pdf_cmap_parse.c')
-rw-r--r--pdf/pdf_cmap_parse.c490
1 files changed, 490 insertions, 0 deletions
diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c
new file mode 100644
index 00000000..d899af0e
--- /dev/null
+++ b/pdf/pdf_cmap_parse.c
@@ -0,0 +1,490 @@
+#include "fitz.h"
+#include "mupdf.h"
+
+/*
+ * CMap parser
+ */
+
+enum
+{
+ TUSECMAP = PDF_NTOKENS,
+ TBEGINCODESPACERANGE,
+ TENDCODESPACERANGE,
+ TBEGINBFCHAR,
+ TENDBFCHAR,
+ TBEGINBFRANGE,
+ TENDBFRANGE,
+ TBEGINCIDCHAR,
+ TENDCIDCHAR,
+ TBEGINCIDRANGE,
+ TENDCIDRANGE,
+ TENDCMAP
+};
+
+static int
+pdf_cmaptokenfromkeyword(char *key)
+{
+ if (!strcmp(key, "usecmap")) return TUSECMAP;
+ if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE;
+ if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE;
+ if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR;
+ if (!strcmp(key, "endbfchar")) return TENDBFCHAR;
+ if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE;
+ if (!strcmp(key, "endbfrange")) return TENDBFRANGE;
+ if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR;
+ if (!strcmp(key, "endcidchar")) return TENDCIDCHAR;
+ if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE;
+ if (!strcmp(key, "endcidrange")) return TENDCIDRANGE;
+ if (!strcmp(key, "endcmap")) return TENDCMAP;
+ return PDF_TKEYWORD;
+}
+
+static int
+pdf_codefromstring(char *buf, int len)
+{
+ int a = 0;
+ while (len--)
+ a = (a << 8) | *(unsigned char *)buf++;
+ return a;
+}
+
+static fz_error
+pdf_lexcmap(int *tok, fz_stream *file, char *buf, int n, int *sl)
+{
+ fz_error error;
+
+ error = pdf_lex(tok, file, buf, n, sl);
+ if (error)
+ return fz_rethrow(error, "cannot parse cmap token");
+
+ if (*tok == PDF_TKEYWORD)
+ *tok = pdf_cmaptokenfromkeyword(buf);
+
+ return fz_okay;
+}
+
+static fz_error
+pdf_parsecmapname(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == PDF_TNAME)
+ fz_strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname));
+ else
+ fz_warn("expected name after CMapName in cmap");
+
+ return fz_okay;
+}
+
+static fz_error
+pdf_parsewmode(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == PDF_TINT)
+ pdf_setwmode(cmap, atoi(buf));
+ else
+ fz_warn("expected integer after WMode in cmap");
+
+ return fz_okay;
+}
+
+static fz_error
+pdf_parsecodespacerange(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int lo, hi;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == TENDCODESPACERANGE)
+ return fz_okay;
+
+ else if (tok == PDF_TSTRING)
+ {
+ lo = pdf_codefromstring(buf, len);
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ if (tok == PDF_TSTRING)
+ {
+ hi = pdf_codefromstring(buf, len);
+ pdf_addcodespace(cmap, lo, hi, len);
+ }
+ else break;
+ }
+
+ else break;
+ }
+
+ return fz_throw("expected string or endcodespacerange");
+}
+
+static fz_error
+pdf_parsecidrange(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int lo, hi, dst;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == TENDCIDRANGE)
+ return fz_okay;
+
+ else if (tok != PDF_TSTRING)
+ return fz_throw("expected string or endcidrange");
+
+ lo = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ if (tok != PDF_TSTRING)
+ return fz_throw("expected string");
+
+ hi = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ if (tok != PDF_TINT)
+ return fz_throw("expected integer");
+
+ dst = atoi(buf);
+
+ pdf_maprangetorange(cmap, lo, hi, dst);
+ }
+}
+
+static fz_error
+pdf_parsecidchar(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int src, dst;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == TENDCIDCHAR)
+ return fz_okay;
+
+ else if (tok != PDF_TSTRING)
+ return fz_throw("expected string or endcidchar");
+
+ src = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ if (tok != PDF_TINT)
+ return fz_throw("expected integer");
+
+ dst = atoi(buf);
+
+ pdf_maprangetorange(cmap, src, src, dst);
+ }
+}
+
+static fz_error
+pdf_parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int dst[256];
+ int i;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == PDF_TCARRAY)
+ return fz_okay;
+
+ /* Note: does not handle [ /Name /Name ... ] */
+ else if (tok != PDF_TSTRING)
+ return fz_throw("expected string or ]");
+
+ if (len / 2)
+ {
+ for (i = 0; i < len / 2; i++)
+ dst[i] = pdf_codefromstring(buf + i * 2, 2);
+
+ pdf_maponetomany(cmap, lo, dst, len / 2);
+ }
+
+ lo ++;
+ }
+}
+
+static fz_error
+pdf_parsebfrange(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int lo, hi, dst;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == TENDBFRANGE)
+ return fz_okay;
+
+ else if (tok != PDF_TSTRING)
+ return fz_throw("expected string or endbfrange");
+
+ lo = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ if (tok != PDF_TSTRING)
+ return fz_throw("expected string");
+
+ hi = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == PDF_TSTRING)
+ {
+ if (len == 2)
+ {
+ dst = pdf_codefromstring(buf, len);
+ pdf_maprangetorange(cmap, lo, hi, dst);
+ }
+ else
+ {
+ int dststr[256];
+ int i;
+
+ if (len / 2)
+ {
+ for (i = 0; i < len / 2; i++)
+ dststr[i] = pdf_codefromstring(buf + i * 2, 2);
+
+ while (lo <= hi)
+ {
+ dststr[i-1] ++;
+ pdf_maponetomany(cmap, lo, dststr, i);
+ lo ++;
+ }
+ }
+ }
+ }
+
+ else if (tok == PDF_TOARRAY)
+ {
+ error = pdf_parsebfrangearray(cmap, file, lo, hi);
+ if (error)
+ return fz_rethrow(error, "cannot map bfrange");
+ }
+
+ else
+ {
+ return fz_throw("expected string or array or endbfrange");
+ }
+ }
+}
+
+static fz_error
+pdf_parsebfchar(pdf_cmap *cmap, fz_stream *file)
+{
+ fz_error error;
+ char buf[256];
+ int tok;
+ int len;
+ int dst[256];
+ int src;
+ int i;
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+
+ if (tok == TENDBFCHAR)
+ return fz_okay;
+
+ else if (tok != PDF_TSTRING)
+ return fz_throw("expected string or endbfchar");
+
+ src = pdf_codefromstring(buf, len);
+
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ return fz_rethrow(error, "syntaxerror in cmap");
+ /* Note: does not handle /dstName */
+ if (tok != PDF_TSTRING)
+ return fz_throw("expected string");
+
+ if (len / 2)
+ {
+ for (i = 0; i < len / 2; i++)
+ dst[i] = pdf_codefromstring(buf + i * 2, 2);
+ pdf_maponetomany(cmap, src, dst, i);
+ }
+ }
+}
+
+fz_error
+pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file)
+{
+ fz_error error;
+ pdf_cmap *cmap;
+ char key[64];
+ char buf[256];
+ int tok;
+ int len;
+
+ cmap = pdf_newcmap();
+
+ strcpy(key, ".notdef");
+
+ while (1)
+ {
+ error = pdf_lexcmap(&tok, file, buf, sizeof buf, &len);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap");
+ goto cleanup;
+ }
+
+ if (tok == PDF_TEOF || tok == TENDCMAP)
+ break;
+
+ else if (tok == PDF_TNAME)
+ {
+ if (!strcmp(buf, "CMapName"))
+ {
+ error = pdf_parsecmapname(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap after CMapName");
+ goto cleanup;
+ }
+ }
+ else if (!strcmp(buf, "WMode"))
+ {
+ error = pdf_parsewmode(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap after WMode");
+ goto cleanup;
+ }
+ }
+ else
+ fz_strlcpy(key, buf, sizeof key);
+ }
+
+ else if (tok == TUSECMAP)
+ {
+ fz_strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname));
+ }
+
+ else if (tok == TBEGINCODESPACERANGE)
+ {
+ error = pdf_parsecodespacerange(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap codespacerange");
+ goto cleanup;
+ }
+ }
+
+ else if (tok == TBEGINBFCHAR)
+ {
+ error = pdf_parsebfchar(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap bfchar");
+ goto cleanup;
+ }
+ }
+
+ else if (tok == TBEGINCIDCHAR)
+ {
+ error = pdf_parsecidchar(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap cidchar");
+ goto cleanup;
+ }
+ }
+
+ else if (tok == TBEGINBFRANGE)
+ {
+ error = pdf_parsebfrange(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap bfrange");
+ goto cleanup;
+ }
+ }
+
+ else if (tok == TBEGINCIDRANGE)
+ {
+ error = pdf_parsecidrange(cmap, file);
+ if (error)
+ {
+ error = fz_rethrow(error, "syntaxerror in cmap cidrange");
+ goto cleanup;
+ }
+ }
+
+ /* ignore everything else */
+ }
+
+ pdf_sortcmap(cmap);
+
+ *cmapp = cmap;
+ return fz_okay;
+
+cleanup:
+ pdf_dropcmap(cmap);
+ return error; /* already rethrown */
+}