summary | refs | log | tree | commit | diff
path: root/source
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2016-05-31 16:14:11 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2016-06-06 14:51:54 +0200
commit ae64e55878766478f536a0b2158e0a29f5cf00ed (patch)
tree 462718d0d959294e161e4f18fb9920a7017368b9 /source
parent 238ade12c1c7689d65ee9899eb00f895fcbd9a6f (diff)
download mupdf-ae64e55878766478f536a0b2158e0a29f5cf00ed.tar.xz
Minimize the number of CMaps built in to the ones listed in the spec.
Omitting the unlisted UTF-8 and UTF-32 CMaps saves ~1M. Omitting the other unlisted CMaps saves ~200k. Define CJK_CMAPS=0 to skip all CMaps. Define EXTRA_CMAPS=1 to include the various other CMaps. Define UTF8_CMAPS=1 and UTF32_CMAPS=1 to include the UTF-8 and UTF-32 CMaps.
Diffstat (limited to 'source')
-rw-r--r-- source/pdf/pdf-cmap-load.c 2
-rw-r--r-- source/pdf/pdf-cmap-table.c 260
2 files changed, 168 insertions, 94 deletions
diff --git a/source/pdf/pdf-cmap-load.c b/source/pdf/pdf-cmap-load.c
index 525c0dc1..1c72e139 100644
--- a/source/pdf/pdf-cmap-load.c
+++ b/source/pdf/pdf-cmap-load.c
@@ -106,7 +106,7 @@ pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
* Load predefined CMap from system.
*/
pdf_cmap *
-pdf_load_system_cmap(fz_context *ctx, char *cmap_name)
+pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
{
pdf_cmap *usecmap;
pdf_cmap *cmap;
diff --git a/source/pdf/pdf-cmap-table.c b/source/pdf/pdf-cmap-table.c
index 9bd89ba5..65c0eb15 100644
--- a/source/pdf/pdf-cmap-table.c
+++ b/source/pdf/pdf-cmap-table.c
@@ -1,13 +1,122 @@
#include "mupdf/pdf.h"
-#ifndef NOCJK
+#ifdef NOCJK
+#define CJK_CMAPS 0
+#endif
+
+#ifndef CJK_CMAPS
+#define CJK_CMAPS 1
+#endif
+
+#ifndef EXTRA_CMAPS
+#define EXTRA_CMAPS 0
+#endif
+#ifndef UTF8_CMAPS
+#define UTF8_CMAPS 0
+#endif
+#ifndef UTF32_CMAPS
+#define UTF32_CMAPS 0
+#endif
+
+#if CJK_CMAPS
-#include "gen_cmap_cns.h"
#include "gen_cmap_gb.h"
+#include "gen_cmap_cns.h"
#include "gen_cmap_japan.h"
#include "gen_cmap_korea.h"
-static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
+struct table { const char *name; pdf_cmap *cmap; };
+
+static const struct table table_unicode[] =
+{
+ {"Adobe-CNS1-UCS2",&cmap_Adobe_CNS1_UCS2},
+ {"Adobe-GB1-UCS2",&cmap_Adobe_GB1_UCS2},
+ {"Adobe-Japan1-UCS2",&cmap_Adobe_Japan1_UCS2},
+ {"Adobe-Korea1-UCS2",&cmap_Adobe_Korea1_UCS2},
+};
+
+static const struct table table_gb[] =
+{
+ {"GB-EUC-H",&cmap_GB_EUC_H},
+ {"GB-EUC-V",&cmap_GB_EUC_V},
+ {"GBK-EUC-H",&cmap_GBK_EUC_H},
+ {"GBK-EUC-V",&cmap_GBK_EUC_V},
+ {"GBK2K-H",&cmap_GBK2K_H},
+ {"GBK2K-V",&cmap_GBK2K_V},
+ {"GBKp-EUC-H",&cmap_GBKp_EUC_H},
+ {"GBKp-EUC-V",&cmap_GBKp_EUC_V},
+ {"GBpc-EUC-H",&cmap_GBpc_EUC_H},
+ {"GBpc-EUC-V",&cmap_GBpc_EUC_V},
+ {"UniGB-UCS2-H",&cmap_UniGB_UCS2_H},
+ {"UniGB-UCS2-V",&cmap_UniGB_UCS2_V},
+ {"UniGB-UTF16-H",&cmap_UniGB_UTF16_H},
+ {"UniGB-UTF16-V",&cmap_UniGB_UTF16_V},
+ {"UniGB-X",&cmap_UniGB_X},
+};
+
+static const struct table table_cns[] =
+{
+ {"B5pc-H",&cmap_B5pc_H},
+ {"B5pc-V",&cmap_B5pc_V},
+ {"CNS-EUC-H",&cmap_CNS_EUC_H},
+ {"CNS-EUC-V",&cmap_CNS_EUC_V},
+ {"ETen-B5-H",&cmap_ETen_B5_H},
+ {"ETen-B5-V",&cmap_ETen_B5_V},
+ {"ETenms-B5-H",&cmap_ETenms_B5_H},
+ {"ETenms-B5-V",&cmap_ETenms_B5_V},
+ {"HKscs-B5-H",&cmap_HKscs_B5_H},
+ {"HKscs-B5-V",&cmap_HKscs_B5_V},
+ {"UniCNS-UCS2-H",&cmap_UniCNS_UCS2_H},
+ {"UniCNS-UCS2-V",&cmap_UniCNS_UCS2_V},
+ {"UniCNS-UTF16-H",&cmap_UniCNS_UTF16_H},
+ {"UniCNS-UTF16-V",&cmap_UniCNS_UTF16_V},
+ {"UniCNS-X",&cmap_UniCNS_X},
+};
+
+static const struct table table_japan[] =
+{
+ {"83pv-RKSJ-H",&cmap_83pv_RKSJ_H},
+ {"90ms-RKSJ-H",&cmap_90ms_RKSJ_H},
+ {"90ms-RKSJ-V",&cmap_90ms_RKSJ_V},
+ {"90msp-RKSJ-H",&cmap_90msp_RKSJ_H},
+ {"90msp-RKSJ-V",&cmap_90msp_RKSJ_V},
+ {"90pv-RKSJ-H",&cmap_90pv_RKSJ_H},
+ {"Add-RKSJ-H",&cmap_Add_RKSJ_H},
+ {"Add-RKSJ-V",&cmap_Add_RKSJ_V},
+ {"EUC-H",&cmap_EUC_H},
+ {"EUC-V",&cmap_EUC_V},
+ {"Ext-RKSJ-H",&cmap_Ext_RKSJ_H},
+ {"Ext-RKSJ-V",&cmap_Ext_RKSJ_V},
+ {"H",&cmap_H},
+ {"UniJIS-UCS2-H",&cmap_UniJIS_UCS2_H},
+ {"UniJIS-UCS2-HW-H",&cmap_UniJIS_UCS2_HW_H},
+ {"UniJIS-UCS2-HW-V",&cmap_UniJIS_UCS2_HW_V},
+ {"UniJIS-UCS2-V",&cmap_UniJIS_UCS2_V},
+ {"UniJIS-UTF16-H",&cmap_UniJIS_UTF16_H},
+ {"UniJIS-UTF16-V",&cmap_UniJIS_UTF16_V},
+ {"UniJIS-X",&cmap_UniJIS_X},
+ {"UniJIS-X16",&cmap_UniJIS_X16},
+ {"V",&cmap_V},
+};
+
+static const struct table table_korea[] =
+{
+ {"KSC-EUC-H",&cmap_KSC_EUC_H},
+ {"KSC-EUC-V",&cmap_KSC_EUC_V},
+ {"KSCms-UHC-H",&cmap_KSCms_UHC_H},
+ {"KSCms-UHC-HW-H",&cmap_KSCms_UHC_HW_H},
+ {"KSCms-UHC-HW-V",&cmap_KSCms_UHC_HW_V},
+ {"KSCms-UHC-V",&cmap_KSCms_UHC_V},
+ {"KSCpc-EUC-H",&cmap_KSCpc_EUC_H},
+ {"UniKS-UCS2-H",&cmap_UniKS_UCS2_H},
+ {"UniKS-UCS2-V",&cmap_UniKS_UCS2_V},
+ {"UniKS-UTF16-H",&cmap_UniKS_UTF16_H},
+ {"UniKS-UTF16-V",&cmap_UniKS_UTF16_V},
+ {"UniKS-X",&cmap_UniKS_X},
+};
+
+#if EXTRA_CMAPS
+static const struct table table_extra[] =
{
{"78-EUC-H",&cmap_78_EUC_H},
{"78-EUC-V",&cmap_78_EUC_V},
@@ -17,16 +126,8 @@ static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
{"78-V",&cmap_78_V},
{"78ms-RKSJ-H",&cmap_78ms_RKSJ_H},
{"78ms-RKSJ-V",&cmap_78ms_RKSJ_V},
- {"83pv-RKSJ-H",&cmap_83pv_RKSJ_H},
- {"90ms-RKSJ-H",&cmap_90ms_RKSJ_H},
- {"90ms-RKSJ-V",&cmap_90ms_RKSJ_V},
- {"90msp-RKSJ-H",&cmap_90msp_RKSJ_H},
- {"90msp-RKSJ-V",&cmap_90msp_RKSJ_V},
- {"90pv-RKSJ-H",&cmap_90pv_RKSJ_H},
{"90pv-RKSJ-V",&cmap_90pv_RKSJ_V},
{"Add-H",&cmap_Add_H},
- {"Add-RKSJ-H",&cmap_Add_RKSJ_H},
- {"Add-RKSJ-V",&cmap_Add_RKSJ_V},
{"Add-V",&cmap_Add_V},
{"Adobe-CNS1-0",&cmap_Adobe_CNS1_0},
{"Adobe-CNS1-1",&cmap_Adobe_CNS1_1},
@@ -35,14 +136,12 @@ static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
{"Adobe-CNS1-4",&cmap_Adobe_CNS1_4},
{"Adobe-CNS1-5",&cmap_Adobe_CNS1_5},
{"Adobe-CNS1-6",&cmap_Adobe_CNS1_6},
- {"Adobe-CNS1-UCS2",&cmap_Adobe_CNS1_UCS2},
{"Adobe-GB1-0",&cmap_Adobe_GB1_0},
{"Adobe-GB1-1",&cmap_Adobe_GB1_1},
{"Adobe-GB1-2",&cmap_Adobe_GB1_2},
{"Adobe-GB1-3",&cmap_Adobe_GB1_3},
{"Adobe-GB1-4",&cmap_Adobe_GB1_4},
{"Adobe-GB1-5",&cmap_Adobe_GB1_5},
- {"Adobe-GB1-UCS2",&cmap_Adobe_GB1_UCS2},
{"Adobe-Japan1-0",&cmap_Adobe_Japan1_0},
{"Adobe-Japan1-1",&cmap_Adobe_Japan1_1},
{"Adobe-Japan1-2",&cmap_Adobe_Japan1_2},
@@ -50,52 +149,27 @@ static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
{"Adobe-Japan1-4",&cmap_Adobe_Japan1_4},
{"Adobe-Japan1-5",&cmap_Adobe_Japan1_5},
{"Adobe-Japan1-6",&cmap_Adobe_Japan1_6},
- {"Adobe-Japan1-UCS2",&cmap_Adobe_Japan1_UCS2},
{"Adobe-Korea1-0",&cmap_Adobe_Korea1_0},
{"Adobe-Korea1-1",&cmap_Adobe_Korea1_1},
{"Adobe-Korea1-2",&cmap_Adobe_Korea1_2},
- {"Adobe-Korea1-UCS2",&cmap_Adobe_Korea1_UCS2},
{"B5-H",&cmap_B5_H},
{"B5-V",&cmap_B5_V},
- {"B5pc-H",&cmap_B5pc_H},
- {"B5pc-V",&cmap_B5pc_V},
- {"CNS-EUC-H",&cmap_CNS_EUC_H},
- {"CNS-EUC-V",&cmap_CNS_EUC_V},
{"CNS1-H",&cmap_CNS1_H},
{"CNS1-V",&cmap_CNS1_V},
{"CNS2-H",&cmap_CNS2_H},
{"CNS2-V",&cmap_CNS2_V},
{"ETHK-B5-H",&cmap_ETHK_B5_H},
{"ETHK-B5-V",&cmap_ETHK_B5_V},
- {"ETen-B5-H",&cmap_ETen_B5_H},
- {"ETen-B5-V",&cmap_ETen_B5_V},
- {"ETenms-B5-H",&cmap_ETenms_B5_H},
- {"ETenms-B5-V",&cmap_ETenms_B5_V},
- {"EUC-H",&cmap_EUC_H},
- {"EUC-V",&cmap_EUC_V},
{"Ext-H",&cmap_Ext_H},
- {"Ext-RKSJ-H",&cmap_Ext_RKSJ_H},
- {"Ext-RKSJ-V",&cmap_Ext_RKSJ_V},
{"Ext-V",&cmap_Ext_V},
- {"GB-EUC-H",&cmap_GB_EUC_H},
- {"GB-EUC-V",&cmap_GB_EUC_V},
{"GB-H",&cmap_GB_H},
{"GB-V",&cmap_GB_V},
- {"GBK-EUC-H",&cmap_GBK_EUC_H},
- {"GBK-EUC-V",&cmap_GBK_EUC_V},
- {"GBK2K-H",&cmap_GBK2K_H},
- {"GBK2K-V",&cmap_GBK2K_V},
- {"GBKp-EUC-H",&cmap_GBKp_EUC_H},
- {"GBKp-EUC-V",&cmap_GBKp_EUC_V},
{"GBT-EUC-H",&cmap_GBT_EUC_H},
{"GBT-EUC-V",&cmap_GBT_EUC_V},
{"GBT-H",&cmap_GBT_H},
{"GBT-V",&cmap_GBT_V},
{"GBTpc-EUC-H",&cmap_GBTpc_EUC_H},
{"GBTpc-EUC-V",&cmap_GBTpc_EUC_V},
- {"GBpc-EUC-H",&cmap_GBpc_EUC_H},
- {"GBpc-EUC-V",&cmap_GBpc_EUC_V},
- {"H",&cmap_H},
{"HKdla-B5-H",&cmap_HKdla_B5_H},
{"HKdla-B5-V",&cmap_HKdla_B5_V},
{"HKdlb-B5-H",&cmap_HKdlb_B5_H},
@@ -106,21 +180,12 @@ static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
{"HKm314-B5-V",&cmap_HKm314_B5_V},
{"HKm471-B5-H",&cmap_HKm471_B5_H},
{"HKm471-B5-V",&cmap_HKm471_B5_V},
- {"HKscs-B5-H",&cmap_HKscs_B5_H},
- {"HKscs-B5-V",&cmap_HKscs_B5_V},
{"Hankaku",&cmap_Hankaku},
{"Hiragana",&cmap_Hiragana},
- {"KSC-EUC-H",&cmap_KSC_EUC_H},
- {"KSC-EUC-V",&cmap_KSC_EUC_V},
{"KSC-H",&cmap_KSC_H},
{"KSC-Johab-H",&cmap_KSC_Johab_H},
{"KSC-Johab-V",&cmap_KSC_Johab_V},
{"KSC-V",&cmap_KSC_V},
- {"KSCms-UHC-H",&cmap_KSCms_UHC_H},
- {"KSCms-UHC-HW-H",&cmap_KSCms_UHC_HW_H},
- {"KSCms-UHC-HW-V",&cmap_KSCms_UHC_HW_V},
- {"KSCms-UHC-V",&cmap_KSCms_UHC_V},
- {"KSCpc-EUC-H",&cmap_KSCpc_EUC_H},
{"KSCpc-EUC-V",&cmap_KSCpc_EUC_V},
{"Katakana",&cmap_Katakana},
{"NWP-H",&cmap_NWP_H},
@@ -128,87 +193,96 @@ static const struct { char *name; pdf_cmap *cmap; } cmap_table[] =
{"RKSJ-H",&cmap_RKSJ_H},
{"RKSJ-V",&cmap_RKSJ_V},
{"Roman",&cmap_Roman},
- {"UniCNS-UCS2-H",&cmap_UniCNS_UCS2_H},
- {"UniCNS-UCS2-V",&cmap_UniCNS_UCS2_V},
- {"UniCNS-UTF16-H",&cmap_UniCNS_UTF16_H},
- {"UniCNS-UTF16-V",&cmap_UniCNS_UTF16_V},
- {"UniCNS-UTF32-H",&cmap_UniCNS_UTF32_H},
- {"UniCNS-UTF32-V",&cmap_UniCNS_UTF32_V},
+ {"UniJIS2004-UTF16-H",&cmap_UniJIS2004_UTF16_H},
+ {"UniJIS2004-UTF16-V",&cmap_UniJIS2004_UTF16_V},
+ {"UniJISPro-UCS2-HW-V",&cmap_UniJISPro_UCS2_HW_V},
+ {"UniJISPro-UCS2-V",&cmap_UniJISPro_UCS2_V},
+ {"WP-Symbol",&cmap_WP_Symbol},
+};
+#endif
+
+#if UTF8_CMAPS
+static const struct table table_utf8[] =
+{
{"UniCNS-UTF8-H",&cmap_UniCNS_UTF8_H},
{"UniCNS-UTF8-V",&cmap_UniCNS_UTF8_V},
- {"UniCNS-X",&cmap_UniCNS_X},
- {"UniGB-UCS2-H",&cmap_UniGB_UCS2_H},
- {"UniGB-UCS2-V",&cmap_UniGB_UCS2_V},
- {"UniGB-UTF16-H",&cmap_UniGB_UTF16_H},
- {"UniGB-UTF16-V",&cmap_UniGB_UTF16_V},
- {"UniGB-UTF32-H",&cmap_UniGB_UTF32_H},
- {"UniGB-UTF32-V",&cmap_UniGB_UTF32_V},
{"UniGB-UTF8-H",&cmap_UniGB_UTF8_H},
{"UniGB-UTF8-V",&cmap_UniGB_UTF8_V},
- {"UniGB-X",&cmap_UniGB_X},
- {"UniJIS-UCS2-H",&cmap_UniJIS_UCS2_H},
- {"UniJIS-UCS2-HW-H",&cmap_UniJIS_UCS2_HW_H},
- {"UniJIS-UCS2-HW-V",&cmap_UniJIS_UCS2_HW_V},
- {"UniJIS-UCS2-V",&cmap_UniJIS_UCS2_V},
- {"UniJIS-UTF16-H",&cmap_UniJIS_UTF16_H},
- {"UniJIS-UTF16-V",&cmap_UniJIS_UTF16_V},
- {"UniJIS-UTF32-H",&cmap_UniJIS_UTF32_H},
- {"UniJIS-UTF32-V",&cmap_UniJIS_UTF32_V},
{"UniJIS-UTF8-H",&cmap_UniJIS_UTF8_H},
{"UniJIS-UTF8-V",&cmap_UniJIS_UTF8_V},
- {"UniJIS-X",&cmap_UniJIS_X},
- {"UniJIS-X16",&cmap_UniJIS_X16},
- {"UniJIS-X32",&cmap_UniJIS_X32},
{"UniJIS-X8",&cmap_UniJIS_X8},
- {"UniJIS2004-UTF16-H",&cmap_UniJIS2004_UTF16_H},
- {"UniJIS2004-UTF16-V",&cmap_UniJIS2004_UTF16_V},
- {"UniJIS2004-UTF32-H",&cmap_UniJIS2004_UTF32_H},
- {"UniJIS2004-UTF32-V",&cmap_UniJIS2004_UTF32_V},
{"UniJIS2004-UTF8-H",&cmap_UniJIS2004_UTF8_H},
{"UniJIS2004-UTF8-V",&cmap_UniJIS2004_UTF8_V},
- {"UniJISPro-UCS2-HW-V",&cmap_UniJISPro_UCS2_HW_V},
- {"UniJISPro-UCS2-V",&cmap_UniJISPro_UCS2_V},
{"UniJISPro-UTF8-V",&cmap_UniJISPro_UTF8_V},
+ {"UniKS-UTF8-H",&cmap_UniKS_UTF8_H},
+ {"UniKS-UTF8-V",&cmap_UniKS_UTF8_V},
+};
+#endif
+
+#if UTF32_CMAPS
+static const struct table table_utf32[] =
+{
+ {"UniCNS-UTF32-H",&cmap_UniCNS_UTF32_H},
+ {"UniCNS-UTF32-V",&cmap_UniCNS_UTF32_V},
+ {"UniGB-UTF32-H",&cmap_UniGB_UTF32_H},
+ {"UniGB-UTF32-V",&cmap_UniGB_UTF32_V},
+ {"UniJIS-UTF32-H",&cmap_UniJIS_UTF32_H},
+ {"UniJIS-UTF32-V",&cmap_UniJIS_UTF32_V},
+ {"UniJIS-X32",&cmap_UniJIS_X32},
+ {"UniJIS2004-UTF32-H",&cmap_UniJIS2004_UTF32_H},
+ {"UniJIS2004-UTF32-V",&cmap_UniJIS2004_UTF32_V},
{"UniJISX0213-UTF32-H",&cmap_UniJISX0213_UTF32_H},
{"UniJISX0213-UTF32-V",&cmap_UniJISX0213_UTF32_V},
{"UniJISX02132004-UTF32-H",&cmap_UniJISX02132004_UTF32_H},
{"UniJISX02132004-UTF32-V",&cmap_UniJISX02132004_UTF32_V},
- {"UniKS-UCS2-H",&cmap_UniKS_UCS2_H},
- {"UniKS-UCS2-V",&cmap_UniKS_UCS2_V},
- {"UniKS-UTF16-H",&cmap_UniKS_UTF16_H},
- {"UniKS-UTF16-V",&cmap_UniKS_UTF16_V},
{"UniKS-UTF32-H",&cmap_UniKS_UTF32_H},
{"UniKS-UTF32-V",&cmap_UniKS_UTF32_V},
- {"UniKS-UTF8-H",&cmap_UniKS_UTF8_H},
- {"UniKS-UTF8-V",&cmap_UniKS_UTF8_V},
- {"UniKS-X",&cmap_UniKS_X},
- {"V",&cmap_V},
- {"WP-Symbol",&cmap_WP_Symbol},
};
+#endif
-pdf_cmap *
-pdf_load_builtin_cmap(fz_context *ctx, char *cmap_name)
+static pdf_cmap *
+pdf_load_builtin_cmap_imp(const struct table *table, int r, const char *name)
{
int l = 0;
- int r = nelem(cmap_table) - 1;
while (l <= r)
{
int m = (l + r) >> 1;
- int c = strcmp(cmap_name, cmap_table[m].name);
+ int c = strcmp(name, table[m].name);
if (c < 0)
r = m - 1;
else if (c > 0)
l = m + 1;
else
- return cmap_table[m].cmap;
+ return table[m].cmap;
}
return NULL;
}
+pdf_cmap *
+pdf_load_builtin_cmap(fz_context *ctx, const char *name)
+{
+ pdf_cmap *cmap = NULL;
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_unicode, nelem(table_unicode)-1, name);
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_gb, nelem(table_gb)-1, name);
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_cns, nelem(table_cns)-1, name);
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_japan, nelem(table_japan)-1, name);
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_korea, nelem(table_korea)-1, name);
+#if EXTRA_CMAPS
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_extra, nelem(table_extra)-1, name);
+#endif
+#if UTF8_CMAPS
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_utf8, nelem(table_utf8)-1, name);
+#endif
+#if UTF32_CMAPS
+ if (!cmap) cmap = pdf_load_builtin_cmap_imp(table_utf32, nelem(table_utf32)-1, name);
+#endif
+ return cmap;
+}
+
#else
pdf_cmap *
-pdf_load_builtin_cmap(fz_context *ctx, char *cmap_name)
+pdf_load_builtin_cmap(fz_context *ctx, const char *name)
{
return NULL;
}