summary refs log tree commit diff
path: root/scripts/cmapflatten.py
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2018-04-04 16:36:21 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2018-04-25 12:26:32 +0200
commit a6d083bb776ecd498e57450ef84c20e39ae604cf (patch)
tree a1f4399f011eb2b59e21f0588d322690a6ab14f4 /scripts/cmapflatten.py
parent 84cf672da90dfdaa2dfd2742cc69fa0bad268081 (diff)
download mupdf-a6d083bb776ecd498e57450ef84c20e39ae604cf.tar.xz
Add CMap processing scripts, and turn cmapdump into mutool.
A dumping script written in python. A flattening script written in python (for easier editing). A subsetting script written in shell to minimize CMaps by reusing subsets. Use 'mutool cmapdump' to bootstrap or verify cmap dumps.
Diffstat (limited to 'scripts/cmapflatten.py')
-rw-r--r-- scripts/cmapflatten.py 108
1 files changed, 108 insertions, 0 deletions
diff --git a/scripts/cmapflatten.py b/scripts/cmapflatten.py
new file mode 100644
index 00000000..8bb2193a
--- /dev/null
+++ b/scripts/cmapflatten.py
@@ -0,0 +1,108 @@
+# Parse a Uni* CMap file and flatten it.
+#
+# The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
+# 'bfchar' or 'bfrange'.
+
+import sys
+
def flattencmap(filename):
    """Parse the CMap file *filename* and print a flattened copy to stdout.

    The parser is line-oriented.  Lines starting with '%' are skipped, the
    header keys /CMapVersion, /CMapName, /WMode, /Registry, /Ordering and
    /Supplement are captured, a '/Name usecmap' line is remembered, and the
    entries of codespacerange, cidrange and cidchar sections are collected.
    Every cidrange is expanded into one mapping per code, so the output holds
    a single cidchar section sorted by code.

    Codes are written back with the same number of hex digits they had in
    the input; codes given as bare decimals are written with at least four
    digits.

    Raises whatever open() raises for an unreadable file, and ValueError if
    a code, CID or /WMode value is not a valid number.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    cmapversion = "1.0"
    csi_registry = "(Adobe)"
    csi_ordering = "(Unknown)"
    csi_supplement = 1
    wmode = 0

    # code -> (number of hex digits the code was written with, CID)
    cidmap = {}

    def ishex(token):
        return token[0] == '<' and token[-1] == '>'

    def tocode(token):
        # '<hex>' tokens are hexadecimal, anything else is decimal.
        if ishex(token):
            return int(token[1:-1], 16)
        return int(token, 10)

    def codewidth(token):
        # The digit count carries the byte length of the code, which the
        # integer value alone cannot express (<20> versus <0020>).
        if ishex(token):
            return len(token) - 2
        return 4

    def map_cidchar(width, lo, cid):
        cidmap[lo] = (width, cid)

    def map_cidrange(width, lo, hi, cid):
        # An inverted range (hi < lo) maps nothing.
        for offset in range(hi - lo + 1):
            cidmap[lo + offset] = (width, cid + offset)

    section = None
    with open(filename, "r") as stream:
        for raw in stream:
            if raw[0] == '%':
                continue
            line = raw.split()
            if not line:
                continue
            key = line[0]
            if len(line) == 1:
                # A lone token can only close a section; a header key
                # without a value is ignored instead of raising IndexError.
                if key.startswith("end"):
                    section = None
                continue
            arg = line[1]
            if key == '/CMapVersion':
                cmapversion = arg
            elif key == '/CMapName':
                cmapname = arg[1:]  # drop the leading '/'
            elif key == '/WMode':
                wmode = int(arg)
            elif key == '/Registry':
                csi_registry = arg
            elif key == '/Ordering':
                csi_ordering = arg
            elif key == '/Supplement':
                csi_supplement = arg
            elif arg == 'usecmap':
                usecmap = key[1:]  # drop the leading '/'
            elif arg == 'begincodespacerange':
                section = 'codespacerange'
            elif arg == 'begincidrange':
                section = 'cidrange'
            elif arg == 'begincidchar':
                section = 'cidchar'
            elif key.startswith("end"):
                section = None
            elif section == 'codespacerange' and len(line) == 2:
                nbytes = (len(key) - 2) // 2
                codespacerange.append((nbytes, tocode(key), tocode(arg)))
            elif section == 'cidrange' and len(line) == 3:
                map_cidrange(codewidth(key), tocode(key), tocode(arg),
                             tocode(line[2]))
            elif section == 'cidchar' and len(line) == 2:
                map_cidchar(codewidth(key), tocode(key), tocode(arg))

    # Print flattened CMap file.  Each print() call takes one pre-formatted
    # string so the statements behave the same under Python 2 and Python 3.

    print("%!PS-Adobe-3.0 Resource-CMap")
    print("%%DocumentNeededResources: procset (CIDInit)")
    print("%%IncludeResource: procset (CIDInit)")
    print("%%%%BeginResource: CMap (%s)" % cmapname)
    print("%%%%Version: %s" % cmapversion)
    print("%%EndComments")
    print("/CIDInit /ProcSet findresource begin")
    print("12 dict begin")
    print("begincmap")
    if usecmap:
        print("/%s usecmap" % usecmap)
    print("/CIDSystemInfo 3 dict dup begin")
    print(" /Registry %s def" % csi_registry)
    print(" /Ordering %s def" % csi_ordering)
    print(" /Supplement %s def" % csi_supplement)
    print("end def")
    print("/CMapName /%s def" % cmapname)
    print("/CMapVersion %s def" % cmapversion)
    print("/CMapType 1 def")
    print("/WMode %d def" % wmode)

    if codespacerange:
        print("%d begincodespacerange" % len(codespacerange))
        for nbytes, lo, hi in codespacerange:
            digits = nbytes * 2
            print("<%0*x> <%0*x>" % (digits, lo, digits, hi))
        print("endcodespacerange")

    print("%d begincidchar" % len(cidmap))
    for code in sorted(cidmap):
        width, cid = cidmap[code]
        print("<%0*x> %d" % (width, code, cid))
    print("endcidchar")

    print("endcmap")
    print("CMapName currentdict /CMap defineresource pop")
    print("end")
    print("end")
    print("%%EndResource")
    print("%%EOF")
+
# Flatten every CMap file named on the command line, in order.  The guard
# keeps the loop from running when this file is imported as a module.
if __name__ == "__main__":
    for arg in sys.argv[1:]:
        flattencmap(arg)