# Parse a Uni* CMap file and flatten it.
#
# The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
# 'bfchar' or 'bfrange'.
#
# Usage: python cmapflatten.py FILE...
# The flattened CMap for each FILE is written to standard output.
#
# Provenance: scripts/cmapflatten.py, added by Tor Andersson on 2018-04-04 in
# commit a6d083bb776ecd498e57450ef84c20e39ae604cf ("Add CMap processing
# scripts, and turn cmapdump into mutool.").
#
# The code below runs unchanged on Python 2.6+ and Python 3.

import sys

# Header keys whose value (the second token on the line) is stored verbatim.
_HEADER_FIELDS = {
    '/CMapVersion': 'cmapversion',
    '/Registry': 'registry',
    '/Ordering': 'ordering',
    '/Supplement': 'supplement',
}

# "<count> begin..." operators and the section each one opens.
_SECTIONS = {
    'begincodespacerange': 'codespacerange',
    'begincidrange': 'cidrange',
    'begincidchar': 'cidchar',
}


def _tocode(token):
    """Convert a CMap token to an int: '<hex>' is hexadecimal, else decimal."""
    if token[0] == '<' and token[-1] == '>':
        return int(token[1:-1], 16)
    return int(token, 10)


def _parse_cmap(lines):
    """Parse an iterable of CMap source lines into a dict of CMap properties.

    The returned dict has the keys 'usecmap', 'cmapname', 'cmapversion',
    'registry', 'ordering', 'supplement', 'wmode', 'codespacerange' (a list
    of (byte_count, low, high) tuples) and 'cidmap' (code -> CID, with every
    cidrange expanded into individual entries).

    Lines that are not recognised are ignored.  Raises ValueError when a
    code, CID or /WMode value is not a valid number.
    """
    info = {
        'usecmap': "",
        'cmapname': "",
        'cmapversion': "1.0",
        'registry': "(Adobe)",
        'ordering': "(Unknown)",
        'supplement': 1,
        'wmode': 0,
        'codespacerange': [],
        'cidmap': {},
    }
    codespacerange = info['codespacerange']
    cidmap = info['cidmap']

    section = None
    for raw in lines:
        # Only lines with '%' in the very first column are comments.
        if raw.startswith('%'):
            continue
        tokens = raw.split()
        if not tokens:
            continue
        head = tokens[0]
        # A header key without a value is skipped instead of raising
        # IndexError on tokens[1].
        has_value = len(tokens) > 1

        if has_value and head in _HEADER_FIELDS:
            info[_HEADER_FIELDS[head]] = tokens[1]
        elif has_value and head == '/CMapName':
            info['cmapname'] = tokens[1][1:]  # drop the leading '/'
        elif has_value and head == '/WMode':
            info['wmode'] = int(tokens[1])
        elif has_value and tokens[1] == 'usecmap':
            info['usecmap'] = head[1:]  # drop the leading '/'
        elif has_value and tokens[1] in _SECTIONS:
            section = _SECTIONS[tokens[1]]
        elif head.startswith("end"):
            section = None
        elif section == 'codespacerange' and len(tokens) == 2:
            # Two hex digits per byte, minus the surrounding '<' and '>'.
            nbytes = (len(head) - 2) // 2
            codespacerange.append((nbytes, _tocode(head), _tocode(tokens[1])))
        elif section == 'cidrange' and len(tokens) == 3:
            lo, hi, cid = _tocode(head), _tocode(tokens[1]), _tocode(tokens[2])
            # Expand the inclusive range into one entry per code.
            while lo <= hi:
                cidmap[lo] = cid
                lo += 1
                cid += 1
        elif section == 'cidchar' and len(tokens) == 2:
            cidmap[_tocode(head)] = _tocode(tokens[1])

    return info


def _format_cmap(info):
    """Return the flattened CMap resource for *info* as a list of lines."""
    # Literals used without the % operator keep both percent signs, which is
    # what the DSC comments need; formatted ones double them to get "%%".
    out = [
        "%!PS-Adobe-3.0 Resource-CMap",
        "%%DocumentNeededResources: procset (CIDInit)",
        "%%IncludeResource: procset (CIDInit)",
        "%%%%BeginResource: CMap (%s)" % info['cmapname'],
        "%%%%Version: %s" % info['cmapversion'],
        "%%EndComments",
        "/CIDInit /ProcSet findresource begin",
        "12 dict begin",
        "begincmap",
    ]
    if info['usecmap']:
        out.append("/%s usecmap" % info['usecmap'])
    out.extend([
        "/CIDSystemInfo 3 dict dup begin",
        " /Registry %s def" % info['registry'],
        " /Ordering %s def" % info['ordering'],
        " /Supplement %s def" % info['supplement'],
        "end def",
        "/CMapName /%s def" % info['cmapname'],
        "/CMapVersion %s def" % info['cmapversion'],
        "/CMapType 1 def",
        "/WMode %d def" % info['wmode'],
    ])

    ranges = info['codespacerange']
    if ranges:
        out.append("%d begincodespacerange" % len(ranges))
        for nbytes, lo, hi in ranges:
            # Zero-pad both ends to the byte width of the source range.
            fmt = "<%%0%dx> <%%0%dx>" % (nbytes * 2, nbytes * 2)
            out.append(fmt % (lo, hi))
        out.append("endcodespacerange")

    cidmap = info['cidmap']
    out.append("%d begincidchar" % len(cidmap))
    for code in sorted(cidmap):
        out.append("<%04x> %d" % (code, cidmap[code]))
    out.append("endcidchar")

    out.extend([
        "endcmap",
        "CMapName currentdict /CMap defineresource pop",
        "end",
        "end",
        "%%EndResource",
        "%%EOF",
    ])
    return out


def flattencmap(filename):
    """Read the CMap file *filename* and print its flattened form to stdout.

    All 'cidrange' and 'cidchar' mappings are merged and written back as a
    single 'cidchar' section sorted by code; later mappings for the same
    code override earlier ones.  Nothing is printed if parsing fails.

    Raises IOError/OSError if the file cannot be read and ValueError if a
    code, CID or /WMode value is not a valid number.
    """
    # The context manager closes the file even when parsing raises.
    with open(filename, "r") as stream:
        info = _parse_cmap(stream)
    sys.stdout.write("\n".join(_format_cmap(info)) + "\n")


if __name__ == "__main__":
    for arg in sys.argv[1:]:
        flattencmap(arg)