summaryrefslogtreecommitdiff
path: root/scripts/cmapflatten.py
blob: 8bb2193a8eedaa2491440af177a9f6112d119ef8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Parse a Uni* CMap file and flatten it.
#
# The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
# 'bfchar' or 'bfrange'.

import sys

def flattencmap(filename):
	"""Read the CMap file *filename* and print a flattened version to stdout.

	The header values (/CMapVersion, /CMapName, /WMode, /Registry, /Ordering,
	/Supplement), an optional 'usecmap' reference and the codespace ranges
	are copied through.  Every 'cidchar' and 'cidrange' entry is expanded
	into one code -> CID pair, and all pairs are written back out as a
	single 'cidchar' section sorted by code.

	The 'usecmap' reference is only echoed, not resolved.  When the same code
	is mapped more than once, the last mapping in the file wins.

	The code is written to run unchanged on both Python 2 and Python 3:
	every print() call takes exactly one argument.
	"""
	codespacerange = []
	usecmap = ""
	cmapname = ""
	cmapversion = "1.0"
	csi_registry = "(Adobe)"
	csi_ordering = "(Unknown)"
	csi_supplement = 1
	wmode = 0

	# code -> CID.  Deliberately not called 'map' so the built-in stays usable.
	cidmap = {}

	def tocode(s):
		# '<hex>' tokens are hexadecimal codes; anything else is decimal.
		if s[0] == '<' and s[-1] == '>':
			return int(s[1:-1], 16)
		return int(s, 10)

	def map_cidchar(lo, v):
		cidmap[lo] = v

	def map_cidrange(lo, hi, v):
		# Consecutive codes lo..hi (inclusive) map to consecutive CIDs from v.
		while lo <= hi:
			cidmap[lo] = v
			lo = lo + 1
			v = v + 1

	# Name of the section we are currently inside, or None outside of one.
	current = None
	# 'with' guarantees the file is closed, even if parsing raises.
	with open(filename, "r") as cmapfile:
		for rawline in cmapfile:
			if rawline.startswith('%'):
				continue
			line = rawline.strip().split()
			if len(line) == 0:
				continue
			if line[0] == '/CMapVersion': cmapversion = line[1]
			elif line[0] == '/CMapName': cmapname = line[1][1:]
			elif line[0] == '/WMode': wmode = int(line[1])
			elif line[0] == '/Registry': csi_registry = line[1]
			elif line[0] == '/Ordering': csi_ordering = line[1]
			elif line[0] == '/Supplement': csi_supplement = line[1]
			elif len(line) > 1 and line[1] == 'usecmap': usecmap = line[0][1:]
			elif len(line) > 1 and line[1] == 'begincodespacerange': current = 'codespacerange'
			elif len(line) > 1 and line[1] == 'begincidrange': current = 'cidrange'
			elif len(line) > 1 and line[1] == 'begincidchar': current = 'cidchar'
			elif line[0].startswith("end"):
				current = None
			elif current == 'codespacerange' and len(line) == 2:
				# Width in bytes: hex digits between '<' and '>' divided by two.
				# Floor division keeps this an int on Python 3 as well.
				n, a, b = (len(line[0])-2)//2, tocode(line[0]), tocode(line[1])
				codespacerange.append((n, a, b))
			elif current == 'cidrange' and len(line) == 3:
				a, b, c = tocode(line[0]), tocode(line[1]), tocode(line[2])
				map_cidrange(a, b, c)
			elif current == 'cidchar' and len(line) == 2:
				a, b = tocode(line[0]), tocode(line[1])
				map_cidchar(a, b)

	# Print flattened CMap file

	print("%!PS-Adobe-3.0 Resource-CMap")
	print("%%DocumentNeededResources: procset (CIDInit)")
	print("%%IncludeResource: procset (CIDInit)")
	print("%%%%BeginResource: CMap (%s)" % cmapname)
	print("%%%%Version: %s" % cmapversion)
	print("%%EndComments")
	print("/CIDInit /ProcSet findresource begin")
	print("12 dict begin")
	print("begincmap")
	if usecmap: print("/%s usecmap" % usecmap)
	print("/CIDSystemInfo 3 dict dup begin")
	print("  /Registry %s def" % csi_registry)
	print("  /Ordering %s def" % csi_ordering)
	print("  /Supplement %s def" % csi_supplement)
	print("end def")
	print("/CMapName /%s def" % cmapname)
	print("/CMapVersion %s def" % cmapversion)
	print("/CMapType 1 def")
	print("/WMode %d def" % wmode)

	if len(codespacerange):
		print("%d begincodespacerange" % len(codespacerange))
		for nbytes, lo, hi in codespacerange:
			# Zero-pad both ends to the byte width seen in the input.
			fmt = "<%%0%dx> <%%0%dx>" % (nbytes*2, nbytes*2)
			print(fmt % (lo, hi))
		print("endcodespacerange")

	# sorted() works on both Python 2 and 3 (dict.keys() has no .sort() on 3).
	keys = sorted(cidmap)
	print("%d begincidchar" % len(keys))
	for code in keys:
		# NOTE(review): codes are always padded to at least four hex digits,
		# independent of the codespace width -- confirm this is intended for
		# CMaps whose codes are not two bytes wide.
		print("<%04x> %d" % (code, cidmap[code]))
	print("endcidchar")

	print("endcmap")
	print("CMapName currentdict /CMap defineresource pop")
	print("end")
	print("end")
	print("%%EndResource")
	print("%%EOF")

# Only run when executed as a script, so importing this module does not
# process the importing program's command-line arguments.
if __name__ == "__main__":
	# Flatten each CMap file named on the command line, in order.
	for arg in sys.argv[1:]:
		flattencmap(arg)