First drop of code :-)

This commit is contained in:
david reid
2015-03-20 18:10:40 +00:00
parent 7dffa90495
commit fb8e82d33a
9 changed files with 1456 additions and 0 deletions

7
.gitignore vendored
View File

@@ -52,3 +52,10 @@ docs/_build/
# PyBuilder
target/
# Ignore PyCharm stuff
.idea
# Ignore any fonts copied while testing!
*.ttf

View File

@@ -1,2 +1,59 @@
# zttf
Python TTF file parser
This was written to allow fonts to be parsed and subsets of them to be generated for use in PDF documents.
It was developed using Python 3.4. It will work to a degree with Python 2, but it needs additional testing and development there.
## Simple Usage
```python
>>> from zttf.ttfile import TTFile
>>> font_file = TTFile('DroidSans.ttf')
>>> font_file.is_valid
True
>>> font_file.faces
[<zttf.ttf.TTFont object at 0x7f3569b73b50>]
>>> face = font_file.faces[0]
>>> face.font_family
Droid Sans
>>> face.name
DroidSans
>>> face.italic_angle
0
```
When used with a font collection, there will be multiple faces available.
```python
>>> from zttf.ttfile import TTFile
>>> font_file = TTFile('Futura.ttc')
>>> font_file.is_valid
True
>>> font_file.faces
[<zttf.ttf.TTFont object at 0x7fc97520bc50>, <zttf.ttf.TTFont object at 0x7fc97520bc90>, <zttf.ttf.TTFont object at 0x7fc97520bd90>, <zttf.ttf.TTFont object at 0x7fc973b4c190>]
>>> font_file.faces[0].font_family
Futura
>>> font_file.faces[0].name
Futura-Medium
>>> font_file.faces[1].name
Futura-MediumItalic
>>> font_file.faces[2].name
Futura-CondensedMedium
>>> font_file.faces[3].name
Futura-CondensedExtraBold
```
Subsetting is done by passing in a subset of the characters desired. All required glyphs will be found and copied into the new file.
```python
>>> from zttf.ttfile import TTFile
>>> font_file = TTFile('Futura.ttc')
>>> subset = [ord('H'), ord('e'), ord('l'), ord('o')]
>>> sub_font = font_file.faces[0].make_subset(subset)
>>> sub_font.output()
...
>>> with open('new_font.ttf', 'wb') as fh:
...     fh.write(sub_font.output())
```

23
example.py Normal file
View File

@@ -0,0 +1,23 @@
import sys
from zttf.ttfile import TTFile
def _run_example(argv):
    """Load the font named in ``argv[1]``, print a few facts and write a subset.

    Prints a usage line and exits with status 0 when no filename is given,
    and exits with status 0 when the loaded file reports itself as invalid.
    Otherwise the subset containing the characters H, e, l and o of the first
    face is written to ``font_subset.ttf`` in the current directory.
    """
    if len(argv) < 2:
        print("Usage: {} <font filename>".format(argv[0]))
        sys.exit(0)

    font_file = TTFile(argv[1])
    print("Is valid? {}".format(font_file.is_valid))
    if not font_file.is_valid:
        sys.exit(0)

    print(font_file.faces)
    for attribute in ('font_family', 'name', 'italic_angle'):
        print(getattr(font_file.faces[0], attribute))

    wanted = [ord(ch) for ch in ('H', 'e', 'l', 'o')]
    font_subset = font_file.faces[0].make_subset(wanted)
    with open('font_subset.ttf', 'wb') as out:
        out.write(font_subset.output())


if __name__ == '__main__':
    _run_example(sys.argv)

0
zttf/__init__.py Normal file
View File

571
zttf/objects.py Normal file
View File

@@ -0,0 +1,571 @@
# TrueType Font Glyph operators
from struct import unpack, calcsize
from zttf.utils import PackedFormat, fixed_version, read_list_uint16, Range, read_list_int16, glyph_more_components, \
glyf_skip_format, ttf_checksum
# Human readable labels for the numeric name identifiers stored in the
# ``name`` table; used by TTFNameRecord.__str__ when printing a record.
TTF_NAMES = {
    0: 'Copyright Notice',
    1: 'Font Family Name',
    2: 'Font Subfamily Name',
    3: 'Unique Font Identifier',
    4: 'Full Font Name',
    5: 'Version String',
    6: 'Postscript Name',
    7: 'Trademark',
    8: 'Manufacturer Name',
    9: 'Designer',
    10: 'Description',
    11: 'Vendor URL',
    12: 'Designer URL',
    # Name ID 13 describes the licence itself, not a licensee.
    13: 'Licence Description',
    14: 'Licence URL',
    15: 'Preferred Family',
    16: 'Preferred Subfamily',
    17: 'Compatible Full',
    18: 'Sample Text',
    19: 'PS CID findfont name',
    20: 'WWS Family Name',
    21: 'WWS Subfamily Name',
}
class TTFNameRecord(PackedFormat):
    """A single record of the ``name`` table plus the string it refers to.

    The six header fields are read from the stream; the string bytes are
    sliced out of ``data`` (the table's string storage area) using the
    record's ``offset`` and ``length``.

    Attributes set in addition to the FORMAT fields:
        pos:   stream position at which the record header started.
        raw:   the undecoded bytes of the string.
        value: the decoded text, or ``raw`` unchanged when the platform /
               encoding pair is not one that is decoded here.
    """
    FORMAT = [
        {'name': 'platform_id', 'format': 'H'},
        {'name': 'encoding_id', 'format': 'H'},
        {'name': 'language_id', 'format': 'H'},
        {'name': 'name', 'format': 'H'},
        {'name': 'length', 'format': 'H'},
        {'name': 'offset', 'format': 'H'},
    ]

    def __init__(self, fh, data):
        """Read one record header from ``fh`` and resolve its string in ``data``."""
        self.pos = fh.tell()
        PackedFormat.__init__(self, fh)
        self.raw = data[self.offset:self.offset + self.length]
        self.value = self._decode()

    def _codec(self):
        """Return the codec name for this record, or None when it is not decoded."""
        if self.platform_id == 1:
            # Macintosh platform, Roman script: strings are Mac Roman, which
            # differs from ISO-8859-1 for every byte above 0x7F.
            if self.encoding_id == 0:
                return 'mac_roman'
        elif self.platform_id == 0:
            # Unicode platform strings are UTF-16BE.
            return 'utf-16-be'
        elif self.platform_id == 3:
            # Windows platform: symbol (0), Unicode BMP (1), Unicode full (10).
            if self.encoding_id in (0, 1, 10):
                return 'utf-16-be'
        return None

    def _decode(self):
        """Decode ``raw``; keep the raw bytes when decoding is impossible."""
        codec = self._codec()
        if codec is None:
            return self.raw
        try:
            return self.raw.decode(codec)
        except UnicodeDecodeError:
            # A malformed string should not make the whole font unreadable.
            return self.raw

    def __str__(self):
        label = TTF_NAMES.get(self.name, 'Unknown Name {:X}'.format(self.name))
        return '{:08d} @ {:08X} - {:>30s}: {}'.format(self.pos, self.offset, label, self.value)
class TTF_name(PackedFormat):
    """The ``name`` table: a small header followed by ``count`` name records.

    ``offset`` in the header is the distance from the start of the table to
    the string storage area that the records index into.
    """
    FORMAT = [
        {'name': 'format', 'format': 'H'},
        {'name': 'count', 'format': 'H'},
        {'name': 'offset', 'format': 'H'},
    ]

    # Windows language id for US English, preferred among Windows records.
    _WINDOWS_EN_US = 0x409

    def __init__(self, fh, length):
        """Parse the table starting at the current position of ``fh``.

        :param fh: readable, seekable binary stream positioned at the table.
        :param length: total length of the table in bytes.
        """
        start_pos = fh.tell()
        PackedFormat.__init__(self, fh)
        records_pos = fh.tell()
        # Grab the whole string storage area first, then return to the
        # records so each one can slice its own string out of it.
        fh.seek(start_pos + self.offset)
        data = fh.read(length - self.offset)
        fh.seek(records_pos)
        self.names = [TTFNameRecord(fh, data) for _ in range(self.count)]

    def get_name(self, name, default=None):
        """Return the string stored for name id ``name``.

        A Macintosh Roman record (platform 1, encoding 0) is preferred.  When
        the font has none, a Windows Unicode record (platform 3, encoding 1)
        is used instead, favouring US English.  ``default`` is returned when
        neither exists.
        """
        fallback = None
        for record in self.names:
            if record.name != name:
                continue
            if record.platform_id == 1 and record.encoding_id == 0:
                return record.value
            if record.platform_id == 3 and record.encoding_id == 1:
                if fallback is None or record.language_id == self._WINDOWS_EN_US:
                    fallback = record
        if fallback is not None:
            return fallback.value
        return default
class TTFHeader(PackedFormat):
    """The font's leading header followed by one TTFOffsetTable per table.

    ``tables`` holds the directory entries in file order.  When constructed
    without a stream the header is empty (``num_tables`` is 0).
    """
    FORMAT = [
        dict(name='version', format='I', convert=fixed_version),
        dict(name='num_tables', format='H'),
        dict(name='search_range', format='H'),
        dict(name='entry_selector', format='H'),
        dict(name='range_shift', format='H'),
    ]

    def __init__(self, fh=None):
        """Read the header and its directory entries from ``fh`` when given."""
        self.tables = []
        self.num_tables = 0
        PackedFormat.__init__(self, fh)
        remaining = self.num_tables
        while remaining > 0:
            self.tables.append(TTFOffsetTable(fh))
            remaining -= 1

    def check_version(self):
        """Return True when the converted version field equals 1."""
        return self.version == 1

    def get_tag(self, tag):
        """Return the directory entry whose tag is ``tag``, or None.

        The alias ``b'os2'`` also matches an entry tagged ``b'OS/2'``.
        """
        for entry in self.tables:
            exact = entry.tag == tag
            aliased = tag == b'os2' and entry.tag == b'OS/2'
            if exact or aliased:
                return entry
        return None

    def dump_tables(self):
        """Print the tag and offset of every directory entry."""
        print("TTF Header Tables:")
        for entry in self.tables:
            print("    {} @ {}".format(entry.tag, entry.offset))
class TTF_kern(PackedFormat):
    """Header of the ``kern`` table and the headers of its subtables.

    Only the subtable headers are read here; each subtable's payload is
    skipped over so the stream ends up positioned after the last subtable.
    """
    FORMAT = [
        dict(name='version', format='H'),
        dict(name='num_tables', format='H'),
    ]

    def __init__(self, fh=None, length=None):
        """Parse from ``fh`` when given; ``length`` is accepted but unused."""
        self.subtables = []
        PackedFormat.__init__(self, fh)
        if fh is not None:
            for _ in range(self.num_tables):
                subtable = TTF_kern_subtable(fh)
                # Jump past the remainder of the subtable (its total length
                # minus the header bytes that were just consumed).
                fh.seek(subtable.length - len(subtable), 1)
                self.subtables.append(subtable)
class TTF_kern_subtable(PackedFormat):
    """Header of one ``kern`` subtable.

    When read from a stream, ``offset`` records the stream position at which
    the subtable header starts; it is not set when no stream is supplied.
    """
    FORMAT = [
        dict(name='version', format='H'),
        dict(name='length', format='H'),
        dict(name='coverage', format='H'),
    ]

    def __init__(self, fh=None):
        """Remember where the subtable starts, then parse its header."""
        have_stream = fh is not None
        if have_stream:
            self.offset = fh.tell()
        PackedFormat.__init__(self, fh)
class TTFOffsetTable(PackedFormat):
    """One table directory entry: tag, checksum, offset and length."""
    FORMAT = [
        {'name': 'tag', 'format': '4s'},
        {'name': 'checksum', 'format': 'I'},
        {'name': 'offset', 'format': 'I'},
        {'name': 'length', 'format': 'I'},
    ]

    def __str__(self):
        return 'Offset Table: {} {} bytes @ {}'.format(self.tag, self.length, self.offset)

    def padded_length(self):
        """Return ``length`` rounded up to the next multiple of four."""
        return (self.length + 3) & ~3

    def padded_data(self, data):
        """Return ``data`` padded with NUL bytes up to ``padded_length()``.

        ``data`` is returned unchanged when it is already long enough.
        """
        extra = self.padded_length() - len(data)
        if extra > 0:
            # Table data is bytes, so the padding must be bytes too; a text
            # string here raises TypeError on Python 3.
            return data + b'\0' * extra
        return data

    def calculate_checksum(self, data):
        """Store the checksum of ``data`` in ``checksum``."""
        self.checksum = ttf_checksum(data)
class TTF_head(PackedFormat):
    """The ``head`` table.

    ``units_per_em`` is converted to a float on reading, so the scaling done
    in ``bounding_box`` is always a true division.
    """
    FORMAT = [
        {'name': 'vers', 'format': 'i'},
        {'name': 'font_version', 'format': 'i'},
        {'name': 'checksum_adj', 'format': 'I'},
        {'name': 'magic_number', 'format': 'I'},
        {'name': 'flags', 'format': 'H'},
        {'name': 'units_per_em', 'format': 'H', 'convert': float},
        {'name': 'created', 'format': 'q'},
        {'name': 'modified', 'format': 'q'},
        {'name': 'x_min', 'format': 'h'},
        {'name': 'y_min', 'format': 'h'},
        {'name': 'x_max', 'format': 'h'},
        {'name': 'y_max', 'format': 'h'},
        {'name': 'mac_style', 'format': 'H'},
        {'name': 'lowest_rec_ppem', 'format': 'H'},
        {'name': 'direction_hint', 'format': 'H'},
        {'name': 'index_to_loc_format', 'format': 'h'},
        {'name': 'glyph_data_format', 'format': 'h'},
    ]

    # Bit position of each style flag within ``mac_style``.
    _MAC_STYLE_BITS = (
        ('bold', 0),
        ('italic', 1),
        ('underline', 2),
        ('outline', 3),
        ('shadow', 4),
        ('condensed', 5),
        ('extended', 6),
    )

    @property
    def bounding_box(self):
        """The font bounding box scaled to a 1000 unit em square.

        Returned as ``[x_min, y_min, x_max, y_max]``.
        """
        scale = 1000 / self.units_per_em
        return [(self.x_min * scale),
                (self.y_min * scale),
                (self.x_max * scale),
                (self.y_max * scale)]

    def decode_mac_style(self):
        """Return a dict mapping each style name to whether its bit is set.

        Bit 0 is bold, bit 1 italic, bit 2 underline, bit 3 outline,
        bit 4 shadow, bit 5 condensed and bit 6 extended.
        """
        return {
            style: bool(self.mac_style & (1 << bit))
            for style, bit in self._MAC_STYLE_BITS
        }
class TTF_hhea(PackedFormat):
    """Field layout of the ``hhea`` table.

    The attribute names (including their historical spellings) are part of
    the interface and are kept exactly as they are.
    """
    FORMAT = [
        dict(name='version', format='i', convert=fixed_version),
        dict(name='ascender', format='h'),
        dict(name='descender', format='h'),
        dict(name='line_gap', format='h'),
        dict(name='advance_width_max', format='H'),
        dict(name='min_left_side_bearing', format='h'),
        dict(name='min_right_dide_brearing', format='h'),
        dict(name='x_max_extant', format='h'),
        dict(name='caret_slope_rise', format='h'),
        dict(name='caret_slope_run', format='h'),
        dict(name='caret_offset', format='h'),
        # Eight reserved bytes, read as a single 64 bit value.
        dict(name='reserved', format='q'),
        dict(name='metric_data_format', format='h'),
        dict(name='number_of_metrics', format='H'),
    ]
class TTF_os2(PackedFormat):
    """Field layout of the ``OS/2`` table, read through ``usMaxContext``."""
    FORMAT = [
        dict(name='version', format='H'),
        dict(name='xAvgCharWidth', format='h'),
        dict(name='weight_class', format='H'),
        dict(name='usWidthClass', format='H'),
        dict(name='fsType', format='H'),
        # Subscript / superscript geometry.
        dict(name='ySubscriptXSize', format='h'),
        dict(name='ySubscriptYSize', format='h'),
        dict(name='ySubscriptXOffset', format='h'),
        dict(name='ySubscriptYOffset', format='h'),
        dict(name='ySuperscriptXSize', format='h'),
        dict(name='ySuperscriptYSize', format='h'),
        dict(name='ySuperscriptXOffset', format='h'),
        dict(name='ySuperscriptYOffset', format='h'),
        dict(name='yStrikeoutSize', format='h'),
        dict(name='yStrikeoutPosition', format='h'),
        dict(name='sFamilyClass', format='h'),
        dict(name='panose', format='10s'),
        # Unicode coverage bit fields.
        dict(name='ulUnicodeRange1', format='I'),
        dict(name='ulUnicodeRange2', format='I'),
        dict(name='ulUnicodeRange3', format='I'),
        dict(name='ulUnicodeRange4', format='I'),
        dict(name='achVendID', format='4s'),
        dict(name='fsSelection', format='H'),
        dict(name='usFirstCharIndex', format='H'),
        dict(name='usLastCharIndex', format='H'),
        dict(name='sTypoAscender', format='h'),
        dict(name='sTypoDescender', format='h'),
        dict(name='typo_line_gap', format='h'),
        dict(name='win_ascent', format='H'),
        dict(name='win_descent', format='H'),
        dict(name='ulCodePageRange1', format='I'),
        dict(name='ulCodePageRange2', format='I'),
        dict(name='sxHeight', format='h'),
        dict(name='cap_height', format='h'),
        dict(name='usDefaultChar', format='H'),
        dict(name='usBreakChar', format='H'),
        dict(name='usMaxContext', format='H'),
    ]
class TTF_post(PackedFormat):
    """Fixed-size header of the ``post`` table.

    ``italic_angle`` holds the raw 16.16 fixed point value as stored in the
    file; it is not converted to degrees here.
    """
    FORMAT = [
        {'name': 'version', 'format': 'I', 'convert': fixed_version},
        # italicAngle is a *signed* 16.16 value (italic faces slant with a
        # negative angle), so it has to be read and written as signed.
        {'name': 'italic_angle', 'format': 'i'},
        {'name': 'underline_position', 'format': 'h'},
        {'name': 'underline_thickness', 'format': 'h'},
        {'name': 'is_fixed_pitch', 'format': 'I'},
        {'name': 'min_mem_type42', 'format': 'I'},
        {'name': 'max_mem_type42', 'format': 'I'},
        {'name': 'min_mem_type1', 'format': 'I'},
        {'name': 'max_mem_type1', 'format': 'I'},
    ]
class TTF_maxp(PackedFormat):
    """Leading fields of the ``maxp`` table: the version and the glyph count."""
    FORMAT = [
        dict(name='version', format='I', convert=fixed_version),
        dict(name='num_glyphs', format='H'),
    ]
class TTF_cmap4(PackedFormat):
    """A format 4 (segment mapping) ``cmap`` subtable.

    The caller has already consumed the two leading uint16 values (format
    and length), so parsing starts at the ``language`` field.  ``seg_count``
    is the stored value halved.

    Attributes:
        ranges:    one CMAPRange per segment, in file order.
        glyph_ids: the glyph id array that follows the segment arrays.
    """
    FORMAT = [
        {'name': 'language', 'format': 'H'},
        {'name': 'seg_count', 'format': 'H', 'convert': '_halve_'},
        {'name': 'src_range', 'format': 'H'},
        {'name': 'entry_selector', 'format': 'H'},
        {'name': 'range_shift', 'format': 'H'},
    ]

    @staticmethod
    def _halve_(n):
        return int(n / 2)

    class CMAPRange:
        """One segment: the codes ``start``..``end`` and how to map them."""

        def __init__(self, start, end, delta, offset, n_segments):
            """
            :param start: first character code of the segment.
            :param end: last character code of the segment (inclusive).
            :param delta: value added (modulo 65536) to obtain the glyph id.
            :param offset: the segment's range offset in bytes, 0 for none.
            :param n_segments: number of segments from this one to the end
                               of the range offset array.
            """
            self.start = start
            self.end = end
            self.delta = delta
            # The range offset is relative to its own slot in the offset
            # array; convert it into an index into the glyph id array.
            self.offset = 0 if offset == 0 else int(offset / 2 - n_segments)
            # Index 0 of the glyph id array is a legitimate target, so the
            # converted offset alone cannot tell the two mapping modes apart.
            self.indexed = offset != 0

        def contains(self, n):
            return self.start <= n <= self.end

        def coverage(self):
            return range(self.start, self.end + 1)

        def char_to_glyph(self, n, glyphs):
            """Return the glyph id for character ``n``.

            :param glyphs: the subtable's glyph id array.
            :return: the glyph id, or 0 (the missing glyph) when the glyph id
                     array has no usable entry for ``n``.
            """
            if not self.indexed:
                return (n + self.delta) & 0xFFFF
            idx = self.offset + n - self.start
            if not 0 <= idx < len(glyphs):
                print("Invalid index for glyphs! {}".format(idx))
                return 0
            glyph = glyphs[idx]
            if glyph == 0:
                # A stored 0 means "missing glyph"; the delta is not applied.
                return 0
            return (glyph + self.delta) & 0xFFFF

    def __init__(self, fh=None, length=None):
        """Parse the subtable from ``fh``.

        :param fh: stream positioned just after the format and length fields,
                   or None to create an empty subtable.
        :param length: the subtable length taken from its header.
        """
        self.ranges = []
        self.glyph_ids = []
        if fh is None:
            PackedFormat.__init__(self, fh)
            return
        # The format and length fields (4 bytes) precede the current position.
        start = fh.tell() - 4
        PackedFormat.__init__(self, fh)
        seg_count = self.seg_count
        # The end codes are followed by one reserved pad value.
        end_codes = read_list_uint16(fh, seg_count + 1)
        if end_codes[seg_count] != 0:
            print("INVALID pad byte....")
            return
        start_codes = read_list_uint16(fh, seg_count)
        iddelta = read_list_int16(fh, seg_count)
        id_offset = read_list_uint16(fh, seg_count)
        # Whatever remains of the subtable is the glyph id array.
        ids_length = max(0, (length - (fh.tell() - start)) // 2)
        self.glyph_ids = read_list_uint16(fh, ids_length)
        for n in range(seg_count):
            self.ranges.append(self.CMAPRange(start_codes[n], end_codes[n], iddelta[n],
                                              id_offset[n], seg_count - n))

    def __len__(self):
        return len(self.ranges)

    def char_to_glyph(self, char):
        """Return the glyph id for ``char`` or None when no segment covers it."""
        for r in self.ranges:
            if r.contains(char):
                return r.char_to_glyph(char, self.glyph_ids)
        return None

    def as_map(self, max_char):
        """Return ``{character code: glyph id}`` for the covered codes up to
        and including ``max_char``."""
        cm = {}
        for r in self.ranges:
            if r.start > max_char:
                continue
            # Stay inside the segment: stop at whichever comes first, the end
            # of the segment or the requested limit.
            for c in range(r.start, min(r.end, max_char) + 1):
                cm[c] = r.char_to_glyph(c, self.glyph_ids)
        return cm
class TTF_cmap6(PackedFormat):
    """A format 6 (trimmed table) ``cmap`` subtable.

    The glyph id array covers the ``entry_count`` consecutive character codes
    that start at ``first_code``.

    Attributes:
        char_map:  ``{character code: glyph id}``.
        glyph_map: ``{glyph id: [character codes]}``.
    """
    FORMAT = [
        {'name': 'language', 'format': 'H'},
        {'name': 'first_code', 'format': 'H'},
        {'name': 'entry_count', 'format': 'H'},
    ]

    def __init__(self, fh, length):
        """Parse from ``fh``; ``length`` is accepted for symmetry but unused."""
        PackedFormat.__init__(self, fh)
        self.char_map = {}
        self.glyph_map = {}
        mapping = read_list_uint16(fh, self.entry_count)
        for n, glyph in enumerate(mapping):
            # Entry n belongs to character code first_code + n, not to n.
            code = self.first_code + n
            self.char_map[code] = glyph
            self.glyph_map.setdefault(glyph, []).append(code)

    def __len__(self):
        return len(self.char_map)

    def char_to_glyph(self, char):
        """Return the glyph id for ``char`` or None when it is not covered."""
        return self.char_map.get(char)

    def as_map(self, max_char):
        """Return ``{character code: glyph id}`` for codes up to ``max_char``."""
        return {code: glyph for code, glyph in self.char_map.items() if code <= max_char}
class TTF_cmap(PackedFormat):
    """The ``cmap`` table: a directory of encoding records and their subtables.

    Only format 4 and format 6 subtables are parsed.

    Attributes:
        tables:    ``{(platform_id, encoding_id): TTFcmapTable}``.
        map_table: the parsed subtable chosen for ``char_map``; None when no
                   supported subtable was found.
    """
    FORMAT = [
        {'name': 'version', 'format': 'H'},
        {'name': 'count', 'format': 'H'},
    ]
    # (platform id, encoding id) pairs in order of preference.
    PREFS = [(0, 4), (0, 3), (3, 1)]

    def __init__(self, fh=None, length=0):
        """Parse the table from ``fh``; ``length`` is accepted but unused."""
        self.count = 0
        self.tables = {}
        self.map_table = None
        start_pos = fh.tell() if fh is not None else 0
        PackedFormat.__init__(self, fh)
        if self.count == 0:
            return

        for _ in range(self.count):
            tbl = TTFcmapTable(fh)
            self.tables[(tbl.platform_id, tbl.encoding_id)] = tbl
            # Visit the subtable, then come back for the next record.
            pos = fh.tell()
            fh.seek(start_pos + tbl.offset)
            tbl.format, sub_length = read_list_uint16(fh, 2)
            if tbl.format == 4:
                tbl.map_data = TTF_cmap4(fh, sub_length)
            elif tbl.format == 6:
                tbl.map_data = TTF_cmap6(fh, sub_length)
            fh.seek(pos)

        self.map_table = self._choose_map_table()

    def _choose_map_table(self):
        """Pick a subtable by preference, else the first one that has data."""
        for p in self.PREFS:
            if p in self.tables and self.tables[p].has_map_data:
                return self.tables[p].map_data
        for t in self.tables.values():
            if t.has_map_data:
                return t.map_data
        return None

    def _candidate_maps(self):
        """Yield the parsed subtables to search, preferred ones first."""
        seen = []
        for p in self.PREFS:
            tbl = self.tables.get(p)
            if tbl is not None and tbl.has_map_data:
                seen.append(tbl.map_data)
                yield tbl.map_data
        if self.map_table is not None and not any(m is self.map_table for m in seen):
            yield self.map_table

    def char_to_glyph(self, char, fh):
        """Return the glyph id for ``char`` or None when it is not mapped.

        :param fh: kept for compatibility with existing callers; the lookup
                   works on the already parsed subtables and does not read
                   from it.
        """
        for map_data in self._candidate_maps():
            lookup = getattr(map_data, 'char_to_glyph', None)
            if lookup is None:
                continue
            glyph = lookup(char)
            if glyph is not None:
                return glyph
        return None

    def char_map(self, max_char=256):
        """Return ``{character code: glyph id}`` from the chosen subtable.

        An empty dict is returned when there is no usable subtable.
        """
        as_map = getattr(self.map_table, 'as_map', None)
        if as_map is None:
            return {}
        return as_map(max_char)

    def as_table_string(self):
        s = PackedFormat.as_table_string(self)
        # Iterate the table objects, not the (platform, encoding) keys.
        for n, t in enumerate(self.tables.values()):
            s += '\nTable: {}\n'.format(n)
            s += t.as_table_string()
        return s
class TTFcmapTable(PackedFormat):
    """One encoding record of the ``cmap`` table.

    ``format`` and ``map_data`` are filled in by TTF_cmap once the subtable
    the record points at has been inspected.
    """
    FORMAT = [
        {'name': 'platform_id', 'format': 'H'},
        {'name': 'encoding_id', 'format': 'H'},
        {'name': 'offset', 'format': 'I'},
    ]

    def __init__(self, fh=None):
        PackedFormat.__init__(self, fh)
        self.format = 0
        self.map_data = None
        self.position = 0

    @property
    def has_map_data(self):
        """True when a subtable was parsed and it contains at least one entry."""
        return self.map_data is not None and len(self.map_data) > 0

    def as_map(self, max_char):
        """Return ``{character code: glyph id}`` for codes up to ``max_char``.

        The parsed subtable builds the map itself (its individual ranges have
        no ``as_map``); an empty dict is returned when nothing was parsed.
        """
        if self.map_data is None:
            return {}
        return dict(self.map_data.as_map(max_char))
class TTF_glyf(PackedFormat):
    """Header of a single glyph in the ``glyf`` table.

    A negative ``contours`` value marks a compound glyph, i.e. one built from
    other glyphs; the ids of those glyphs are collected in ``required``.

    Attributes:
        glyph:      the glyph number passed in by the caller.
        components: child glyph objects (left empty by this class).
        required:   set of glyph ids this glyph directly refers to.
    """
    FORMAT = [
        {'name': 'contours', 'format': 'h'},
        {'name': 'x_min', 'format': 'h'},
        {'name': 'y_min', 'format': 'h'},
        {'name': 'x_max', 'format': 'h'},
        {'name': 'y_max', 'format': 'h'},
    ]
    # Size in bytes of the five header fields.
    _HEADER_SIZE = 10

    def __init__(self, fh=None, num=0, data=None):
        """
        :param fh: stream positioned at the glyph, or None.
        :param num: the glyph number, stored as ``glyph``.
        :param data: the glyph's bytes, used when ``fh`` is None.
        """
        self.glyph = num
        self.components = []
        self.required = set()
        # Default for an object created with neither a stream nor data.
        self.contours = 0
        PackedFormat.__init__(self, fh=fh, data=data)

        # If the glyph is a compound glyph, ie it's made up of parts of other glyphs,
        # then we need to ensure we have all the component glyphs listed.
        if self.contours < 0:
            if fh is not None:
                self._read_components(fh)
            elif data is not None:
                self._scan_components(data)

    def _read_components(self, fh):
        """Collect component glyph ids by reading component records from ``fh``."""
        while True:
            (flags, next_glyph) = read_list_uint16(fh, 2)
            self.required.add(next_glyph)
            # Skip the arguments / transform that follow; their size depends
            # on the flags.
            fh.read(calcsize(glyf_skip_format(flags)))
            if not glyph_more_components(flags):
                break

    def _scan_components(self, data):
        """Collect component glyph ids from an in-memory copy of the glyph."""
        pos = self._HEADER_SIZE
        while pos + 4 <= len(data):
            flags, next_glyph = unpack('>HH', data[pos:pos + 4])
            self.required.add(next_glyph)
            pos += 4 + calcsize(glyf_skip_format(flags))
            if not glyph_more_components(flags):
                break

    def is_compound(self):
        return self.contours < 0

    def glyph_set(self):
        """Return the sorted ids required by this glyph and its components."""
        rqd = set(self.required)
        for c in self.components:
            # ``rqd`` is a set, so it is extended with update().
            rqd.update(c.required)
        return sorted(rqd)
class TTFCollectionHeader(PackedFormat):
    """Header found at the start of a font file.

    For a collection (tag ``b'ttcf'``) ``offsets`` lists the file offset of
    every contained font.  Any other file is treated as a single font that
    starts at offset 0.

    Attributes:
        is_collection: True when the file starts with the collection tag.
        offsets:       file offsets of the fonts in the file.
        dsig_tag, dsig_length, dsig_offset: only set for version 2
            collections that carry the three trailing fields.
    """
    FORMAT = [
        {'name': 'tag', 'format': '4s'},
        {'name': 'version', 'format': 'I', 'convert': fixed_version},
        {'name': 'count', 'format': 'I'}
    ]

    def __init__(self, fh):
        PackedFormat.__init__(self, fh)
        self.offsets = []
        self.is_collection = (self.tag == b'ttcf')
        if not self.is_collection:
            # A plain font file: the bytes just read were not a collection
            # header, so none of its other fields mean anything.
            self.count = 1
            self.offsets = [0]
            return

        for _ in range(self.count):
            self.offsets.append(unpack('>I', fh.read(4))[0])
        if self.version == 2:
            # Like everything else in the file these fields are big-endian.
            fmt = '>III'
            raw = fh.read(calcsize(fmt))
            if len(raw) == calcsize(fmt):
                self.dsig_tag, self.dsig_length, self.dsig_offset = unpack(fmt, raw)
class TTF_gpos(PackedFormat):
    """Header of the ``GPOS`` table: a version and three 16 bit list fields."""
    FORMAT = [
        dict(name='version', format='I', convert=fixed_version),
        dict(name='script_list', format='H'),
        dict(name='feature_list', format='H'),
        dict(name='lookup_list', format='H'),
    ]
class OFT_ScriptList(PackedFormat):
    """A script list: a count followed by that many OFT_ScriptRecord entries."""
    FORMAT = [
        dict(name='count', format='H'),
    ]

    def __init__(self, fh, length=None):
        """Read the count and then the records; ``length`` is unused."""
        self.records = []
        PackedFormat.__init__(self, fh)
        remaining = self.count
        while remaining > 0:
            self.records.append(OFT_ScriptRecord(fh))
            remaining -= 1
class OFT_ScriptRecord(PackedFormat):
    """One script list entry: a four byte tag and a 16 bit offset."""
    FORMAT = [
        dict(name='tag', format='4s'),
        dict(name='offset', format='H'),
    ]

257
zttf/subset.py Normal file
View File

@@ -0,0 +1,257 @@
from io import BytesIO
from struct import pack, unpack, calcsize, error as struct_error
from zttf.objects import TTF_post, TTFHeader, TTFOffsetTable, TTF_kern, TTF_kern_subtable
from zttf.utils import Range, glyph_more_components, glyf_skip_format, ttf_checksum, binary_search_parameters
class TTFSubset:
    """Builds a new font file containing only the glyphs needed for a set of
    character codes taken from a parent font.

    Typical use is ``TTFSubset(parent, codes).output()``.  ``output()`` may be
    called repeatedly; every call rebuilds the subset from scratch.

    Attributes:
        parent:          the font the glyphs are taken from.
        subset:          the requested character codes.
        tables:          ``{tag: BytesIO}`` holding each output table.
        required_glyphs: sorted parent glyph ids going into the subset;
                         a glyph's position in this list is its new id.
        glyph_map:       ``{parent glyph id: new glyph id}``.
        char_to_glyph:   ``{character code: new glyph id}``.
        glyph_to_char:   ``{new glyph id: [character codes]}``.
        metrics:         (advance width, left side bearing) per new glyph.
    """

    # Tables copied byte for byte from the parent when it has them.  Note the
    # trailing space: table tags are always four bytes long.
    COPIED_TABLES = (b'name', b'cvt ', b'fpgm', b'prep', b'gasp')

    # Largest byte offset representable in the short ``loca`` format, which
    # stores offsets divided by two in 16 bits.
    MAX_SHORT_LOCA_OFFSET = 0x1FFFE

    def __init__(self, parent, subset):
        self.parent = parent
        self.subset = subset

        self.tables = {}
        # We need to build 2 maps, one for character -> glyph and one
        # for glyph -> character
        self.orig_char_to_glyph = {}
        self.orig_glyph_to_char = {}
        self.glyph_map = {}
        self.char_to_glyph = {}
        self.glyph_to_char = {}
        self.cmap_ranges = []
        self.required_glyphs = [0]
        self.metrics = []
        self.index_to_loc_format = 0

        self.fh = None

    def start_table(self, tag, data=None):
        """Create (or replace) the buffer for table ``tag`` and return it.

        ``data``, when given, is written to the new buffer first.
        """
        b = BytesIO()
        if data is not None:
            b.write(data)
        self.tables[tag] = b
        return b

    def find_glyph_subset(self):
        """Work out which parent glyphs are needed and assign their new ids.

        Glyph 0 is always included.  Characters the parent cannot map are
        reported and skipped.
        """
        # Start from a clean slate so that repeated calls give the same answer.
        self.orig_char_to_glyph = {}
        self.orig_glyph_to_char = {}
        self.char_to_glyph = {}
        self.glyph_to_char = {}

        codes = list(self.subset)
        # Ask for a map that reaches the highest requested character.
        max_char = max([256] + codes)
        char_to_glyphs = self.parent.get_table(b'cmap').char_map(max_char)

        wanted = [0]
        for code in codes:
            glyph = char_to_glyphs.get(code)
            if glyph is None:
                print("Unknown character in parent mapping: {}".format(code))
                continue
            self.orig_char_to_glyph[code] = glyph
            chars = self.orig_glyph_to_char.setdefault(glyph, [])
            if code not in chars:
                chars.append(code)
            if glyph not in wanted:
                wanted.append(glyph)

        # Compound glyphs pull in the glyphs they are built from.
        required = set(wanted)
        for glyph in wanted:
            required.update(self.parent.get_glyph_components(glyph))
        self.required_glyphs = sorted(required)

        # A glyph's new id is its position in the output, starting at 0; the
        # glyf, loca and hmtx tables are all written in this order.
        self.glyph_map = {old: new for new, old in enumerate(self.required_glyphs)}
        for old, new in self.glyph_map.items():
            chars = self.orig_glyph_to_char.get(old)
            if chars:
                for cc in chars:
                    self.char_to_glyph[cc] = new
                self.glyph_to_char[new] = chars

    def copy_tables(self):
        """Copy or regenerate the tables that do not depend on glyph outlines.

        Expects ``get_glyphs`` to have run already, because the ``head`` and
        ``hhea`` tables record results it produces.
        """
        for tag in self.COPIED_TABLES:
            # Look in the parent's table directory: the parent only parses a
            # handful of tables, and these are not among them.
            tbl = self.parent.header.get_tag(tag)
            if tbl is None:
                continue
            buff = self.start_table(tag)
            self.fh.seek(tbl.offset)
            buff.write(self.fh.read(tbl.length))

        new_post = TTF_post()
        # Version 3.0: a post table that carries no glyph names.
        new_post.version_raw = 0x00030000
        for f in ['italic_angle', 'underline_position', 'underline_thickness', 'is_fixed_pitch']:
            value = self.parent.get_table_attr(b'post', f)
            if value is not None:
                setattr(new_post, f, value)
        self.start_table(b'post', new_post.as_bytes())

        head = self.parent.copy_table(b'head')
        head.checksum_adj = 0
        head.index_to_loc_format = self.index_to_loc_format
        self.start_table(b'head', head.as_bytes())

        hhea = self.parent.copy_table(b'hhea')
        hhea.number_of_metrics = len(self.metrics)
        self.start_table(b'hhea', hhea.as_bytes())

        maxp = self.parent.copy_table(b'maxp')
        maxp.num_glyphs = len(self.required_glyphs)
        self.start_table(b'maxp', maxp.as_bytes())

        os2 = self.parent.copy_table(b'os2')
        if os2 is not None:
            # 'os2' is only the parent's internal alias; the tag written to
            # the file has to be the real one.
            self.start_table(b'OS/2', os2.as_bytes())

        # todo - is it worth finding a way to subset the GPOS and LTSH tables?

    def build_cmap_ranges(self):
        """Group the character map into runs of consecutive codes and glyphs."""
        # As we will likely have a scattered map we will use CMAP Format 4.
        # We take the character mappings we have and build 4 lists...
        #   start code
        #   end code
        #   id delta
        #   range offset
        self.cmap_ranges = []
        for cc, glyph in sorted(self.char_to_glyph.items()):
            if self.cmap_ranges and self.cmap_ranges[-1].is_consecutive(cc, glyph):
                self.cmap_ranges[-1].expand(cc)
            else:
                self.cmap_ranges.append(Range(cc, glyph))

    def add_cmap_table(self):
        """Write a ``cmap`` table holding one format 4 Windows Unicode subtable."""
        # Always rebuild, so a second call does not pick up stale ranges.
        self.build_cmap_ranges()

        # The segment list must end with a segment for 0xFFFF.  A delta of 1
        # wraps 0xFFFF round to glyph 0, the missing glyph.
        terminator = Range(0xffff, 0)
        terminator.iddelta = 1
        segments = list(self.cmap_ranges) + [terminator]

        seg_count = len(segments)
        src_range, entry_selector, range_shift = binary_search_parameters(seg_count * 2)
        # 14 header bytes + 2 pad bytes + five 16 bit arrays of seg_count
        # entries (end, start, delta, offset, glyph ids).
        length = 16 + 10 * seg_count

        data = [
            0,  # version
            1,  # number of subtables
            3,  # platform id (MS)
            1,  # encoding id (Unicode)
            0, 12,  # subtable location
            # subtable
            4,  # format
            length,  # length
            0,  # language
            seg_count * 2,  # seg count * 2
            src_range,  # search range (2 ** floor(log2(seg_count)))
            entry_selector,  # entry selector log2(src_range / 2)
            seg_count * 2 - src_range,  # range shift ( 2 * seg_count - search_range)
        ]
        data.extend([r.end for r in segments])
        data.append(0)
        data.extend([r.start for r in segments])

        buff = self.start_table(b'cmap')
        buff.write(pack(">{}H".format(len(data)), *data))
        buff.write(pack(">{}h".format(seg_count), *[r.iddelta for r in segments]))
        buff.write(pack(">{}H".format(seg_count), *[r.offset for r in segments]))
        buff.write(pack(">{}H".format(seg_count), *[r.start_glyph for r in segments]))

    def _remap_components(self, data):
        """Return compound glyph ``data`` with component ids renumbered."""
        buf = bytearray(data)
        pos = 10  # component records follow the 10 byte glyph header
        while True:
            flags, next_glyph = unpack(">HH", bytes(buf[pos: pos + 4]))
            buf[pos + 2: pos + 4] = pack(">H", self.glyph_map[next_glyph])
            pos += 4 + calcsize(glyf_skip_format(flags))
            if not glyph_more_components(flags):
                break
        return bytes(buf)

    def get_glyphs(self):
        """Write the ``glyf``, ``loca`` and ``hmtx`` tables for the subset."""
        offsets = []
        self.metrics = []
        buff = self.start_table(b'glyf')
        for g in self.required_glyphs:
            offsets.append(buff.tell())
            # Every glyph has metrics, including ones without an outline
            # (a space, for example).
            self.metrics.append(self.parent.glyph_metrics[g])
            data = self.parent.get_glyph_data(g)
            if not data:
                continue
            if unpack(">h", data[:2])[0] < 0:
                # need to adjust glyph index...
                data = self._remap_components(data)
            buff.write(data)
            if len(data) % 2:
                # Keep offsets even so the short loca format can hold them.
                buff.write(b'\0')
        # loca has one entry more than there are glyphs: the end of the last.
        offsets.append(buff.tell())

        loca = self.start_table(b'loca')
        if offsets[-1] > self.MAX_SHORT_LOCA_OFFSET:
            self.index_to_loc_format = 1
            loca.write(pack(">{}I".format(len(offsets)), *offsets))
        else:
            self.index_to_loc_format = 0
            loca.write(pack(">{}H".format(len(offsets)), *[o // 2 for o in offsets]))

        hmtx = self.start_table(b'hmtx')
        for m in self.metrics:
            hmtx.write(pack(">Hh", *m))

    def add_kern_data(self):
        """Write a ``kern`` table with the parent's pairs that survive subsetting.

        No table is written when none of the pairs apply.
        """
        entries = {}
        for (left, right), diff in self.parent.glyph_kern.items():
            if left in self.glyph_map and right in self.glyph_map:
                entries[(self.glyph_map[left], self.glyph_map[right])] = diff
        if not entries:
            return

        kern = self.start_table(b'kern')
        kh = TTF_kern()
        kh.version = 0
        kh.num_tables = 1
        kern.write(kh.as_bytes())
        st = TTF_kern_subtable()
        st.length = len(st) + 6 * len(entries) + 8
        st.version = 0
        st.coverage = 1
        kern.write(st.as_bytes())
        kern.write(pack(">H", len(entries)))
        kern.write(pack(">HHH", *binary_search_parameters(len(entries))))
        # Readers binary search the pairs, so they must be written in order.
        for (left, right), diff in sorted(entries.items()):
            kern.write(pack(">HHh", left, right, diff))

    # Put the TTF file together
    def output(self):
        """ Generate a binary based on the subset we have been given.

        :return: the bytes of the complete subset font file.
        """
        self.tables = {}
        self.fh = open(self.parent.filename, 'rb')
        try:
            self.fh.seek(self.parent.start_pos)
            self.find_glyph_subset()
            self.add_kern_data()
            # Glyphs first: copy_tables records the metric count and the
            # loca format that get_glyphs works out.
            self.get_glyphs()
            self.copy_tables()
            self.add_cmap_table()
        finally:
            self.fh.close()

        header = TTFHeader()
        header.num_tables = len(self.tables)
        header.version_raw = 0x00010000

        output = BytesIO()
        # Same value order as every other use of binary_search_parameters.
        header.search_range, header.entry_selector, header.range_shift = \
            binary_search_parameters(len(self.tables))
        output.write(header.as_bytes())

        head_offset = 0
        offset = output.tell() + 16 * len(self.tables)
        sorted_tables = sorted(self.tables.keys())
        for tag in sorted_tables:
            if tag == b'head':
                head_offset = offset
            tbl = TTFOffsetTable()
            tbl.tag = tag
            tbl.offset = offset
            data = self.tables[tag].getvalue()
            tbl.length = len(data)
            tbl.calculate_checksum(data)
            offset += tbl.padded_length()
            output.write(tbl.as_bytes())

        for tag in sorted_tables:
            data = self.tables[tag].getvalue()
            # Pad up to the next multiple of four, matching the offsets
            # written to the directory above.
            data += b'\0' * (-len(data) % 4)
            output.write(data)

        data = output.getvalue()
        # Reduce modulo 2**32 so the adjustment always fits an unsigned long.
        checksum = (0xB1B0AFBA - ttf_checksum(data)) & 0xFFFFFFFF
        return data[:head_offset + 8] + pack(">I", checksum) + data[head_offset + 12:]

    def dump_tables(self):
        """Print the tag and current size of every output table."""
        for n in sorted(self.tables):
            print("{} {} bytes".format(n, self.tables[n].tell()))

272
zttf/ttf.py Normal file
View File

@@ -0,0 +1,272 @@
from copy import copy
from struct import calcsize, unpack
from zttf.objects import TTFHeader, TTF_head, TTF_name, TTF_hhea, TTF_os2, TTF_post, TTF_maxp, TTF_cmap, TTF_glyf, \
TTF_kern
from zttf.subset import TTFSubset
from zttf.utils import read_list_uint16, read_list_uint32
class TTFont(object):
    """A single font (face) inside a font file.

    The font is parsed when the object is created; afterwards the file is
    reopened on demand by the methods that need to read glyph data.

    Attributes:
        header:        the parsed TTFHeader (table directory).
        tables:        ``{tag: parsed table}`` for the tables read so far.
        filename:      path of the font file.
        start_pos:     offset of this font within the file.
        idx_format:    the ``head`` table's index_to_loc_format.
        n_glyphs:      number of glyphs according to ``maxp``.
        glyph_metrics: list of (advance width, left side bearing) per glyph.
        glyph_kern:    ``{(left glyph, right glyph): adjustment}``.

    The names listed in COMMON_DATA are also available as attributes.
    """

    def __init__(self, filename, offset):
        self.header = None
        self.tables = {}
        self.filename = filename
        self.start_pos = offset
        self.idx_format = 0
        self.n_glyphs = 0
        self.glyph_metrics = []
        self.glyph_kern = {}
        self.file_handle = None
        self.parse()

    def parse(self):
        """Read the header and the tables this class understands.

        Nothing beyond the header is read when its version check fails.  The
        file is closed again in every case.
        """
        self._open()
        try:
            self.header = self._read_class(TTFHeader)
            if not self.header.check_version():
                return
            self.get_table(b'head', TTF_head)
            self.get_table(b'name', TTF_name)
            self.get_table(b'hhea', TTF_hhea)
            self.get_table(b'os2', TTF_os2)
            self.get_table(b'post', TTF_post)
            self.get_table(b'maxp', TTF_maxp)
            self.get_table(b'cmap', TTF_cmap)
            self.get_table(b'kern', TTF_kern)

            self.idx_format = self.get_table_attr(b'head', 'index_to_loc_format')
            self.n_glyphs = self.get_table_attr(b'maxp', 'num_glyphs', 0)

            self.get_hmtx()
            self.get_loca()
            if b'kern' in self.tables:
                self.get_kern_data()
        finally:
            self._close()

    # attribute name -> (table tag, attribute or name id[, default])
    COMMON_DATA = {
        'font_family': (b'name', 1),
        'name': (b'name', 6),
        'ascender': (b'hhea', 'ascender'),
        'descender': (b'hhea', 'descender'),
        'units_per_em': (b'head', 'units_per_em', 1000),
        'cap_height': (b'os2', 'cap_height', 0),
        'bounding_box': (b'head', 'bounding_box'),
        'italic_angle': (b'post', 'italic_angle'),
        'underline_position': (b'post', 'underline_position'),
        'underline_thickness': (b'post', 'underline_thickness'),
        'weight_class': (b'os2', 'weight_class'),
        'line_gap': (b'hhea', 'line_gap'),
        'typo_line_gap': (b'os2', 'typo_line_gap'),
        'win_ascent': (b'os2', 'win_ascent'),
        'win_descent': (b'os2', 'win_descent')
    }

    def __getattr__(self, item):
        """Resolve the COMMON_DATA names from the parsed tables.

        :raises AttributeError: for any other name, as the attribute protocol
            requires (returning None would make every misspelt attribute
            silently "exist").
        """
        if item in self.COMMON_DATA:
            how = self.COMMON_DATA[item]
            if how[0] == b'name':
                return self.get_name_table(*how[1:])
            return self.get_table_attr(*how[:3])
        raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, item))

    @property
    def stemv(self):
        return 50 + int(pow((self.weight_class / 65.0), 2))

    @property
    def italic(self):
        return self.italic_angle != 0

    def get_string_width(self, string):
        """Return the width of ``string`` in font units.

        The advance widths are summed, the left side bearing of the first
        glyph is removed, and the kerning between each pair of neighbouring
        glyphs is applied.
        """
        glyphs = [self.char_to_glyph(ord(ch)) for ch in string]
        last = len(glyphs) - 1
        width = 0
        for n, glyph in enumerate(glyphs):
            (aw, lsb) = self.glyph_metrics[glyph]
            width += aw
            if n == 0:
                width -= lsb
            if n < last:
                # Applies to the first pair as well as to all later ones.
                width += self.glyph_kern.get((glyph, glyphs[n + 1]), 0)
        return width

    def get_char_width(self, char):
        """Return the advance width of ``char`` (a character or a code).

        The width of glyph 0 is returned when the character's glyph has no
        metrics.
        """
        if isinstance(char, str):
            char = ord(char)
        idx = self.char_to_glyph(char)
        if not 0 <= idx < len(self.glyph_metrics):
            idx = 0
        return self.glyph_metrics[idx][0]

    # Internal Table Functions
    def get_table(self, tag, obj_class=None):
        """Return the parsed table ``tag``.

        When the table has not been parsed yet and ``obj_class`` is given, it
        is read from the file with that class and remembered.  None is
        returned when the table is unknown.
        """
        tbl_obj = self.tables.get(tag)
        if tbl_obj is None and obj_class is not None:
            tbl = self.header.get_tag(tag)
            if tbl is None:
                return None
            orig_pos = self._seek(tbl.offset)
            tbl_obj = self._read_class(obj_class, tbl.length)
            self.tables[tag] = tbl_obj
            self._seek(orig_pos)
        return tbl_obj

    def get_table_attr(self, tbl, attr, default=None):
        """Return attribute ``attr`` of parsed table ``tbl``, else ``default``."""
        if tbl not in self.tables:
            return default
        return getattr(self.tables[tbl], attr, default)

    def get_name_table(self, n_attr, default=None):
        """Return the string for name id ``n_attr``, else ``default``."""
        if b'name' not in self.tables:
            return default
        return self.tables[b'name'].get_name(n_attr, default)

    def copy_table(self, tag):
        """Return a shallow copy of the parsed table ``tag``."""
        tbl = self.get_table(tag)
        return copy(tbl)

    def _get_table_offset(self, tag):
        tbl = self.header.get_tag(tag)
        return tbl.offset if tbl is not None else 0

    def get_hmtx(self):
        """ Read the glyph metrics. """
        n_metrics = self.get_table_attr(b'hhea', 'number_of_metrics', 0)
        offset = self._get_table_offset(b'hmtx')
        if offset == 0:
            return False
        self._seek(offset)
        aw = 0
        for n in range(n_metrics):
            aw, lsb = unpack(">Hh", self.file_handle.read(4))
            self.glyph_metrics.append((aw, lsb))
        # Now we have read the aw and lsb for specific glyphs, we need to read additional
        # lsb data.  Those glyphs share the last advance width read above.
        extra = self.n_glyphs - n_metrics
        if extra > 0:
            lsbs = self._read_list_int16(extra)
            for n in range(extra):
                self.glyph_metrics.append((aw, lsbs[n]))

    def get_loca(self,):
        """Read the glyph offsets, stored in ``tables`` under ``b'loca'``."""
        start = self._get_table_offset(b'loca')
        if start == 0:
            # No loca table: there is nothing sensible to read.
            return
        self._seek(start)
        if self.idx_format == 0:
            # The short format stores each offset divided by two.
            self.tables[b'loca'] = [n * 2 for n in self._read_list_uint16(self.n_glyphs + 1)]
        elif self.idx_format == 1:
            self.tables[b'loca'] = self._read_list_uint32(self.n_glyphs + 1)

    def get_kern_data(self):
        """Collect the kerning pairs of every supported ``kern`` subtable."""
        kern = self.get_table(b'kern')
        for st in kern.subtables:
            if st.coverage != 1 or st.version != 0:
                print("coverage = {}, version = {} - skipping".format(st.coverage, st.version))
                continue
            self._seek(st.offset + len(st))
            # The three values after the pair count are not needed here.
            (npairs, a, b, c) = self._read_list_uint16(4)
            for n in range(npairs):
                (l, r) = self._read_list_uint16(2)
                diff = self._read_int16()
                self.glyph_kern[(l, r)] = diff

    def char_to_glyph(self, char):
        """Return the glyph id for character code ``char`` (0 when unmapped)."""
        self._open()
        cmap = self.get_table(b'cmap')
        glyph = cmap.char_to_glyph(char, self.file_handle)
        return glyph or 0

    def get_glyph_position(self, glyph):
        loca = self.get_table(b'loca')
        return loca[glyph]

    def get_glyph_components(self, glyph, _seen=None):
        """ Return a list of any component glyphs required.

        The result is sorted and includes the components of components.
        ``_seen`` is internal and guards against glyphs that refer to
        themselves, directly or indirectly.
        """
        if glyph < 0 or glyph >= self.n_glyphs:
            print("Missing glyph!!! {}".format(glyph))
            return []
        seen = set() if _seen is None else _seen
        if glyph in seen:
            return []
        seen.add(glyph)

        start = self.get_glyph_position(glyph)
        if self.get_glyph_position(glyph + 1) == start:
            # An empty glyph has no header to read and so no components.
            return []
        pos = self._get_table_offset(b'glyf') + start
        glyf = self._read_class(TTF_glyf, offset=pos, length=glyph)
        # Collect into a separate set so the glyph's own set is not changed
        # while it is being iterated.
        required = set(glyf.required)
        for g in glyf.required:
            required.update(self.get_glyph_components(g, seen))
        return sorted(required)

    def get_glyph_data(self, glyph):
        """Return the raw bytes of ``glyph`` (``b''`` for an empty glyph)."""
        data_start = self._get_table_offset(b'glyf')
        glyph_start = self.get_glyph_position(glyph)
        glyph_length = self.get_glyph_position(glyph + 1) - glyph_start
        if glyph_length == 0:
            print("Zero length glyph @ {}".format(glyph))
            return b''
        self._open()
        self.file_handle.seek(data_start + glyph_start)
        return self.file_handle.read(glyph_length)

    def get_binary_table(self, tag):
        """Return the raw bytes of table ``tag`` (``b''`` when it is absent)."""
        tbl = self.header.get_tag(tag)
        if tbl is None:
            return b''
        self._open()
        self._seek(tbl.offset)
        return self.file_handle.read(tbl.length)

    def make_subset(self, subset):
        """ Given a subset of characters, create a subset of the full TTF file suitable for
            inclusion in a PDF.
        :param subset: List of characters to include.
        :return: TTFSubset object
        """
        return TTFSubset(self, subset)

    # File functions.
    def _open(self):
        if self.file_handle is None:
            self.file_handle = open(self.filename, 'rb')
            self.file_handle.seek(self.start_pos)

    def _close(self):
        if self.file_handle is not None:
            self.file_handle.close()
            self.file_handle = None

    def _seek(self, offset, whence=0):
        """Move to ``offset`` and return the position held before the move."""
        self._open()
        pos = self.file_handle.tell()
        self.file_handle.seek(offset, whence)
        return pos

    def _read_class(self, cls, length=None, offset=None):
        """Build ``cls`` from the file, optionally seeking to ``offset`` first.

        ``length`` is passed on as the second constructor argument when given.
        """
        if offset is not None:
            self._seek(offset)
        if length is not None:
            return cls(self.file_handle, length)
        return cls(self.file_handle)

    def _skip(self, offset):
        if self.file_handle is not None:
            self.file_handle.seek(offset, 1)

    def _read_list_int16(self, n):
        _fmt = ">{}h".format(n)
        return unpack(_fmt, self.file_handle.read(calcsize(_fmt)))

    def _read_list_uint16(self, n):
        return read_list_uint16(self.file_handle, n)

    def _read_uint16(self):
        return unpack(">H", self.file_handle.read(2))[0]

    def _read_int16(self):
        return unpack(">h", self.file_handle.read(2))[0]

    def _read_list_uint32(self, n):
        return read_list_uint32(self.file_handle, n)

22
zttf/ttfile.py Normal file
View File

@@ -0,0 +1,22 @@
from os.path import exists, getsize
from zttf.objects import TTFCollectionHeader
from zttf.ttf import TTFont
class TTFile(object):
    """ A font file on disk, exposing one TTFont per offset listed in its
    collection header.
    """

    def __init__(self, filename):
        """ Read the collection header and create a face for every offset in it.

        :param filename: path of the font file to load
        :raises IOError: when the file is missing or has a size of zero
        """
        self.filename = filename
        self.faces = []
        usable = exists(filename) and getsize(filename) != 0
        if not usable:
            raise IOError("The file '{}' does not exist or is empty".format(filename))
        with open(self.filename, 'rb') as fh:
            collection = TTFCollectionHeader(fh)
            self.faces += [TTFont(filename, off) for off in collection.offsets]

    @property
    def is_valid(self):
        """ True when at least one face was loaded. """
        return bool(self.faces)

247
zttf/utils.py Normal file
View File

@@ -0,0 +1,247 @@
from struct import calcsize, pack, unpack
class PackedFormat:
    """ Class to allow simpler extraction of data from a stream into an object with
    named attributes.

    All child classes need a FORMAT list of dicts describing the data to be extracted.
    Each dict may contain:

    * 'format'  - struct format code for the field (entries without it are ignored)
    * 'name'    - attribute name to store the value under (entries without it are
                  read/written but not stored)
    * 'convert' - callable, or name of a method on the object, applied to the value
                  read; the unconverted value is kept as '<name>_raw'
    """
    FORMAT = []

    def __init__(self, fh=None, data=None, endian='>'):
        """ Optionally parse the fields straight away.

        :param fh: file-like object to read from (takes precedence over data)
        :param data: bytes to decode when no file object is given
        :param endian: struct byte-order prefix applied to every field format
        """
        self.endian = endian
        self.parsed = False
        if fh is not None:
            self.from_file(fh)
        elif data is not None:
            self.from_data(data)

    @staticmethod
    def _name_to_string(n):
        """ Turn an attribute name such as 'font_name' into 'Font name'. """
        return n.replace('_', ' ').capitalize()

    def _store_field(self, spec, value):
        """ Store one decoded value according to its FORMAT entry. """
        if 'name' not in spec:
            return
        if 'convert' in spec:
            # Keep the value as read so as_bytes() can write it back unchanged.
            setattr(self, spec['name'] + '_raw', value)
            converter = spec['convert']
            if not callable(converter):
                converter = getattr(self, converter)
            if converter is not None and callable(converter):
                value = converter(value)
        setattr(self, spec['name'], value)

    def from_file(self, fh):
        """ Read every field in FORMAT order from a file-like object.

        :param fh: object with a read(size) method returning bytes
        """
        for spec in self.FORMAT:
            if 'format' not in spec:
                continue
            fmt = '{}{}'.format(self.endian, spec['format'])
            self._store_field(spec, unpack(fmt, fh.read(calcsize(fmt)))[0])
        self.parsed = True

    def from_data(self, data):
        """ Decode every field in FORMAT order from a bytes object.

        Fields are handled exactly as in from_file(): unnamed fields are stepped
        over and 'convert' entries are applied.

        :param data: bytes holding the packed fields, starting at index 0
        """
        offset = 0
        for spec in self.FORMAT:
            if 'format' not in spec:
                continue
            fmt = '{}{}'.format(self.endian, spec['format'])
            size = calcsize(fmt)
            self._store_field(spec, unpack(fmt, data[offset: offset + size])[0])
            offset += size
        self.parsed = True

    def as_bytes(self):
        """ Pack the fields back into bytes in FORMAT order.

        Converted fields are written from their '<name>_raw' value. Missing or
        unnamed fields are written as 0, or as b'' for 's' formats.

        :return: packed bytes
        """
        chunks = []
        for spec in self.FORMAT:
            if 'format' not in spec:
                continue
            fmt = '{}{}'.format(self.endian, spec['format'])
            # struct needs bytes for 's' formats; a text '' fails on Python 3.
            default = b'' if 's' in spec['format'] else 0
            if 'name' not in spec:
                value = default
            elif 'convert' in spec:
                value = getattr(self, spec['name'] + '_raw', default)
            else:
                value = getattr(self, spec['name'], default)
            chunks.append(pack(fmt, value))
        return b''.join(chunks)

    def as_string(self):
        """ Describe the named fields, one 'Name: value' line per field.

        :return: multi-line string (fields not yet set show as None)
        """
        lines = []
        for spec in self.FORMAT:
            if 'name' not in spec:
                continue
            lines.append(' {}: {}\n'.format(self._name_to_string(spec['name']),
                                            getattr(self, spec['name'], None)))
        return ''.join(lines)

    def as_table_string(self):
        """ Describe the layout as a table of offset, format and size, followed by
        the name and value of each field that has one.

        :return: multi-line string, one line per FORMAT entry that has a format
        """
        lines = []
        offset = 0
        for spec in self.FORMAT:
            if 'format' not in spec:
                continue
            # Size with the byte-order prefix so it matches what is read/written.
            size = calcsize('{}{}'.format(self.endian, spec['format']))
            line = ' {:04X} {:4s} {:>3d} '.format(offset, spec['format'], size)
            name = spec.get('name')
            value = getattr(self, name, None) if name is not None else None
            if value is not None:
                line += '{:30s} {}'.format(self._name_to_string(name), value)
            lines.append(line + '\n')
            offset += size
        return ''.join(lines)

    def __len__(self):
        """ Total packed size in bytes of all fields that declare a format. """
        fmt = "{}".format(self.endian)
        for spec in self.FORMAT:
            if 'format' in spec:
                fmt += spec['format']
        return calcsize(fmt)
def fixed_version(num):
    """ Translate a raw 32-bit version field into a version number.

    Only the five encodings listed below are translated; any other value is
    handed back unchanged.

    :param num: version field as a 32-bit unsigned integer
    :return: version number (float) for a recognised encoding, otherwise num
    """
    known_versions = {
        0x00005000: 0.5,
        0x00010000: 1.0,
        0x00020000: 2.0,
        0x00025000: 2.5,
        0x00030000: 3.0,
    }
    return known_versions.get(num, num)
def binary_search_parameters(length):
    """ Work out the largest power of two that does not exceed length.

    :param length: number of entries
    :return: tuple of (exponent of that power, the power itself,
             length minus the power)
    """
    exponent = 0
    power = 1
    while True:
        doubled = power * 2
        if not doubled <= length:
            break
        power = doubled
        exponent += 1
    return exponent, power, length - power
class Range:
    """ A run of character codes whose glyph numbers differ from the character
    code by a constant amount (iddelta).
    """

    def __init__(self, start=0, glyph=0):
        """ Start a range holding the single character start.

        :param start: first character code of the range
        :param glyph: glyph number for that first character
        """
        self.offset = 0
        self.start_glyph = glyph
        self.iddelta = glyph - start
        self.start = start
        self.expand(start)

    def is_consecutive(self, n, g):
        """ Tell whether character n with glyph g directly continues this range. """
        if n != self.end:
            return False
        return g == self.start_glyph + n - self.start

    def expand(self, n):
        """ Move the end marker to one past character n, wrapped to 16 bits. """
        self.end = 0xffff & (n + 1)

    def __str__(self):
        details = (self.start, self.end, self.iddelta)
        return "CMAP: {} - {} @ {}".format(*details)

    def as_map(self):
        """ Debugging aid: dict of character -> character + iddelta. """
        mapping = {}
        for code in self.char_list():
            mapping[code] = code + self.iddelta
        return mapping

    def char_list(self):
        """ The character codes from start up to, but not including, end. """
        first, stop = self.start, self.end
        return range(first, stop)

    def char_to_glyph(self, char, fh):
        """ Find the glyph for a character.

        :param char: character code to look up
        :param fh: file object, only used when this range has a non-zero offset
        :return: glyph number (0 when get_glyph() rejects the value)
        """
        if self.offset != 0:
            fh.seek(self.get_offset(char))
            (stored,) = unpack(">H", fh.read(2))
            return self.get_glyph(stored)
        return self.get_glyph(char)

    def get_glyph(self, char):
        """ Apply iddelta to char (16-bit wrap); 0 when char is outside the range. """
        # NOTE(review): expand() stores end as one past the last character, yet
        # char == end is accepted here - confirm whether that is intended.
        if self.start <= char <= self.end:
            return (char + self.iddelta) & 0xffff
        return 0

    def get_offset(self, char):
        """ Position of the 2-byte entry for char; 0 when char is outside the range. """
        if self.start <= char <= self.end:
            return self.offset + 2 * (char - self.start)
        return 0
def read_list_int16(fh, n):
    """ Read n big-endian signed 16-bit integers from a file object.

    :param fh: file-like object whose read() returns bytes
    :param n: number of values to read
    :return: tuple of n integers
    """
    layout = ">" + str(n) + "h"
    raw = fh.read(calcsize(layout))
    return unpack(layout, raw)
def read_list_uint16(fh, n):
    """ Read n big-endian unsigned 16-bit integers from a file object.

    :param fh: file-like object whose read() returns bytes
    :param n: number of values to read
    :return: tuple of n integers
    """
    layout = ">" + str(n) + "H"
    raw = fh.read(calcsize(layout))
    return unpack(layout, raw)
def read_list_uint32(fh, n):
    """ Read n big-endian unsigned 32-bit integers from a file object.

    :param fh: file-like object whose read() returns bytes
    :param n: number of values to read
    :return: tuple of n integers
    """
    layout = ">" + str(n) + "I"
    raw = fh.read(calcsize(layout))
    return unpack(layout, raw)
def ttf_checksum(data):
    """ Sum the data as big-endian unsigned 32-bit words, modulo 2**32.

    Data whose length is not a multiple of four is treated as if it were
    followed by zero bytes up to the next multiple of four.

    :param data: bytes (or bytearray) to checksum; never modified
    :return: checksum as an integer in the range 0 - 0xFFFFFFFF
    """
    # -len % 4 is the number of bytes missing to reach a 4-byte boundary
    # (0 when already aligned). Build a new object so the caller's buffer is
    # left untouched.
    padded = data + b'\0' * (-len(data) % 4)
    words = unpack(">{}I".format(len(padded) // 4), padded)
    return sum(words) & 0xFFFFFFFF
#############################################################################
###
### Glyph Utilities...
###
#############################################################################
# Flag Constants
# One bit mask per flag, bits 0 to 12 in ascending order.
# NOTE(review): the names look like the composite glyph component flags of the
# 'glyf' table - confirm against the font specification.
GF_ARG_1_AND_2_ARE_WORDS = 0x0001
GF_ARGS_ARE_XY_VALUES = 0x0002
GF_ROUND_XY_TO_GRID = 0x0004
GF_WE_HAVE_A_SCALE = 0x0008
GF_RESERVED = 0x0010
GF_MORE_COMPONENTS = 0x0020
GF_WE_HAVE_AN_X_AND_Y_SCALE = 0x0040
GF_WE_HAVE_A_TWO_BY_TWO = 0x0080
GF_WE_HAVE_INSTRUCTIONS = 0x0100
GF_USE_MY_METRICS = 0x0200
GF_OVERLAP_COMPOUND = 0x0400
GF_SCALED_COMPONENT_OFFSET = 0x0800
GF_UNSCALED_COMPONENT_OFFSET = 0x1000
def glyf_skip_format(flags):
    """ Build the struct format covering the data to step over for a set of flags.

    :param flags: flag word tested against the GF_* masks
    :return: big-endian struct format string
    """
    if flags & GF_ARG_1_AND_2_ARE_WORDS:
        fmt = '>I'
    else:
        fmt = '>H'
    # Only the first flag that matches, in this order, adds to the format.
    transforms = (
        (GF_WE_HAVE_A_SCALE, 'H'),
        (GF_WE_HAVE_AN_X_AND_Y_SCALE, 'I'),
        (GF_WE_HAVE_A_TWO_BY_TWO, 'II'),
    )
    for mask, suffix in transforms:
        if flags & mask:
            return fmt + suffix
    return fmt
def glyph_more_components(flag):
    """ Isolate the GF_MORE_COMPONENTS bit of a flag word.

    :param flag: flag word to test
    :return: GF_MORE_COMPONENTS when the bit is set, otherwise 0
    """
    more = flag & GF_MORE_COMPONENTS
    return more
def glyph_flags_decode(flag):
    """ Debugging aid: print a flag word in hex followed by the name of every
    GF_* flag that is set in it, lowest bit first.

    :param flag: flag word to decode
    """
    named_flags = (
        ("GF_ARG_1_AND_2_ARE_WORDS", GF_ARG_1_AND_2_ARE_WORDS),
        ("GF_ARGS_ARE_XY_VALUES", GF_ARGS_ARE_XY_VALUES),
        # Label now matches the constant (it was "GF_ARGS_ROUND_XY_TO_GRID").
        ("GF_ROUND_XY_TO_GRID", GF_ROUND_XY_TO_GRID),
        ("GF_WE_HAVE_A_SCALE", GF_WE_HAVE_A_SCALE),
        ("GF_RESERVED", GF_RESERVED),
        ("GF_MORE_COMPONENTS", GF_MORE_COMPONENTS),
        ("GF_WE_HAVE_AN_X_AND_Y_SCALE", GF_WE_HAVE_AN_X_AND_Y_SCALE),
        ("GF_WE_HAVE_A_TWO_BY_TWO", GF_WE_HAVE_A_TWO_BY_TWO),
        ("GF_WE_HAVE_INSTRUCTIONS", GF_WE_HAVE_INSTRUCTIONS),
        ("GF_USE_MY_METRICS", GF_USE_MY_METRICS),
        ("GF_OVERLAP_COMPOUND", GF_OVERLAP_COMPOUND),
        ("GF_SCALED_COMPONENT_OFFSET", GF_SCALED_COMPONENT_OFFSET),
        ("GF_UNSCALED_COMPONENT_OFFSET", GF_UNSCALED_COMPONENT_OFFSET),
    )
    print("Glyph flag = {:04X}".format(flag))
    for label, mask in named_flags:
        if flag & mask:
            print(label)