From fb8e82d33a4898c1546d3813f9794a7020b4544c Mon Sep 17 00:00:00 2001
From: david reid
Date: Fri, 20 Mar 2015 18:10:40 +0000
Subject: [PATCH] First drop of code :-)

---
 .gitignore       |   7 +
 README.md        |  57 +++++
 example.py       |  23 ++
 zttf/__init__.py |   0
 zttf/objects.py  | 571 +++++++++++++++++++++++++++++++++++++++++++++++
 zttf/subset.py   | 257 +++++++++++++++++++++
 zttf/ttf.py      | 272 ++++++++++++++++++++++
 zttf/ttfile.py   |  22 ++
 zttf/utils.py    | 247 ++++++++++++++++++++
 9 files changed, 1456 insertions(+)
 create mode 100644 example.py
 create mode 100644 zttf/__init__.py
 create mode 100644 zttf/objects.py
 create mode 100644 zttf/subset.py
 create mode 100644 zttf/ttf.py
 create mode 100644 zttf/ttfile.py
 create mode 100644 zttf/utils.py

diff --git a/.gitignore b/.gitignore
index db4561e..bf99a82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,10 @@ docs/_build/
 
 # PyBuilder
 target/
+
+# Ignore PyCharm stuff
+.idea
+
+# Ignore any fonts copied while testing!
+*.ttf
+
diff --git a/README.md b/README.md
index 486c7d9..e6ff4fa 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,59 @@
 # zttf
 Python TTF file parser
+
+This was written to allow fonts to be parsed and then subsets generated for use in PDF documents.
+
+It was developed using Python 3.4; it will work to a degree with Python 2, but it needs additional testing and development there.
+
+## Simple Usage
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('DroidSans.ttf')
+>>> font_file.is_valid
+True
+>>> font_file.faces
+[]
+>>> face = font_file.faces[0]
+>>> face.font_family
+Droid Sans
+>>> face.name
+DroidSans
+>>> face.italic_angle
+0
+```
+
+When used with a font collection, there will be multiple faces available.
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('Futura.ttc')
+>>> font_file.is_valid
+True
+>>> font_file.faces
+[, , , ]
+>>> font_file.faces[0].font_family
+Futura
+>>> font_file.faces[0].name
+Futura-Medium
+>>> font_file.faces[1].name
+Futura-MediumItalic
+>>> font_file.faces[2].name
+Futura-CondensedMedium
+>>> font_file.faces[3].name
+Futura-CondensedExtraBold
+```
+
+Subsetting is done by passing in a subset of the characters desired. All required glyphs will be found and copied into the new file.
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('Futura.ttc')
+>>> subset = [ord('H'), ord('e'), ord('l'), ord('o')]
+>>> sub_font = font_file.faces[0].make_subset(subset)
+>>> sub_font.output()
+...
+>>> with open('new_font.ttf', 'wb') as fh:
+        fh.write(sub_font.output())
+```
+
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..0427189
--- /dev/null
+++ b/example.py
@@ -0,0 +1,23 @@
+import sys
+from zttf.ttfile import TTFile
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print("Usage: {} ".format(sys.argv[0]))
+        sys.exit(0)
+
+    t = TTFile(sys.argv[1])
+    print("Is valid? 
{}".format(t.is_valid)) + if not t.is_valid: + sys.exit(0) + + print(t.faces) + print(t.faces[0].font_family) + print(t.faces[0].name) + print(t.faces[0].italic_angle) + + subset = [ord('H'), ord('e'), ord('l'), ord('o')] + font_subset = t.faces[0].make_subset(subset) + with open('font_subset.ttf', 'wb') as fh: + fh.write(font_subset.output()) diff --git a/zttf/__init__.py b/zttf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/zttf/objects.py b/zttf/objects.py new file mode 100644 index 0000000..ab74a5e --- /dev/null +++ b/zttf/objects.py @@ -0,0 +1,571 @@ +# TrueType Font Glyph operators +from struct import unpack, calcsize +from zttf.utils import PackedFormat, fixed_version, read_list_uint16, Range, read_list_int16, glyph_more_components, \ + glyf_skip_format, ttf_checksum + + +TTF_NAMES = { + 0: 'Copyright Notice', + 1: 'Font Family Name', + 2: 'Font Subfamily Name', + 3: 'Unique Font Identifier', + 4: 'Full Font Name', + 5: 'Version String', + 6: 'Postscript Name', + 7: 'Trademark', + 8: 'Manufacturer Name', + 9: 'Designer', + 10: 'Description', + 11: 'Vendor URL', + 12: 'Designer URL', + 13: 'Licencee Description', + 14: 'Licence URL', + 15: 'Preferred Family', + 16: 'Preferred Subfamily', + 17: 'Compatible Full', + 18: 'Sample Text', + 19: 'PS CID findfont name', + 20: 'WWS Family Name', + 21: 'WWS Subfamily Name' +} + + +class TTFNameRecord(PackedFormat): + FORMAT = [ + {'name': 'platform_id', 'format': 'H'}, + {'name': 'encoding_id', 'format': 'H'}, + {'name': 'language_id', 'format': 'H'}, + {'name': 'name', 'format': 'H'}, + {'name': 'length', 'format': 'H'}, + {'name': 'offset', 'format': 'H'}, + ] + + def __init__(self, fh, data): + self.pos = fh.tell() + PackedFormat.__init__(self, fh) + self.raw = data[self.offset:self.offset + self.length] + self.value = self.raw + if self.platform_id == 1: + if self.encoding_id == 0: + self.value = self.raw.decode('iso-8859-1') + elif self.platform_id == 3: + if self.encoding_id == 1: + # UCS-2 
+ self.value = self.raw.decode('utf-16-be') + + def __str__(self): + return '{:08d} @ {:08X} - {:>30s}: {}'.format(self.pos, self.offset, + TTF_NAMES.get(self.name, 'Unknown Name {:X}'.format(self.name)), + self.value) + + +class TTF_name(PackedFormat): + FORMAT = [ + {'name': 'format', 'format': 'H'}, + {'name': 'count', 'format': 'H'}, + {'name': 'offset', 'format': 'H'}, + ] + + def __init__(self, fh, length): + start_pos = fh.tell() + PackedFormat.__init__(self, fh) + pos = fh.tell() + fh.seek(start_pos + self.offset) + data = fh.read(length - self.offset) + fh.seek(pos) + self.names = [] + for n in range(self.count): + self.names.append(TTFNameRecord(fh, data)) +# print("{} / {} - {}".format(n + 1, self.count, self.names[-1])) + + def get_name(self, name, default=None): + for n in self.names: + if n.name == name and n.platform_id == 1 and n.encoding_id == 0: + return n.value + return default + + +class TTFHeader(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'I', 'convert': fixed_version}, + {'name': 'num_tables', 'format': 'H'}, + {'name': 'search_range', 'format': 'H'}, + {'name': 'entry_selector', 'format': 'H'}, + {'name': 'range_shift', 'format': 'H'}, + ] + + def __init__(self, fh=None): + self.tables = [] + self.num_tables = 0 + PackedFormat.__init__(self, fh) + for n in range(self.num_tables): + self.tables.append(TTFOffsetTable(fh)) + + def check_version(self): + return self.version == 1 + + def get_tag(self, tag): + for t in self.tables: + if t.tag == tag: + return t + if tag == b'os2' and t.tag == b'OS/2': + return t + return None + + def dump_tables(self): + print("TTF Header Tables:") + for t in self.tables: + print(" {} @ {}".format(t.tag, t.offset)) + + +class TTF_kern(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'H'}, + {'name': 'num_tables', 'format': 'H'} + ] + def __init__(self, fh=None, length=None): + self.subtables = [] + PackedFormat.__init__(self, fh) + if fh is None: + return + for n in 
range(self.num_tables): + tbl = TTF_kern_subtable(fh) + fh.seek(tbl.length - len(tbl), 1) + self.subtables.append(tbl) + + +class TTF_kern_subtable(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'H'}, + {'name': 'length', 'format': 'H'}, + {'name': 'coverage', 'format': 'H'}, + ] + def __init__(self, fh=None): + if fh is not None: + self.offset = fh.tell() + PackedFormat.__init__(self, fh) + + + +class TTFOffsetTable(PackedFormat): + FORMAT = [ + {'name': 'tag', 'format': '4s'}, + {'name': 'checksum', 'format': 'I'}, + {'name': 'offset', 'format': 'I'}, + {'name': 'length', 'format': 'I'}, + ] + + def __str__(self): + return 'Offset Table: {} {} bytes @ {}'.format(self.tag, self.length, self.offset) + + def padded_length(self): + return self.length + 3 & ~ 3 + + def padded_data(self, data): + extra = self.padded_length() - len(data) + if extra > 0: + return data + '\0' * extra + return data + + def calculate_checksum(self, data): + self.checksum = ttf_checksum(data) + + +class TTF_head(PackedFormat): + FORMAT = [ + {'name': 'vers', 'format': 'i'}, + {'name': 'font_version', 'format': 'i'}, + {'name': 'checksum_adj', 'format': 'I'}, + {'name': 'magic_number', 'format': 'I'}, + {'name': 'flags', 'format': 'H'}, + {'name': 'units_per_em', 'format': 'H', 'convert': float}, + {'name': 'created', 'format': 'q'}, + {'name': 'modified', 'format': 'q'}, + {'name': 'x_min', 'format': 'h'}, + {'name': 'y_min', 'format': 'h'}, + {'name': 'x_max', 'format': 'h'}, + {'name': 'y_max', 'format': 'h'}, + {'name': 'mac_style', 'format': 'H'}, + {'name': 'lowest_rec_ppem', 'format': 'H'}, + {'name': 'direction_hint', 'format': 'H'}, + {'name': 'index_to_loc_format', 'format': 'h'}, + {'name': 'glyph_data_format', 'format': 'h'}, + ] + + @property + def bounding_box(self): + scale = 1000 / self.units_per_em + return [(self.x_min * scale), + (self.y_min * scale), + (self.x_max * scale), + (self.y_max * scale)] + + def decode_mac_style(self): + return { + 'bold': 
self.mac_style & 1 << 0, + 'italic': self.mac_style & 1, + 'underline': self.mac_style & 1 << 1, + 'outline': self.mac_style & 1 << 2, + 'shadow': self.mac_style & 1 << 3, + 'condensed': self.mac_style & 1 << 4, + 'extended': self.mac_style & 1 << 5 + } + + +class TTF_hhea(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'i', 'convert': fixed_version}, + {'name': 'ascender', 'format': 'h'}, + {'name': 'descender', 'format': 'h'}, + {'name': 'line_gap', 'format': 'h'}, + {'name': 'advance_width_max', 'format': 'H'}, + {'name': 'min_left_side_bearing', 'format': 'h'}, + {'name': 'min_right_dide_brearing', 'format': 'h'}, + {'name': 'x_max_extant', 'format': 'h'}, + {'name': 'caret_slope_rise', 'format': 'h'}, + {'name': 'caret_slope_run', 'format': 'h'}, + {'name': 'caret_offset', 'format': 'h'}, + {'name': 'reserved', 'format': 'q'}, + {'name': 'metric_data_format', 'format': 'h'}, + {'name': 'number_of_metrics', 'format': 'H'}, + ] + + +class TTF_os2(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'H'}, + {'name': 'xAvgCharWidth', 'format': 'h'}, + {'name': 'weight_class', 'format': 'H'}, + {'name': 'usWidthClass', 'format': 'H'}, + {'name': 'fsType', 'format': 'H'}, + {'name': 'ySubscriptXSize', 'format': 'h'}, + {'name': 'ySubscriptYSize', 'format': 'h'}, + {'name': 'ySubscriptXOffset', 'format': 'h'}, + {'name': 'ySubscriptYOffset', 'format': 'h'}, + {'name': 'ySuperscriptXSize', 'format': 'h'}, + {'name': 'ySuperscriptYSize', 'format': 'h'}, + {'name': 'ySuperscriptXOffset', 'format': 'h'}, + {'name': 'ySuperscriptYOffset', 'format': 'h'}, + {'name': 'yStrikeoutSize', 'format': 'h'}, + {'name': 'yStrikeoutPosition', 'format': 'h'}, + {'name': 'sFamilyClass', 'format': 'h'}, + {'name': 'panose', 'format': '10s'}, + {'name': 'ulUnicodeRange1', 'format': 'I'}, + {'name': 'ulUnicodeRange2', 'format': 'I'}, + {'name': 'ulUnicodeRange3', 'format': 'I'}, + {'name': 'ulUnicodeRange4', 'format': 'I'}, + {'name': 'achVendID', 'format': '4s'}, + 
{'name': 'fsSelection', 'format': 'H'}, + {'name': 'usFirstCharIndex', 'format': 'H'}, + {'name': 'usLastCharIndex', 'format': 'H'}, + {'name': 'sTypoAscender', 'format': 'h'}, + {'name': 'sTypoDescender', 'format': 'h'}, + {'name': 'typo_line_gap', 'format': 'h'}, + {'name': 'win_ascent', 'format': 'H'}, + {'name': 'win_descent', 'format': 'H'}, + {'name': 'ulCodePageRange1', 'format': 'I'}, + {'name': 'ulCodePageRange2', 'format': 'I'}, + {'name': 'sxHeight', 'format': 'h'}, + {'name': 'cap_height', 'format': 'h'}, + {'name': 'usDefaultChar', 'format': 'H'}, + {'name': 'usBreakChar', 'format': 'H'}, + {'name': 'usMaxContext', 'format': 'H'} + ] + + +class TTF_post(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'I', 'convert': fixed_version}, + {'name': 'italic_angle', 'format': 'I'}, + {'name': 'underline_position', 'format': 'h'}, + {'name': 'underline_thickness', 'format': 'h'}, + {'name': 'is_fixed_pitch', 'format': 'I'}, + {'name': 'min_mem_type42', 'format': 'I'}, + {'name': 'max_mem_type42', 'format': 'I'}, + {'name': 'min_mem_type1', 'format': 'I'}, + {'name': 'max_mem_type1', 'format': 'I'}, + + ] + + +class TTF_maxp(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'I', 'convert': fixed_version}, + {'name': 'num_glyphs', 'format': 'H'}, + ] + + +class TTF_cmap4(PackedFormat): + FORMAT = [ + {'name': 'language', 'format': 'H'}, + {'name': 'seg_count', 'format': 'H', 'convert': '_halve_'}, + {'name': 'src_range', 'format': 'H'}, + {'name': 'entry_selector', 'format': 'H'}, + {'name': 'range_shift', 'format': 'H'}, + ] + + @staticmethod + def _halve_(n): + return int(n / 2) + + class CMAPRange: + def __init__(self, start, end, delta, offset, n_segments): + self.start = start + self.end = end + self.delta = delta + self.offset = 0 if offset == 0 else int(offset / 2 - n_segments) + + def contains(self, n): + return self.start <= n <= self.end + + def coverage(self): + return range(self.start, self.end + 1) + + def char_to_glyph(self, n, 
glyphs): + if self.offset == 0: + return (n + self.delta) & 0xFFFF + idx = self.offset + n - self.start + if 0 < idx < len(glyphs): + print("Invalid index for glyphs! {}".format(idx)) + return 0 + return (glyphs[idx] + self.delta) & 0xFFFF + + def __init__(self, fh=None, length=None): + start = fh.tell() - 4 + PackedFormat.__init__(self, fh) + if fh is None: + return + self.ranges = [] + + end_codes = read_list_uint16(fh, self.seg_count + 1) + if end_codes[self.seg_count] != 0: + print("INVALID pad byte....") + return + start_codes = read_list_uint16(fh, self.seg_count) + iddelta = read_list_int16(fh, self.seg_count) + offset_start = fh.tell() + id_offset = read_list_uint16(fh, self.seg_count) + + ids_length = int((length - (fh.tell() - start)) / 2) + self.glyph_ids = read_list_uint16(fh, ids_length) + + for n in range(self.seg_count): + self.ranges.append(self.CMAPRange(start_codes[n], end_codes[n], iddelta[n], id_offset[n], self.seg_count - n)) + + def __len__(self): + return len(self.ranges) + + def char_to_glyph(self, char): + for r in self.ranges: + if not r.contains(char): + continue + return r.char_to_glyph(char, self.glyph_ids) + + def as_map(self, max_char): + cm = {} + for r in self.ranges: + if r.start > max_char: + continue + for c in range(r.start, max(r.end, max_char)): + cm[c] = r.char_to_glyph(c, self.glyph_ids) + return cm + + +class TTF_cmap6(PackedFormat): + FORMAT = [ + {'name': 'language', 'format': 'H'}, + {'name': 'first_code', 'format': 'H'}, + {'name': 'entry_count', 'format': 'H'}, + ] + + def __init__(self, fh, length): + PackedFormat.__init__(self, fh) + self.char_map = {} + self.glyph_map = {} + + mapping = read_list_uint16(fh, self.entry_count) + for n in range(self.entry_count): + self.char_map[n] = mapping[n] + self.glyph_map.setdefault(mapping[n], []).append(n) + + def __len__(self): + return len(self.char_map) + + +class TTF_cmap(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'H'}, + {'name': 'count', 'format': 'H'}, + 
] + PREFS = [(0, 4), (0, 3), (3, 1)] + + def __init__(self, fh=None, length=0): + self.count = 0 + if fh: + start_pos = fh.tell() + PackedFormat.__init__(self, fh) + self.tables = {} + + self.map_table = None + + if self.count == 0: + return + + for n in range(self.count): + tbl = TTFcmapTable(fh) + self.tables[(tbl.platform_id, tbl.encoding_id)] = tbl + + pos = fh.tell() + + fh.seek(start_pos + tbl.offset) + tbl.format, length = read_list_uint16(fh, 2) + if tbl.format == 4: + tbl.map_data = TTF_cmap4(fh, length) + elif tbl.format == 6: + tbl.map_data = TTF_cmap6(fh, length) + fh.seek(pos) + + # Choose the mapping we are going to use, initially on preferences and + # then just fallback to first available map. + for p in self.PREFS: + if p in self.tables and self.tables[p].has_map_data: + self.map_table = self.tables[p].map_data + break + if self.map_table is None: + for t in self.tables.values(): + if t.has_map_data: + self.map_table = t.map_data + break + + def char_to_glyph(self, char, fh): + for p in self.PREFS: + if p in self.tables and self.tables[p].has_map_data: + for rng in self.tables[p].map_data.ranges: + if rng.end < char: + continue + if rng.start > char: + continue + return rng.char_to_glyph(char, fh) + + return None + + def char_map(self, max_char=256): + return self.map_table.as_map(max_char) + + def as_table_string(self): + s = PackedFormat.as_table_string(self) + n = 0 + for t in self.tables: + s += '\nTable: {}\n'.format(n) + s += t.as_table_string() + n += 1 + return s + + +class TTFcmapTable(PackedFormat): + FORMAT = [ + {'name': 'platform_id', 'format': 'H'}, + {'name': 'encoding_id', 'format': 'H'}, + {'name': 'offset', 'format': 'I'}, + ] + + def __init__(self, fh=None): + PackedFormat.__init__(self, fh) + self.format = 0 + self.map_data = None + self.position = 0 + + @property + def has_map_data(self): + return self.map_data is not None and len(self.map_data) > 0 + + def as_map(self, max_char): + cm = {} + for r in self.map_data.ranges: + 
cm.update(r.as_map(max_char)) + return cm + + +class TTF_glyf(PackedFormat): + FORMAT = [ + {'name': 'contours', 'format': 'h'}, + {'name': 'x_min', 'format': 'h'}, + {'name': 'y_min', 'format': 'h'}, + {'name': 'x_max', 'format': 'h'}, + {'name': 'y_max', 'format': 'h'}, + ] + + def __init__(self, fh=None, num=0, data=None): + self.glyph = num + self.components = [] + self.required = set() + PackedFormat.__init__(self, fh=fh, data=data) + + # If the glyph is a compound glyph, ie it's made up of parts of other glyphs, + # then we need to ensure we have all the component glyphs listed. + if self.contours < 0: + while True: + (flags, next_glyph) = read_list_uint16(fh, 2) + self.required.add(next_glyph) + fh.read(calcsize(glyf_skip_format(flags))) + if not glyph_more_components(flags): + break + + def is_compound(self): + return self.contours < 0 + + def glyph_set(self): + rqd = set(self.required) + for c in self.components: + rqd.extend(c.required) + return sorted(rqd) + + +class TTFCollectionHeader(PackedFormat): + FORMAT = [ + {'name': 'tag', 'format': '4s'}, + {'name': 'version', 'format': 'I', 'convert': fixed_version}, + {'name': 'count', 'format': 'I'} + ] + + def __init__(self, fh): + PackedFormat.__init__(self, fh) + self.offsets = [] + self.is_collection = (self.tag == b'ttcf') + if self.is_collection: + for i in range(self.count): + self.offsets.append(unpack('>I', fh.read(4))[0]) + else: + self.count = 1 + self.offsets = [0] + if self.version == 2: + self.dsig_tag, self.dsig_length, self.dsig_offset = unpack("III", fh.read(calcsize('III'))) + + +class TTF_gpos(PackedFormat): + FORMAT = [ + {'name': 'version', 'format': 'I', 'convert': fixed_version}, + {'name': 'script_list', 'format': 'H'}, + {'name': 'feature_list', 'format': 'H'}, + {'name': 'lookup_list', 'format': 'H'}, + ] + + +class OFT_ScriptList(PackedFormat): + FORMAT = [ + {'name': 'count', 'format': 'H'} + ] + + def __init__(self, fh, length=None): + self.records = [] + 
PackedFormat.__init__(self, fh) + for n in range(self.count): + self.records.append(OFT_ScriptRecord(fh)) + + +class OFT_ScriptRecord(PackedFormat): + FORMAT = [ + {'name': 'tag', 'format': '4s'}, + {'name': 'offset', 'format': 'H'} + ] + diff --git a/zttf/subset.py b/zttf/subset.py new file mode 100644 index 0000000..0103c73 --- /dev/null +++ b/zttf/subset.py @@ -0,0 +1,257 @@ +from io import BytesIO +from struct import pack, unpack, calcsize, error as struct_error +from zttf.objects import TTF_post, TTFHeader, TTFOffsetTable, TTF_kern, TTF_kern_subtable +from zttf.utils import Range, glyph_more_components, glyf_skip_format, ttf_checksum, binary_search_parameters + + +class TTFSubset: + def __init__(self, parent, subset): + self.parent = parent + self.subset = subset + + self.tables = {} + # We need to build 2 maps, one for character -> glyph and one + # for glyph -> character + self.orig_char_to_glyph = {} + self.orig_glyph_to_char = {} + self.glyph_map = {} + + self.char_to_glyph = {} + self.glyph_to_char = {} + self.cmap_ranges = [] + + self.required_glyphs = [0] + self.metrics = [] + + self.fh = None + + def start_table(self, tag, data=None): + b = BytesIO() + if data is not None: + b.write(data) + self.tables[tag] = b + return b + + def find_glyph_subset(self): + for s in self.subset: + self.parent.char_to_glyph(s) + + char_to_glyphs = self.parent.get_table(b'cmap').char_map() + rqd = [] + for code in self.subset: + glyph = char_to_glyphs.get(code) + if glyph is None: + print("Unknown character in parent mapping: {}".format(code)) + continue +# print("character {} is glyph {}".format(code, glyph)) + self.orig_char_to_glyph[code] = glyph + self.orig_glyph_to_char.setdefault(glyph, []).append(code) + if glyph not in rqd: + rqd.append(glyph) + + for glyph in rqd: + self.required_glyphs.append(glyph) + self.required_glyphs.extend(self.parent.get_glyph_components(glyph)) + + self.required_glyphs.sort() + + self.glyph_map = {} + for rg in self.required_glyphs: + 
glyph = len(self.glyph_map) + 1 + self.glyph_map[rg] = glyph + if rg in self.orig_glyph_to_char: + for cc in self.orig_glyph_to_char[rg]: + self.char_to_glyph[cc] = glyph + self.glyph_to_char[glyph] = self.orig_glyph_to_char[rg] + + def copy_tables(self): + for tag in [b'name', b'cvt', b'fpgm', b'prep', b'gasp']: + if tag in self.parent.tables: + buff = self.start_table(tag) + tbl = self.parent.header.get_tag(tag) + self.fh.seek(tbl.offset) + buff.write(self.fh.read(tbl.length)) + + new_post = TTF_post() + for f in ['italic_angle', 'underline_position', 'Underline_thickness', 'is_fixed_pitch']: + setattr(new_post, f, self.parent.get_table_attr(b'post', f)) + self.start_table(b'post', new_post.as_bytes()) + + head = self.parent.copy_table(b'head') + head.checksum_adj = 0 + head.index_to_loc_format = 0 + self.start_table(b'head', head.as_bytes()) + + hhea = self.parent.copy_table(b'hhea') + hhea.number_of_metrics = len(self.metrics) + self.start_table(b'hhea', hhea.as_bytes()) + + maxp = self.parent.copy_table(b'maxp') + maxp.b_glyphs = len(self.required_glyphs) + self.start_table(b'maxp', maxp.as_bytes()) + + self.start_table(b'os2', self.parent.copy_table(b'os2').as_bytes()) + # todo - is it worth finding a way to subset the GPOS and LTSH tables? + + def build_cmap_ranges(self): + # As we will likely have a scattered map we will use CMAP Format 4. + # We take the character mappings we have and build 4 lists... 
+ # start code + # end code + # id delta + # range offset + self.cmap_ranges = [] + for cc, glyph in sorted(self.char_to_glyph.items()): + try: + current = self.cmap_ranges[-1] + if current is None or not current.is_consecutive(cc, glyph): + self.cmap_ranges.append(Range(cc, glyph)) + else: + current.expand(cc) + except IndexError: + self.cmap_ranges.append(Range(cc, glyph)) + + def add_cmap_table(self): + if self.cmap_ranges == []: + self.build_cmap_ranges() + self.cmap_ranges.append(Range(0xffff, 0)) + self.cmap_ranges[-1].iddelta = 0 + + seg_count = len(self.cmap_ranges) + src_range, entry_selector, range_shift = binary_search_parameters(seg_count * 2) + length = 16 + 8 * seg_count + len(self.glyph_to_char) + 1 + + data = [ + 0, # version + 1, # number of subtables + 3, # platform id (MS) + 1, # endocing id (Unicode) + 0, 12, # subtable location + # subtable + 4, # format + length, # length + 0, # language + seg_count * 2, # seg count * 2 + src_range, # search range (2 ** floor(log2(seg_count))) + entry_selector, # entry selector log2(src_range / 2) + seg_count * 2 - src_range, # range shift ( 2 * seg_count - search_range) + ] + data.extend([r.end for r in self.cmap_ranges]) + data.append(0) + data.extend([r.start for r in self.cmap_ranges]) + + buff = self.start_table(b'cmap') + buff.write(pack(">{}H".format(len(data)), *data)) + buff.write(pack(">{}h".format(len(self.cmap_ranges)), *[r.iddelta for r in self.cmap_ranges])) + buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.offset for r in self.cmap_ranges])) + buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.start_glyph for r in self.cmap_ranges])) + + def get_glyphs(self): + locations = [] + self.metrics = [] + buff = self.start_table(b'glyf') + for g in self.required_glyphs: + locations.append(int(buff.tell() / 2)) + data = self.parent.get_glyph_data(g) + if data == b'': + continue + if unpack(">h", data[:2])[0] == -1: + # need to adjust glyph index... 
+ pos = 10 + while True: + flags, next_glyph = unpack(">HH", data[pos: pos + 4]) + data = data[:pos + 2] + pack(">H", self.glyph_map[next_glyph]) + data[pos+4:] + pos += 4 + calcsize(glyf_skip_format(flags)) + if not glyph_more_components(flags): + break + buff.write(data) + self.metrics.append(self.parent.glyph_metrics[g]) + loca = self.start_table(b'loca') + loca.write(pack(">{}H".format(len(locations)), *locations)) + + hmtx = self.start_table(b'hmtx') + for m in self.metrics: + hmtx.write(pack(">Hh", *m)) + + def add_kern_data(self): + entries = {} + + for k, diff in self.parent.glyph_kern.items(): + if k[0] not in self.required_glyphs or k[1] not in self.required_glyphs: + continue +# print("mapping {} to ({}, {})".format(k, self.glyph_map[k[0]], self.glyph_map[k[1]])) + entries[(self.glyph_map[k[0]], self.glyph_map[k[1]])] = diff + if len(entries) == 0: + return + + kern = self.start_table(b'kern') + kh = TTF_kern() + kh.version = 0 + kh.num_tables = 1 + kern.write(kh.as_bytes()) + st = TTF_kern_subtable() + st.length = len(st) + 6 * len(entries) + 8 + st.version = 0 + st.coverage = 1 + kern.write(st.as_bytes()) + kern.write(pack(">H", len(entries))) + kern.write(pack(">HHH", *binary_search_parameters(len(entries)))) + for key, diff in entries.items(): + kern.write(pack(">HHh", key[0], key[1], diff)) + + # Put the TTF file together + def output(self): + """ Generate a binary based on the subset we have been given. 
""" + + self.fh = open(self.parent.filename, 'rb') + self.fh.seek(self.parent.start_pos) + + self.find_glyph_subset() + self.add_kern_data() + self.copy_tables() + self.add_cmap_table() + self.get_glyphs() +# self.dump_tables() + + self.fh.close() + + header = TTFHeader() + header.num_tables = len(self.tables) + header.version_raw = 0x00010000 + + output = BytesIO() + header.entry_selector, header.search_range, header.range_shift = binary_search_parameters(len(self.tables)) + output.write(header.as_bytes()) + + head_offset = 0 + offset = output.tell() + 16 * len(self.tables) + sorted_tables = sorted(self.tables.keys()) + for tag in sorted_tables: + if tag == b'head': + head_offset = offset + tbl = TTFOffsetTable() + tbl.tag = tag + tbl.offset = offset + data = self.tables[tag].getvalue() + tbl.length = len(data) + tbl.calculate_checksum(data) + offset += tbl.padded_length() + output.write(tbl.as_bytes()) + + for tag in sorted_tables: + data = self.tables[tag].getvalue() + data += b'\0' * (len(data) % 4) + output.write(data) + + checksum = 0xB1B0AFBA - ttf_checksum(output.getvalue()) + data = output.getvalue() + try: + data = data[:head_offset + 8] + pack(">I", checksum) + data[head_offset + 12:] + except struct_error: + data = data[:head_offset + 8] + pack(">i", checksum) + data[head_offset + 12:] + return data + + def dump_tables(self): + for n in sorted(self.tables): + print("{} {} bytes".format(n, self.tables[n].tell())) + diff --git a/zttf/ttf.py b/zttf/ttf.py new file mode 100644 index 0000000..e0ed83e --- /dev/null +++ b/zttf/ttf.py @@ -0,0 +1,272 @@ +from copy import copy +from struct import calcsize, unpack + +from zttf.objects import TTFHeader, TTF_head, TTF_name, TTF_hhea, TTF_os2, TTF_post, TTF_maxp, TTF_cmap, TTF_glyf, \ + TTF_kern +from zttf.subset import TTFSubset +from zttf.utils import read_list_uint16, read_list_uint32 + + +class TTFont(object): + def __init__(self, filename, offset): + self.header = None + self.tables = {} + self.filename = 
filename + self.start_pos = offset + + self.idx_format = 0 + self.n_glyphs = 0 + self.glyph_metrics = [] + self.glyph_kern = {} + + self.file_handle = None + self.parse() + + def parse(self): + self._open() + self.header = self._read_class(TTFHeader) + if not self.header.check_version(): + return + + self.get_table(b'head', TTF_head) + self.get_table(b'name', TTF_name) + self.get_table(b'hhea', TTF_hhea) + self.get_table(b'os2', TTF_os2) + self.get_table(b'post', TTF_post) + self.get_table(b'maxp', TTF_maxp) + self.get_table(b'cmap', TTF_cmap) + self.get_table(b'kern', TTF_kern) + + self.idx_format = self.get_table_attr(b'head', 'index_to_loc_format') + self.n_glyphs = self.get_table_attr(b'maxp', 'num_glyphs', 0) + + self.get_hmtx() + self.get_loca() + if b'kern' in self.tables: + self.get_kern_data() + + self._close() + + COMMON_DATA = { + 'font_family': (b'name', 1), + 'name': (b'name', 6), + 'ascender': (b'hhea', 'ascender'), + 'descender': (b'hhea', 'descender'), + 'units_per_em': (b'head', 'units_per_em', 1000), + 'cap_height': (b'os2', 'cap_height', 0), + 'bounding_box': (b'head', 'bounding_box'), + 'italic_angle': (b'post', 'italic_angle'), + 'underline_position': (b'post', 'underline_position'), + 'underline_thickness': (b'post', 'underline_thickness'), + 'weight_class': (b'os2', 'weight_class'), + 'line_gap': (b'hhea', 'line_gap'), + 'typo_line_gap': (b'os2', 'typo_line_gap'), + 'win_ascent': (b'os2', 'win_ascent'), + 'win_descent': (b'os2', 'win_descent') + } + + def __getattr__(self, item): + if item in self.COMMON_DATA: + how = self.COMMON_DATA[item] + if how[0] == b'name': + return self.get_name_table(*how[1:]) + if len(how) > 2: + return self.get_table_attr(*how[:3]) + return self.get_table_attr(*how) + + @property + def stemv(self): + return 50 + int(pow((self.weight_class / 65.0), 2)) + + @property + def italic(self): + return self.italic_angle != 0 + + def get_string_width(self, string): + width = 0 + for n in range(len(string)): + glyph = 
self.char_to_glyph(ord(string[n])) + (aw, lsb) = self.glyph_metrics[glyph] + width += aw + if n == 0: + width -= lsb + elif n < len(string) - 1: + glyf2 = self.char_to_glyph(ord(string[n + 1])) + width += self.glyph_kern.get((glyph, glyf2), 0) + return width + + def get_char_width(self, char): + if isinstance(char, str): + char = ord(char) + idx = self.char_to_glyph(char) + if 0 < idx < len(self.glyph_metrics): + idx = 0 + return self.glyph_metrics[idx][0] + + # Internal Table Functions + def get_table(self, tag, obj_class=None): + tbl_obj = self.tables.get(tag) + if tbl_obj is None and obj_class is not None: + tbl = self.header.get_tag(tag) + if tbl is None: + return None + orig_pos = self._seek(tbl.offset) + tbl_obj = self._read_class(obj_class, tbl.length) + self.tables[tag] = tbl_obj + self._seek(orig_pos) + return tbl_obj + + def get_table_attr(self, tbl, attr, default=None): + if tbl not in self.tables: + return default + return getattr(self.tables[tbl], attr, default) + + def get_name_table(self, n_attr, default=None): + if b'name' not in self.tables: + return default + return self.tables[b'name'].get_name(n_attr, default) + + def copy_table(self, tag): + tbl = self.get_table(tag) + return copy(tbl) + + def _get_table_offset(self, tag): + tbl = self.header.get_tag(tag) + return tbl.offset if tbl is not None else 0 + + def get_hmtx(self): + """ Read the glyph metrics. """ + n_metrics = self.get_table_attr(b'hhea', 'number_of_metrics') + + offset = self._get_table_offset(b'hmtx') + if offset == 0: + return False + self._seek(offset) + aw = 0 + for n in range(n_metrics): + aw, lsb = unpack(">Hh", self.file_handle.read(4)) + self.glyph_metrics.append((aw, lsb)) + # Now we have read the aw and lsb for specific glyphs, we need to read additional + # lsb data. 
+ extra = self.n_glyphs - n_metrics + if extra > 0: + lsbs = self._read_list_int16(extra) + for n in range(extra): + self.glyph_metrics.append((aw, lsbs[n])) + + def get_loca(self,): + start = self._get_table_offset(b'loca') + self._seek(start) + if self.idx_format == 0: + self.tables[b'loca'] = [n * 2 for n in self._read_list_uint16(self.n_glyphs + 1)] + elif self.idx_format == 1: + self.tables[b'loca'] = self._read_list_uint32(self.n_glyphs + 1) + + def get_kern_data(self): + kern = self.get_table(b'kern') + for st in kern.subtables: + if st.coverage != 1 or st.version != 0: + print("coverage = {}, version = {} - skipping".format(st.coverage, st.version)) + continue + self._seek(st.offset + len(st)) + (npairs, a, b, c) = self._read_list_uint16(4) + for n in range(npairs): + (l, r) = self._read_list_uint16(2) + diff = self._read_int16() + self.glyph_kern[(l, r)] = diff + + def char_to_glyph(self, char): + self._open() + cmap = self.get_table(b'cmap') + glyph = cmap.char_to_glyph(char, self.file_handle) + return glyph or 0 + + def get_glyph_position(self, glyph): + loca = self.get_table(b'loca') + return loca[glyph] + + def get_glyph_components(self, glyph): + """ Return a list of any component glyphs required. """ + if glyph < 0 or glyph > self.n_glyphs: + print("Missing glyph!!! 
{}".format(glyph)) + return [] + pos = self._get_table_offset(b'glyf') + self.get_glyph_position(glyph) + glyf = self._read_class(TTF_glyf, offset=pos, length=glyph) + for g in glyf.required: + for extra_glyph in self.get_glyph_components(g): + if extra_glyph not in glyf.required: + glyf.required.append(extra_glyph) + return sorted(glyf.required) + + def get_glyph_data(self, glyph): + data_start = self._get_table_offset(b'glyf') + glyph_start = self.get_glyph_position(glyph) + glyph_length = self.get_glyph_position(glyph + 1) - glyph_start + if glyph_length == 0: + print("Zero length glyph @ {}".format(glyph)) + return b'' + self._open() + self.file_handle.seek(data_start + glyph_start) + return self.file_handle.read(glyph_length) + + def get_binary_table(self, tag): + tbl = self.header.get_tag(tag) + print(tbl) + if tbl is None: + return b'' + self._open() + self._seek(tbl.offset) + return self.file_handle.read(tbl.length) + + def make_subset(self, subset): + """ Given a subset of characters, create a subset of the full TTF file suitable for + inclusion in a PDF. + :param subset: List of characters to include. + :return: TTFSubset object + """ + return TTFSubset(self, subset) + + + # File functions. 
+ def _open(self): + if self.file_handle is None: + self.file_handle = open(self.filename, 'rb') + self.file_handle.seek(self.start_pos) + + def _close(self): + if self.file_handle is not None: + self.file_handle.close() + self.file_handle = None + + def _seek(self, offset, whence=0): + self._open() + pos = self.file_handle.tell() + self.file_handle.seek(offset, whence) + return pos + + def _read_class(self, cls, length=None, offset=None): + if offset is not None: + self._seek(offset) + if length is not None: + return cls(self.file_handle, length) + return cls(self.file_handle) + + def _skip(self, offset): + if self.file_handle is not None: + self.file_handle.seek(offset, 1) + + def _read_list_int16(self, n): + _fmt = ">{}h".format(n) + return unpack(_fmt, self.file_handle.read(calcsize(_fmt))) + + def _read_list_uint16(self, n): + return read_list_uint16(self.file_handle, n) + + def _read_uint16(self): + return unpack(">H", self.file_handle.read(2))[0] + + def _read_int16(self): + return unpack(">h", self.file_handle.read(2))[0] + + def _read_list_uint32(self, n): + return read_list_uint32(self.file_handle, n) diff --git a/zttf/ttfile.py b/zttf/ttfile.py new file mode 100644 index 0000000..f006e7c --- /dev/null +++ b/zttf/ttfile.py @@ -0,0 +1,22 @@ +from os.path import exists, getsize + +from zttf.objects import TTFCollectionHeader +from zttf.ttf import TTFont + + +class TTFile(object): + def __init__(self, filename): + self.filename = filename + self.faces = [] + + if not exists(filename) or getsize(filename) == 0: + raise IOError("The file '{}' does not exist or is empty".format(filename)) + + with open(self.filename, 'rb') as fh: + hdr = TTFCollectionHeader(fh) + for off in hdr.offsets: + self.faces.append(TTFont(filename, off)) + + @property + def is_valid(self): + return len(self.faces) > 0 diff --git a/zttf/utils.py b/zttf/utils.py new file mode 100644 index 0000000..aa73753 --- /dev/null +++ b/zttf/utils.py @@ -0,0 +1,247 @@ +from struct import calcsize, 
from struct import calcsize, pack, unpack


class PackedFormat:
    """ Class to allow simpler extraction of data from a stream into an object with
    named attributes.
    All child classes need a FORMAT list of dicts describing the data to be extracted.

    Each FORMAT entry may contain:
      'format'  - struct format character(s) (required to read anything)
      'name'    - attribute name the value is stored under
      'convert' - callable (or name of a method) applied to the raw value;
                  the raw value is kept as '<name>_raw'.
    """
    FORMAT = []

    def __init__(self, fh=None, data=None, endian='>'):
        self.endian = endian
        self.parsed = False
        if fh is not None:
            self.from_file(fh)
        elif data is not None:
            self.from_data(data)

    def from_file(self, fh):
        """Read and parse the FORMAT fields from a binary file object."""
        for _f in self.FORMAT:
            if 'format' not in _f:
                continue
            _fmt = '{}{}'.format(self.endian, _f['format'])
            # NOTE(review): only the first unpacked value is kept, so
            # multi-element formats (e.g. '2H') lose data - confirm FORMAT
            # entries are always single-element.
            _data = unpack(_fmt, fh.read(calcsize(_fmt)))[0]
            if 'name' not in _f:
                continue
            if 'convert' in _f:
                setattr(self, _f['name'] + '_raw', _data)
                _fn = _f['convert'] if callable(_f['convert']) else getattr(self, _f['convert'])
                if _fn is not None and callable(_fn):
                    _data = _fn(_data)
            setattr(self, _f['name'], _data)
        self.parsed = True

    def from_data(self, data):
        """Parse the FORMAT fields from an in-memory bytes object.

        NOTE(review): unlike from_file(), no 'convert' handling is applied
        here - confirm whether that asymmetry is intentional.
        """
        offset = 0
        for _f in self.FORMAT:
            if 'format' not in _f:
                continue
            _fmt = '{}{}'.format(self.endian, _f['format'])
            size = calcsize(_fmt)
            value = unpack(_fmt, data[offset: offset + size])[0]
            # BUG FIX: from_file() tolerates entries without a 'name'
            # (padding fields); from_data() crashed on them with a KeyError.
            if 'name' in _f:
                setattr(self, _f['name'], value)
            offset += size
        self.parsed = True

    def as_bytes(self):
        """Re-pack the parsed attributes back into their binary form."""
        output = b''
        for _f in self.FORMAT:
            # BUG FIX: skip entries without a 'format' (from_file does the
            # same); the original raised a KeyError on them.
            if 'format' not in _f:
                continue
            _fmt = '{}{}'.format(self.endian, _f['format'])
            # BUG FIX: struct 's' formats require bytes under Python 3;
            # the original default of '' raised a TypeError when packed.
            default = b'' if 's' in _f['format'] else 0
            if 'name' not in _f:
                output += pack(_fmt, default)
                continue
            # Converted fields are written back from their raw value.
            attr = _f['name'] + '_raw' if 'convert' in _f else _f['name']
            output += pack(_fmt, getattr(self, attr, default))
        return output

    def as_string(self):
        """Return a human-readable 'Name: value' dump of the named fields."""
        def _name_to_string(n):
            return n.replace('_', ' ').capitalize()
        ss = ''
        for _f in self.FORMAT:
            if 'name' not in _f:
                continue
            ss += '  {}: {}\n'.format(_name_to_string(_f['name']), getattr(self, _f['name']))
        return ss

    def as_table_string(self):
        """Return a tabular dump (offset, format, size, name, value) of the
        FORMAT fields - primarily a debugging aid."""
        def _name_to_string(n):
            return n.replace('_', ' ').capitalize()
        ss = ''
        offset = 0
        for _f in self.FORMAT:
            if 'format' not in _f:
                continue
            _sz = calcsize(_f['format'])
            ss += '  {:04X} {:4s} {:>3d} '.format(offset, _f['format'], _sz)
            if 'name' in _f and getattr(self, _f['name'], None) is not None:
                ss += '{:30s} {}'.format(_name_to_string(_f['name']), getattr(self, _f['name']))
            offset += _sz
            ss += '\n'
        return ss

    def __len__(self):
        """Total packed size (in bytes) of all FORMAT fields."""
        # Entries without a 'format' contribute nothing (consistent with
        # the readers above, which skip them).
        fmt = self.endian + ''.join(_f.get('format', '') for _f in self.FORMAT)
        return calcsize(fmt)


def fixed_version(num):
    """ Decode a fixed 16:16 bit floating point number into a version code.

    Only the version constants used by TrueType tables are mapped (note
    that, per the spec, 'maxp' version 0.5 is stored as 0x00005000, not as
    a true 16.16 fraction); anything else is returned unchanged.
    :param num: fixed 16:16 floating point number as a 32-bit unsigned integer
    :return: version number (float), or `num` when unrecognised
    """
    return {
        0x00005000: 0.5,
        0x00010000: 1.0,
        0x00020000: 2.0,
        0x00025000: 2.5,
        0x00030000: 3.0,
    }.get(num, num)


def binary_search_parameters(length):
    """Return the (entry_selector, search_range, range_shift) triple used by
    TrueType binary-search table headers, where search_range is the largest
    power of two <= length."""
    search_range = 1
    entry_selector = 0
    while search_range * 2 <= length:
        search_range *= 2
        entry_selector += 1
    return entry_selector, search_range, length - search_range


class Range:
    """A contiguous run of character codes mapping onto consecutive glyph
    ids, as used by cmap format 4 segments.  `end` is exclusive."""

    def __init__(self, start=0, glyph=0):
        self.start = start
        self.expand(start)
        self.start_glyph = glyph
        self.iddelta = glyph - start
        self.offset = 0

    def is_consecutive(self, n, g):
        """True when char `n` / glyph `g` directly extend this range."""
        return n == self.end and g == self.start_glyph + n - self.start

    def expand(self, n):
        """Grow the (exclusive) end of the range to cover char `n`.

        NOTE(review): the & 0xffff mask wraps end to 0 when n == 0xffff
        (the cmap sentinel segment) - confirm callers rely on that.
        """
        self.end = (n + 1) & 0xffff

    def __str__(self):
        return "CMAP: {} - {} @ {}".format(self.start, self.end, self.iddelta)

    def as_map(self):
        # debugging....
        return {n: n + self.iddelta for n in range(self.start, self.end)}

    def char_list(self):
        """All character codes covered by this range."""
        return range(self.start, self.end)

    def char_to_glyph(self, char, fh):
        """Resolve `char` to a glyph id, following the idRangeOffset
        indirection into the file when this range uses one."""
        if self.offset == 0:
            return self.get_glyph(char)
        ptr = self.get_offset(char)
        fh.seek(ptr)
        # NOTE(review): re-range-checking the glyph id read from the file
        # against the *character* range looks dubious per the cmap format 4
        # spec (idDelta should be added unless the id is 0) - confirm.
        return self.get_glyph(unpack(">H", fh.read(2))[0])

    def get_glyph(self, char):
        """Glyph id for `char`, or 0 when outside the range."""
        # BUG FIX: `end` is exclusive (see expand/char_list/as_map), so the
        # original `char > self.end` admitted one character too many.
        if char < self.start or char >= self.end:
            return 0
        return (char + self.iddelta) & 0xffff

    def get_offset(self, char):
        """File offset of the glyph id for `char`, or 0 when outside the
        range."""
        # BUG FIX: same exclusive-end off-by-one as get_glyph().
        if char < self.start or char >= self.end:
            return 0
        return self.offset + 2 * (char - self.start)


def read_list_int16(fh, n):
    """Read `n` big-endian signed 16-bit integers from `fh` as a tuple."""
    fmt = ">{}h".format(n)
    return unpack(fmt, fh.read(calcsize(fmt)))


def read_list_uint16(fh, n):
    """Read `n` big-endian unsigned 16-bit integers from `fh` as a tuple."""
    fmt = ">{}H".format(n)
    return unpack(fmt, fh.read(calcsize(fmt)))


def read_list_uint32(fh, n):
    """Read `n` big-endian unsigned 32-bit integers from `fh` as a tuple."""
    fmt = ">{}I".format(n)
    return unpack(fmt, fh.read(calcsize(fmt)))


def ttf_checksum(data):
    """Compute the TrueType table checksum: the 32-bit sum of the data read
    as big-endian uint32s, with the data zero-padded to a 4-byte boundary.

    :param data: table contents as bytes
    :return: unsigned 32-bit checksum
    """
    # BUG FIX: the original padded with `len(data) % 4` bytes, which does
    # NOT round the length up to a multiple of 4 (e.g. a 1-byte table became
    # 2 bytes and unpack() raised struct.error).  Pad to the next boundary.
    data += b'\0' * ((4 - len(data) % 4) % 4)
    n_uint32 = len(data) // 4
    return sum(unpack(">{}I".format(n_uint32), data)) & 0xFFFFFFFF


#############################################################################
###
### Glyph Utilities...
###
#############################################################################

# Flag Constants
GF_ARG_1_AND_2_ARE_WORDS = (1 << 0)
GF_ARGS_ARE_XY_VALUES = (1 << 1)
GF_ROUND_XY_TO_GRID = (1 << 2)
GF_WE_HAVE_A_SCALE = (1 << 3)
GF_RESERVED = (1 << 4)
GF_MORE_COMPONENTS = (1 << 5)
GF_WE_HAVE_AN_X_AND_Y_SCALE = (1 << 6)
GF_WE_HAVE_A_TWO_BY_TWO = (1 << 7)
GF_WE_HAVE_INSTRUCTIONS = (1 << 8)
GF_USE_MY_METRICS = (1 << 9)
GF_OVERLAP_COMPOUND = (1 << 10)
GF_SCALED_COMPONENT_OFFSET = (1 << 11)
GF_UNSCALED_COMPONENT_OFFSET = (1 << 12)


def glyf_skip_format(flags):
    """ Return the correct format for the data we will skip past based on flags set.

    The returned struct format's size equals the number of bytes occupied
    by the component's arguments (2 or 4 bytes) plus any transform
    (scale: 2, x/y scale: 4, 2x2 matrix: 8).
    """
    skip = '>I' if flags & GF_ARG_1_AND_2_ARE_WORDS else '>H'
    if flags & GF_WE_HAVE_A_SCALE:
        return skip + 'H'
    elif flags & GF_WE_HAVE_AN_X_AND_Y_SCALE:
        return skip + 'I'
    elif flags & GF_WE_HAVE_A_TWO_BY_TWO:
        return skip + 'II'
    return skip


def glyph_more_components(flag):
    """Truthy when the composite-glyph flags indicate another component
    record follows."""
    return flag & GF_MORE_COMPONENTS


def glyph_flags_decode(flag):
    """Print the names of all composite-glyph flags set in `flag` (a
    debugging aid)."""
    print("Glyph flag = {:04X}".format(flag))
    # BUG FIX: the original printed "GF_ARGS_ROUND_XY_TO_GRID" for
    # GF_ROUND_XY_TO_GRID; the label now matches the constant.
    flag_names = (
        (GF_ARG_1_AND_2_ARE_WORDS, "GF_ARG_1_AND_2_ARE_WORDS"),
        (GF_ARGS_ARE_XY_VALUES, "GF_ARGS_ARE_XY_VALUES"),
        (GF_ROUND_XY_TO_GRID, "GF_ROUND_XY_TO_GRID"),
        (GF_WE_HAVE_A_SCALE, "GF_WE_HAVE_A_SCALE"),
        (GF_RESERVED, "GF_RESERVED"),
        (GF_MORE_COMPONENTS, "GF_MORE_COMPONENTS"),
        (GF_WE_HAVE_AN_X_AND_Y_SCALE, "GF_WE_HAVE_AN_X_AND_Y_SCALE"),
        (GF_WE_HAVE_A_TWO_BY_TWO, "GF_WE_HAVE_A_TWO_BY_TWO"),
        (GF_WE_HAVE_INSTRUCTIONS, "GF_WE_HAVE_INSTRUCTIONS"),
        (GF_USE_MY_METRICS, "GF_USE_MY_METRICS"),
        (GF_OVERLAP_COMPOUND, "GF_OVERLAP_COMPOUND"),
        (GF_SCALED_COMPONENT_OFFSET, "GF_SCALED_COMPONENT_OFFSET"),
        (GF_UNSCALED_COMPONENT_OFFSET, "GF_UNSCALED_COMPONENT_OFFSET"),
    )
    for bit, name in flag_names:
        if flag & bit:
            print(name)