First drop of code :-)

2015-03-20 18:10:40 +00:00
parent 7dffa90495
commit fb8e82d33a
9 changed files with 1456 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,10 @@ docs/_build/

 # PyBuilder
 target/
+
+# Ignore PyCharm stuff
+.idea
+
+# Ignore any fonts copied while testing!
+*.ttf
+
--- a/README.md
+++ b/README.md
@@ -1,2 +1,59 @@
 # zttf
 Python TTF file parser
+
+This was written to allow fonts to be parsed and then subsets generated for use in PDF documents.
+
+It was developed using Python 3.4 and will work to a degree with Python 2, but it needs additional testing and development there.
+
+## Simple Usage
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('DroidSans.ttf')
+>>> font_file.is_valid
+True
+>>> font_file.faces
+[<zttf.ttf.TTFont object at 0x7f3569b73b50>]
+>>> face = font_file.faces[0]
+>>> face.font_family
+Droid Sans
+>>> face.name
+DroidSans
+>>> face.italic_angle
+0
+```
+
+When used with a font collection, there will be multiple faces available.
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('Futura.ttc')
+>>> font_file.is_valid
+True
+>>> font_file.faces
+[<zttf.ttf.TTFont object at 0x7fc97520bc50>, <zttf.ttf.TTFont object at 0x7fc97520bc90>, <zttf.ttf.TTFont object at 0x7fc97520bd90>, <zttf.ttf.TTFont object at 0x7fc973b4c190>]
+>>> font_file.faces[0].font_family
+Futura
+>>> font_file.faces[0].name
+Futura-Medium
+>>> font_file.faces[1].name
+Futura-MediumItalic
+>>> font_file.faces[2].name
+Futura-CondensedMedium
+>>> font_file.faces[3].name
+Futura-CondensedExtraBold
+```
+
+Subsetting is done by passing in a subset of the characters desired. All required glyphs will be found and copied into the new file.
+
+```python
+>>> from zttf.ttfile import TTFile
+>>> font_file = TTFile('Futura.ttc')
+>>> subset = [ord('H'), ord('e'), ord('l'), ord('o')]
+>>> sub_font = font_file.faces[0].make_subset(subset)
+>>> sub_font.output()
+...
+>>> with open('new_font.ttf', 'wb') as fh:
+...     fh.write(sub_font.output())
+```
+
--- a/example.py
+++ b/example.py
@@ -0,0 +1,23 @@
+import sys
+from zttf.ttfile import TTFile
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print("Usage: {} <font filename>".format(sys.argv[0]))
+        sys.exit(0)
+
+    t = TTFile(sys.argv[1])
+    print("Is valid? {}".format(t.is_valid))
+    if not t.is_valid:
+        sys.exit(0)
+
+    print(t.faces)
+    print(t.faces[0].font_family)
+    print(t.faces[0].name)
+    print(t.faces[0].italic_angle)
+
+    subset = [ord('H'), ord('e'), ord('l'), ord('o')]
+    font_subset = t.faces[0].make_subset(subset)
+    with open('font_subset.ttf', 'wb') as fh:
+        fh.write(font_subset.output())
--- a/zttf/init.py
+++ b/zttf/init.py
--- a/zttf/objects.py
+++ b/zttf/objects.py
@@ -0,0 +1,571 @@
+# TrueType Font Glyph operators
+from struct import unpack, calcsize
+from zttf.utils import PackedFormat, fixed_version, read_list_uint16, Range, read_list_int16, glyph_more_components, \
+    glyf_skip_format, ttf_checksum
+
+
+# Human readable labels for the numeric 'name' field of a TTFNameRecord.
+# Used by TTFNameRecord.__str__; ids missing from this map are shown as
+# 'Unknown Name <hex id>'.
+TTF_NAMES = {
+    0: 'Copyright Notice',
+    1: 'Font Family Name',
+    2: 'Font Subfamily Name',
+    3: 'Unique Font Identifier',
+    4: 'Full Font Name',
+    5: 'Version String',
+    6: 'Postscript Name',
+    7: 'Trademark',
+    8: 'Manufacturer Name',
+    9: 'Designer',
+    10: 'Description',
+    11: 'Vendor URL',
+    12: 'Designer URL',
+    13: 'Licencee Description',
+    14: 'Licence URL',
+    15: 'Preferred Family',
+    16: 'Preferred Subfamily',
+    17: 'Compatible Full',
+    18: 'Sample Text',
+    19: 'PS CID findfont name',
+    20: 'WWS Family Name',
+    21: 'WWS Subfamily Name'
+}
+
+
+class TTFNameRecord(PackedFormat):
+    FORMAT = [
+        {'name': 'platform_id', 'format': 'H'},
+        {'name': 'encoding_id', 'format': 'H'},
+        {'name': 'language_id', 'format': 'H'},
+        {'name': 'name', 'format': 'H'},
+        {'name': 'length', 'format': 'H'},
+        {'name': 'offset', 'format': 'H'},
+    ]
+
+    def __init__(self, fh, data):
+        self.pos = fh.tell()
+        PackedFormat.__init__(self, fh)
+        self.raw = data[self.offset:self.offset + self.length]
+        self.value = self.raw
+        if self.platform_id == 1:
+            if self.encoding_id == 0:
+                self.value = self.raw.decode('iso-8859-1')
+        elif self.platform_id == 3:
+            if self.encoding_id == 1:
+                # UCS-2
+                self.value = self.raw.decode('utf-16-be')
+
+    def __str__(self):
+        return '{:08d} @ {:08X} - {:>30s}: {}'.format(self.pos, self.offset,
+                                                      TTF_NAMES.get(self.name, 'Unknown Name {:X}'.format(self.name)),
+                                                      self.value)
+
+
+class TTF_name(PackedFormat):
+    FORMAT = [
+        {'name': 'format', 'format': 'H'},
+        {'name': 'count', 'format': 'H'},
+        {'name': 'offset', 'format': 'H'},
+    ]
+
+    def __init__(self, fh, length):
+        start_pos = fh.tell()
+        PackedFormat.__init__(self, fh)
+        pos = fh.tell()
+        fh.seek(start_pos + self.offset)
+        data = fh.read(length - self.offset)
+        fh.seek(pos)
+        self.names = []
+        for n in range(self.count):
+            self.names.append(TTFNameRecord(fh, data))
+#            print("{} / {} - {}".format(n + 1, self.count, self.names[-1]))
+
+    def get_name(self, name, default=None):
+        for n in self.names:
+            if n.name == name and n.platform_id == 1 and n.encoding_id == 0:
+                return n.value
+        return default
+
+
+class TTFHeader(PackedFormat):
+    FORMAT = [
+        {'name': 'version', 'format': 'I', 'convert': fixed_version},
+        {'name': 'num_tables', 'format': 'H'},
+        {'name': 'search_range', 'format': 'H'},
+        {'name': 'entry_selector', 'format': 'H'},
+        {'name': 'range_shift', 'format': 'H'},
+    ]
+
+    def __init__(self, fh=None):
+        self.tables = []
+        self.num_tables = 0
+        PackedFormat.__init__(self, fh)
+        for n in range(self.num_tables):
+            self.tables.append(TTFOffsetTable(fh))
+
+    def check_version(self):
+        return self.version == 1
+
+    def get_tag(self, tag):
+        for t in self.tables:
+            if t.tag == tag:
+                return t
+            if tag == b'os2' and t.tag == b'OS/2':
+                return t
+        return None
+
+    def dump_tables(self):
+        print("TTF Header Tables:")
+        for t in self.tables:
+            print("    {}  @ {}".format(t.tag, t.offset))
+
+
+class TTF_kern(PackedFormat):
+    FORMAT = [
+        {'name': 'version', 'format': 'H'},
+        {'name': 'num_tables', 'format': 'H'}
+    ]
+    def __init__(self, fh=None, length=None):
+        self.subtables = []
+        PackedFormat.__init__(self, fh)
+        if fh is None:
+            return
+        for n in range(self.num_tables):
+            tbl = TTF_kern_subtable(fh)
+            fh.seek(tbl.length - len(tbl), 1)
+            self.subtables.append(tbl)
+
+
+class TTF_kern_subtable(PackedFormat):
+    FORMAT = [
+        {'name': 'version', 'format': 'H'},
+        {'name': 'length', 'format': 'H'},
+        {'name': 'coverage', 'format': 'H'},
+    ]
+    def __init__(self, fh=None):
+        if fh is not None:
+            self.offset = fh.tell()
+        PackedFormat.__init__(self, fh)
+
+
+
+class TTFOffsetTable(PackedFormat):
+    FORMAT = [
+        {'name': 'tag', 'format': '4s'},
+        {'name': 'checksum', 'format': 'I'},
+        {'name': 'offset', 'format': 'I'},
+        {'name': 'length', 'format': 'I'},
+    ]
+
+    def __str__(self):
+        return 'Offset Table: {}  {} bytes @ {}'.format(self.tag, self.length, self.offset)
+
+    def padded_length(self):
+        return self.length + 3 & ~ 3
+
+    def padded_data(self, data):
+        extra = self.padded_length() - len(data)
+        if extra > 0:
+            return data + '\0' * extra
+        return data
+
+    def calculate_checksum(self, data):
+        self.checksum = ttf_checksum(data)
+
+
+class TTF_head(PackedFormat):
+    FORMAT = [
+        {'name': 'vers', 'format': 'i'},
+        {'name': 'font_version', 'format': 'i'},
+        {'name': 'checksum_adj', 'format': 'I'},
+        {'name': 'magic_number', 'format': 'I'},
+        {'name': 'flags', 'format': 'H'},
+        {'name': 'units_per_em', 'format': 'H', 'convert': float},
+        {'name': 'created', 'format': 'q'},
+        {'name': 'modified', 'format': 'q'},
+        {'name': 'x_min', 'format': 'h'},
+        {'name': 'y_min', 'format': 'h'},
+        {'name': 'x_max', 'format': 'h'},
+        {'name': 'y_max', 'format': 'h'},
+        {'name': 'mac_style', 'format': 'H'},
+        {'name': 'lowest_rec_ppem', 'format': 'H'},
+        {'name': 'direction_hint', 'format': 'H'},
+        {'name': 'index_to_loc_format', 'format': 'h'},
+        {'name': 'glyph_data_format', 'format': 'h'},
+    ]
+
+    @property
+    def bounding_box(self):
+        scale = 1000 / self.units_per_em
+        return [(self.x_min * scale),
+                (self.y_min * scale),
+                (self.x_max * scale),
+                (self.y_max * scale)]
+
+    def decode_mac_style(self):
+        return {
+            'bold': self.mac_style & 1 << 0,
+            'italic': self.mac_style & 1,
+            'underline': self.mac_style & 1 << 1,
+            'outline': self.mac_style & 1 << 2,
+            'shadow': self.mac_style & 1 << 3,
+            'condensed': self.mac_style & 1 << 4,
+            'extended': self.mac_style & 1 << 5
+        }
+
+
+class TTF_hhea(PackedFormat):
+    # Field layout of the 'hhea' table. 'format' values are struct codes
+    # ('h' signed 16 bit, 'H' unsigned 16 bit, 'i' signed 32 bit,
+    # 'q' signed 64 bit); 'version' is passed through fixed_version.
+    # NOTE(review): 'min_right_dide_brearing' and 'x_max_extant' look like
+    # misspellings, but they are the attribute names created from this
+    # table so renaming them would change the class interface.
+    FORMAT = [
+        {'name': 'version', 'format': 'i', 'convert': fixed_version},
+        {'name': 'ascender', 'format': 'h'},
+        {'name': 'descender', 'format': 'h'},
+        {'name': 'line_gap', 'format': 'h'},
+        {'name': 'advance_width_max', 'format': 'H'},
+        {'name': 'min_left_side_bearing', 'format': 'h'},
+        {'name': 'min_right_dide_brearing', 'format': 'h'},
+        {'name': 'x_max_extant', 'format': 'h'},
+        {'name': 'caret_slope_rise', 'format': 'h'},
+        {'name': 'caret_slope_run', 'format': 'h'},
+        {'name': 'caret_offset', 'format': 'h'},
+        {'name': 'reserved', 'format': 'q'},
+        {'name': 'metric_data_format', 'format': 'h'},
+        {'name': 'number_of_metrics', 'format': 'H'},
+    ]
+
+
+class TTF_os2(PackedFormat):
+    # Field layout used for the 'OS/2' table (looked up with the b'os2'
+    # alias, see TTFHeader.get_tag). 'format' values are struct codes.
+    # NOTE(review): the trailing fields (ulCodePageRange1 onwards) are
+    # presumably absent from older versions of the table - confirm how
+    # PackedFormat behaves when the table in the file is shorter.
+    FORMAT = [
+        {'name': 'version', 'format': 'H'},
+        {'name': 'xAvgCharWidth', 'format': 'h'},
+        {'name': 'weight_class', 'format': 'H'},
+        {'name': 'usWidthClass', 'format': 'H'},
+        {'name': 'fsType', 'format': 'H'},
+        {'name': 'ySubscriptXSize', 'format': 'h'},
+        {'name': 'ySubscriptYSize', 'format': 'h'},
+        {'name': 'ySubscriptXOffset', 'format': 'h'},
+        {'name': 'ySubscriptYOffset', 'format': 'h'},
+        {'name': 'ySuperscriptXSize', 'format': 'h'},
+        {'name': 'ySuperscriptYSize', 'format': 'h'},
+        {'name': 'ySuperscriptXOffset', 'format': 'h'},
+        {'name': 'ySuperscriptYOffset', 'format': 'h'},
+        {'name': 'yStrikeoutSize', 'format': 'h'},
+        {'name': 'yStrikeoutPosition', 'format': 'h'},
+        {'name': 'sFamilyClass', 'format': 'h'},
+        {'name': 'panose', 'format': '10s'},
+        {'name': 'ulUnicodeRange1', 'format': 'I'},
+        {'name': 'ulUnicodeRange2', 'format': 'I'},
+        {'name': 'ulUnicodeRange3', 'format': 'I'},
+        {'name': 'ulUnicodeRange4', 'format': 'I'},
+        {'name': 'achVendID', 'format': '4s'},
+        {'name': 'fsSelection', 'format': 'H'},
+        {'name': 'usFirstCharIndex', 'format': 'H'},
+        {'name': 'usLastCharIndex', 'format': 'H'},
+        {'name': 'sTypoAscender', 'format': 'h'},
+        {'name': 'sTypoDescender', 'format': 'h'},
+        {'name': 'typo_line_gap', 'format': 'h'},
+        {'name': 'win_ascent', 'format': 'H'},
+        {'name': 'win_descent', 'format': 'H'},
+        {'name': 'ulCodePageRange1', 'format': 'I'},
+        {'name': 'ulCodePageRange2', 'format': 'I'},
+        {'name': 'sxHeight', 'format': 'h'},
+        {'name': 'cap_height', 'format': 'h'},
+        {'name': 'usDefaultChar', 'format': 'H'},
+        {'name': 'usBreakChar', 'format': 'H'},
+        {'name': 'usMaxContext', 'format': 'H'}
+    ]
+
+
+class TTF_post(PackedFormat):
+    FORMAT = [
+        {'name': 'version', 'format': 'I', 'convert': fixed_version},
+        {'name': 'italic_angle', 'format': 'I'},
+        {'name': 'underline_position', 'format': 'h'},
+        {'name': 'underline_thickness', 'format': 'h'},
+        {'name': 'is_fixed_pitch', 'format': 'I'},
+        {'name': 'min_mem_type42', 'format': 'I'},
+        {'name': 'max_mem_type42', 'format': 'I'},
+        {'name': 'min_mem_type1', 'format': 'I'},
+        {'name': 'max_mem_type1', 'format': 'I'},
+
+    ]
+
+
+class TTF_maxp(PackedFormat):
+    # Leading fields of the 'maxp' table; only the version and the glyph
+    # count are declared here.
+    FORMAT = [
+        {'name': 'version', 'format': 'I', 'convert': fixed_version},
+        {'name': 'num_glyphs', 'format': 'H'},
+    ]
+
+
+class TTF_cmap4(PackedFormat):
+    FORMAT = [
+        {'name': 'language', 'format': 'H'},
+        {'name': 'seg_count', 'format': 'H', 'convert': '_halve_'},
+        {'name': 'src_range', 'format': 'H'},
+        {'name': 'entry_selector', 'format': 'H'},
+        {'name': 'range_shift', 'format': 'H'},
+    ]
+
+    @staticmethod
+    def _halve_(n):
+        return int(n / 2)
+
+    class CMAPRange:
+        def __init__(self, start, end, delta, offset, n_segments):
+            self.start = start
+            self.end = end
+            self.delta = delta
+            self.offset = 0 if offset == 0 else int(offset / 2 - n_segments)
+
+        def contains(self, n):
+            return self.start <= n <= self.end
+
+        def coverage(self):
+            return range(self.start, self.end + 1)
+
+        def char_to_glyph(self, n, glyphs):
+            if self.offset == 0:
+                return (n + self.delta) & 0xFFFF
+            idx = self.offset + n - self.start
+            if 0 < idx < len(glyphs):
+                print("Invalid index for glyphs! {}".format(idx))
+                return 0
+            return (glyphs[idx] + self.delta) & 0xFFFF
+
+    def __init__(self, fh=None, length=None):
+        start = fh.tell() - 4
+        PackedFormat.__init__(self, fh)
+        if fh is None:
+            return
+        self.ranges = []
+
+        end_codes = read_list_uint16(fh, self.seg_count + 1)
+        if end_codes[self.seg_count] != 0:
+            print("INVALID pad byte....")
+            return
+        start_codes = read_list_uint16(fh, self.seg_count)
+        iddelta = read_list_int16(fh, self.seg_count)
+        offset_start = fh.tell()
+        id_offset = read_list_uint16(fh, self.seg_count)
+
+        ids_length = int((length - (fh.tell() - start)) / 2)
+        self.glyph_ids = read_list_uint16(fh, ids_length)
+
+        for n in range(self.seg_count):
+            self.ranges.append(self.CMAPRange(start_codes[n], end_codes[n], iddelta[n], id_offset[n], self.seg_count - n))
+
+    def __len__(self):
+        return len(self.ranges)
+
+    def char_to_glyph(self, char):
+        for r in self.ranges:
+            if not r.contains(char):
+                continue
+            return r.char_to_glyph(char, self.glyph_ids)
+
+    def as_map(self, max_char):
+        cm = {}
+        for r in self.ranges:
+            if r.start > max_char:
+                continue
+            for c in range(r.start, max(r.end, max_char)):
+                cm[c] = r.char_to_glyph(c, self.glyph_ids)
+        return cm
+
+
+class TTF_cmap6(PackedFormat):
+    FORMAT = [
+        {'name': 'language', 'format': 'H'},
+        {'name': 'first_code', 'format': 'H'},
+        {'name': 'entry_count', 'format': 'H'},
+    ]
+
+    def __init__(self, fh, length):
+        PackedFormat.__init__(self, fh)
+        self.char_map = {}
+        self.glyph_map = {}
+
+        mapping = read_list_uint16(fh, self.entry_count)
+        for n in range(self.entry_count):
+            self.char_map[n] = mapping[n]
+            self.glyph_map.setdefault(mapping[n], []).append(n)
+
+    def __len__(self):
+        return len(self.char_map)
+
+
+class TTF_cmap(PackedFormat):
+    FORMAT = [
+        {'name': 'version', 'format': 'H'},
+        {'name': 'count', 'format': 'H'},
+    ]
+    PREFS = [(0, 4), (0, 3), (3, 1)]
+
+    def __init__(self, fh=None, length=0):
+        self.count = 0
+        if fh:
+            start_pos = fh.tell()
+        PackedFormat.__init__(self, fh)
+        self.tables = {}
+
+        self.map_table = None
+
+        if self.count == 0:
+            return
+
+        for n in range(self.count):
+            tbl = TTFcmapTable(fh)
+            self.tables[(tbl.platform_id, tbl.encoding_id)] = tbl
+
+            pos = fh.tell()
+
+            fh.seek(start_pos + tbl.offset)
+            tbl.format, length = read_list_uint16(fh, 2)
+            if tbl.format == 4:
+                tbl.map_data = TTF_cmap4(fh, length)
+            elif tbl.format == 6:
+                tbl.map_data = TTF_cmap6(fh, length)
+            fh.seek(pos)
+
+        # Choose the mapping we are going to use, initially on preferences and
+        # then just fallback to first available map.
+        for p in self.PREFS:
+            if p in self.tables and self.tables[p].has_map_data:
+                self.map_table = self.tables[p].map_data
+                break
+        if self.map_table is None:
+            for t in self.tables.values():
+                if t.has_map_data:
+                    self.map_table = t.map_data
+                    break
+
+    def char_to_glyph(self, char, fh):
+        for p in self.PREFS:
+            if p in self.tables and self.tables[p].has_map_data:
+                for rng in self.tables[p].map_data.ranges:
+                    if rng.end < char:
+                        continue
+                    if rng.start > char:
+                        continue
+                    return rng.char_to_glyph(char, fh)
+
+        return None
+
+    def char_map(self, max_char=256):
+        return self.map_table.as_map(max_char)
+
+    def as_table_string(self):
+        s = PackedFormat.as_table_string(self)
+        n = 0
+        for t in self.tables:
+            s += '\nTable: {}\n'.format(n)
+            s += t.as_table_string()
+            n += 1
+        return s
+
+
+class TTFcmapTable(PackedFormat):
+    FORMAT = [
+        {'name': 'platform_id', 'format': 'H'},
+        {'name': 'encoding_id', 'format': 'H'},
+        {'name': 'offset', 'format': 'I'},
+    ]
+
+    def __init__(self, fh=None):
+        PackedFormat.__init__(self, fh)
+        self.format = 0
+        self.map_data = None
+        self.position = 0
+
+    @property
+    def has_map_data(self):
+        return self.map_data is not None and len(self.map_data) > 0
+
+    def as_map(self, max_char):
+        cm = {}
+        for r in self.map_data.ranges:
+            cm.update(r.as_map(max_char))
+        return cm
+
+
+class TTF_glyf(PackedFormat):
+    FORMAT = [
+        {'name': 'contours', 'format': 'h'},
+        {'name': 'x_min', 'format': 'h'},
+        {'name': 'y_min', 'format': 'h'},
+        {'name': 'x_max', 'format': 'h'},
+        {'name': 'y_max', 'format': 'h'},
+    ]
+
+    def __init__(self, fh=None, num=0, data=None):
+        self.glyph = num
+        self.components = []
+        self.required = set()
+        PackedFormat.__init__(self, fh=fh, data=data)
+
+        # If the glyph is a compound glyph, ie it's made up of parts of other glyphs,
+        # then we need to ensure we have all the component glyphs listed.
+        if self.contours < 0:
+            while True:
+                (flags, next_glyph) = read_list_uint16(fh, 2)
+                self.required.add(next_glyph)
+                fh.read(calcsize(glyf_skip_format(flags)))
+                if not glyph_more_components(flags):
+                    break
+
+    def is_compound(self):
+        return self.contours < 0
+
+    def glyph_set(self):
+        rqd = set(self.required)
+        for c in self.components:
+            rqd.extend(c.required)
+        return sorted(rqd)
+
+
+class TTFCollectionHeader(PackedFormat):
+    FORMAT = [
+        {'name': 'tag', 'format': '4s'},
+        {'name': 'version', 'format': 'I', 'convert': fixed_version},
+        {'name': 'count', 'format': 'I'}
+    ]
+
+    def __init__(self, fh):
+        PackedFormat.__init__(self, fh)
+        self.offsets = []
+        self.is_collection = (self.tag == b'ttcf')
+        if self.is_collection:
+            for i in range(self.count):
+                self.offsets.append(unpack('>I', fh.read(4))[0])
+        else:
+            self.count = 1
+            self.offsets = [0]
+        if self.version == 2:
+            self.dsig_tag, self.dsig_length, self.dsig_offset = unpack("III", fh.read(calcsize('III')))
+
+
+class TTF_gpos(PackedFormat):
+    # Header fields of the 'GPOS' table: a version followed by three
+    # unsigned 16 bit values named for the script, feature and lookup lists.
+    FORMAT = [
+        {'name': 'version', 'format': 'I', 'convert': fixed_version},
+        {'name': 'script_list', 'format': 'H'},
+        {'name': 'feature_list', 'format': 'H'},
+        {'name': 'lookup_list', 'format': 'H'},
+    ]
+
+
+class OFT_ScriptList(PackedFormat):
+    FORMAT = [
+        {'name': 'count', 'format': 'H'}
+    ]
+
+    def __init__(self, fh, length=None):
+        self.records = []
+        PackedFormat.__init__(self, fh)
+        for n in range(self.count):
+            self.records.append(OFT_ScriptRecord(fh))
+
+
+class OFT_ScriptRecord(PackedFormat):
+    # One entry of an OFT_ScriptList: a 4 byte tag and a 16 bit offset.
+    FORMAT = [
+        {'name': 'tag', 'format': '4s'},
+        {'name': 'offset', 'format': 'H'}
+    ]
+
--- a/zttf/subset.py
+++ b/zttf/subset.py
@@ -0,0 +1,257 @@
+from io import BytesIO
+from struct import pack, unpack, calcsize, error as struct_error
+from zttf.objects import TTF_post, TTFHeader, TTFOffsetTable, TTF_kern, TTF_kern_subtable
+from zttf.utils import Range, glyph_more_components, glyf_skip_format, ttf_checksum, binary_search_parameters
+
+
+class TTFSubset:
+    def __init__(self, parent, subset):
+        self.parent = parent
+        self.subset = subset
+
+        self.tables = {}
+        # We need to build 2 maps, one for character -> glyph and one
+        # for glyph -> character
+        self.orig_char_to_glyph = {}
+        self.orig_glyph_to_char = {}
+        self.glyph_map = {}
+
+        self.char_to_glyph = {}
+        self.glyph_to_char = {}
+        self.cmap_ranges = []
+
+        self.required_glyphs = [0]
+        self.metrics = []
+
+        self.fh = None
+
+    def start_table(self, tag, data=None):
+        b = BytesIO()
+        if data is not None:
+            b.write(data)
+        self.tables[tag] = b
+        return b
+
+    def find_glyph_subset(self):
+        for s in self.subset:
+            self.parent.char_to_glyph(s)
+
+        char_to_glyphs = self.parent.get_table(b'cmap').char_map()
+        rqd = []
+        for code in self.subset:
+            glyph = char_to_glyphs.get(code)
+            if glyph is None:
+                print("Unknown character in parent mapping: {}".format(code))
+                continue
+#            print("character {} is glyph {}".format(code, glyph))
+            self.orig_char_to_glyph[code] = glyph
+            self.orig_glyph_to_char.setdefault(glyph, []).append(code)
+            if glyph not in rqd:
+                rqd.append(glyph)
+
+        for glyph in rqd:
+            self.required_glyphs.append(glyph)
+            self.required_glyphs.extend(self.parent.get_glyph_components(glyph))
+
+        self.required_glyphs.sort()
+
+        self.glyph_map = {}
+        for rg in self.required_glyphs:
+            glyph = len(self.glyph_map) + 1
+            self.glyph_map[rg] = glyph
+            if rg in self.orig_glyph_to_char:
+                for cc in self.orig_glyph_to_char[rg]:
+                    self.char_to_glyph[cc] = glyph
+                self.glyph_to_char[glyph] = self.orig_glyph_to_char[rg]
+
+    def copy_tables(self):
+        for tag in [b'name', b'cvt', b'fpgm', b'prep', b'gasp']:
+            if tag in self.parent.tables:
+                buff = self.start_table(tag)
+                tbl = self.parent.header.get_tag(tag)
+                self.fh.seek(tbl.offset)
+                buff.write(self.fh.read(tbl.length))
+
+        new_post = TTF_post()
+        for f in ['italic_angle', 'underline_position', 'Underline_thickness', 'is_fixed_pitch']:
+            setattr(new_post, f, self.parent.get_table_attr(b'post', f))
+        self.start_table(b'post', new_post.as_bytes())
+
+        head = self.parent.copy_table(b'head')
+        head.checksum_adj = 0
+        head.index_to_loc_format = 0
+        self.start_table(b'head', head.as_bytes())
+
+        hhea = self.parent.copy_table(b'hhea')
+        hhea.number_of_metrics = len(self.metrics)
+        self.start_table(b'hhea', hhea.as_bytes())
+
+        maxp = self.parent.copy_table(b'maxp')
+        maxp.b_glyphs = len(self.required_glyphs)
+        self.start_table(b'maxp', maxp.as_bytes())
+
+        self.start_table(b'os2', self.parent.copy_table(b'os2').as_bytes())
+        # todo - is it worth finding a way to subset the GPOS and LTSH tables?
+
+    def build_cmap_ranges(self):
+        # As we will likely have a scattered map we will use CMAP Format 4.
+        # We take the character mappings we have and build 4 lists...
+        #   start code
+        #   end code
+        #   id delta
+        #   range offset
+        self.cmap_ranges = []
+        for cc, glyph in sorted(self.char_to_glyph.items()):
+            try:
+                current = self.cmap_ranges[-1]
+                if current is None or not current.is_consecutive(cc, glyph):
+                    self.cmap_ranges.append(Range(cc, glyph))
+                else:
+                    current.expand(cc)
+            except IndexError:
+                self.cmap_ranges.append(Range(cc, glyph))
+
+    def add_cmap_table(self):
+        if self.cmap_ranges == []:
+            self.build_cmap_ranges()
+        self.cmap_ranges.append(Range(0xffff, 0))
+        self.cmap_ranges[-1].iddelta = 0
+
+        seg_count = len(self.cmap_ranges)
+        src_range, entry_selector, range_shift = binary_search_parameters(seg_count * 2)
+        length = 16 + 8 * seg_count + len(self.glyph_to_char) + 1
+
+        data = [
+            0,        # version
+            1,        # number of subtables
+            3,        # platform id (MS)
+            1,        # endocing id (Unicode)
+            0, 12,    # subtable location
+            #           subtable
+            4,        # format
+            length,   # length
+            0,                          # language
+            seg_count * 2,              # seg count * 2
+            src_range,                  # search range (2 ** floor(log2(seg_count)))
+            entry_selector,             # entry selector  log2(src_range / 2)
+            seg_count * 2 - src_range,  # range shift ( 2 * seg_count - search_range)
+        ]
+        data.extend([r.end for r in self.cmap_ranges])
+        data.append(0)
+        data.extend([r.start for r in self.cmap_ranges])
+
+        buff = self.start_table(b'cmap')
+        buff.write(pack(">{}H".format(len(data)), *data))
+        buff.write(pack(">{}h".format(len(self.cmap_ranges)), *[r.iddelta for r in self.cmap_ranges]))
+        buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.offset for r in self.cmap_ranges]))
+        buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.start_glyph for r in self.cmap_ranges]))
+
+    def get_glyphs(self):
+        locations = []
+        self.metrics = []
+        buff = self.start_table(b'glyf')
+        for g in self.required_glyphs:
+            locations.append(int(buff.tell() / 2))
+            data = self.parent.get_glyph_data(g)
+            if data == b'':
+                continue
+            if unpack(">h", data[:2])[0] == -1:
+                # need to adjust glyph index...
+                pos = 10
+                while True:
+                    flags, next_glyph = unpack(">HH", data[pos: pos + 4])
+                    data = data[:pos + 2] + pack(">H", self.glyph_map[next_glyph]) + data[pos+4:]
+                    pos += 4 + calcsize(glyf_skip_format(flags))
+                    if not glyph_more_components(flags):
+                        break
+            buff.write(data)
+            self.metrics.append(self.parent.glyph_metrics[g])
+        loca = self.start_table(b'loca')
+        loca.write(pack(">{}H".format(len(locations)), *locations))
+
+        hmtx = self.start_table(b'hmtx')
+        for m in self.metrics:
+            hmtx.write(pack(">Hh", *m))
+
+    def add_kern_data(self):
+        entries = {}
+
+        for k, diff in self.parent.glyph_kern.items():
+            if k[0] not in self.required_glyphs or k[1] not in self.required_glyphs:
+                continue
+#            print("mapping {} to ({}, {})".format(k, self.glyph_map[k[0]], self.glyph_map[k[1]]))
+            entries[(self.glyph_map[k[0]], self.glyph_map[k[1]])] = diff
+        if len(entries) == 0:
+            return
+
+        kern = self.start_table(b'kern')
+        kh = TTF_kern()
+        kh.version = 0
+        kh.num_tables = 1
+        kern.write(kh.as_bytes())
+        st = TTF_kern_subtable()
+        st.length = len(st) + 6 * len(entries) + 8
+        st.version = 0
+        st.coverage = 1
+        kern.write(st.as_bytes())
+        kern.write(pack(">H", len(entries)))
+        kern.write(pack(">HHH", *binary_search_parameters(len(entries))))
+        for key, diff in entries.items():
+            kern.write(pack(">HHh", key[0], key[1], diff))
+
+    # Put the TTF file together
+    def output(self):
+        """ Generate a binary based on the subset we have been given. """
+
+        self.fh = open(self.parent.filename, 'rb')
+        self.fh.seek(self.parent.start_pos)
+
+        self.find_glyph_subset()
+        self.add_kern_data()
+        self.copy_tables()
+        self.add_cmap_table()
+        self.get_glyphs()
+#        self.dump_tables()
+
+        self.fh.close()
+
+        header = TTFHeader()
+        header.num_tables = len(self.tables)
+        header.version_raw = 0x00010000
+
+        output = BytesIO()
+        header.entry_selector, header.search_range, header.range_shift = binary_search_parameters(len(self.tables))
+        output.write(header.as_bytes())
+
+        head_offset = 0
+        offset = output.tell() + 16 * len(self.tables)
+        sorted_tables = sorted(self.tables.keys())
+        for tag in sorted_tables:
+            if tag == b'head':
+                head_offset = offset
+            tbl = TTFOffsetTable()
+            tbl.tag = tag
+            tbl.offset = offset
+            data = self.tables[tag].getvalue()
+            tbl.length = len(data)
+            tbl.calculate_checksum(data)
+            offset += tbl.padded_length()
+            output.write(tbl.as_bytes())
+
+        for tag in sorted_tables:
+            data = self.tables[tag].getvalue()
+            data += b'\0' * (len(data) % 4)
+            output.write(data)
+
+        checksum = 0xB1B0AFBA - ttf_checksum(output.getvalue())
+        data = output.getvalue()
+        try:
+            data = data[:head_offset + 8] + pack(">I", checksum) + data[head_offset + 12:]
+        except struct_error:
+            data = data[:head_offset + 8] + pack(">i", checksum) + data[head_offset + 12:]
+        return data
+
+    def dump_tables(self):
+        for n in sorted(self.tables):
+            print("{} {} bytes".format(n, self.tables[n].tell()))
+
--- a/zttf/ttf.py
+++ b/zttf/ttf.py
@@ -0,0 +1,272 @@
+from copy import copy
+from struct import calcsize, unpack
+
+from zttf.objects import TTFHeader, TTF_head, TTF_name, TTF_hhea, TTF_os2, TTF_post, TTF_maxp, TTF_cmap, TTF_glyf, \
+    TTF_kern
+from zttf.subset import TTFSubset
+from zttf.utils import read_list_uint16, read_list_uint32
+
+
+class TTFont(object):
+    """ A single font face read from a TrueType file or collection.
+
+    The face is parsed on construction. Commonly used values from the parsed
+    tables are available as attributes, as listed in COMMON_DATA.
+    """
+
+    def __init__(self, filename, offset):
+        """ Parse the font face found in a file.
+        :param filename: path of the font file
+        :param offset: position within the file where this face starts
+        """
+        self.header = None
+        self.tables = {}
+        self.filename = filename
+        self.start_pos = offset
+
+        self.idx_format = 0
+        self.n_glyphs = 0
+        self.glyph_metrics = []
+        self.glyph_kern = {}
+
+        self.file_handle = None
+        self.parse()
+
+    def parse(self):
+        """ Read the header and the tables we use, then the metrics, locations and kerning. """
+        self._open()
+        try:
+            self.header = self._read_class(TTFHeader)
+            if not self.header.check_version():
+                return
+
+            self.get_table(b'head', TTF_head)
+            self.get_table(b'name', TTF_name)
+            self.get_table(b'hhea', TTF_hhea)
+            # NOTE(review): the OpenType tag for this table is b'OS/2' - confirm
+            # that the header maps b'os2' onto it.
+            self.get_table(b'os2', TTF_os2)
+            self.get_table(b'post', TTF_post)
+            self.get_table(b'maxp', TTF_maxp)
+            self.get_table(b'cmap', TTF_cmap)
+            self.get_table(b'kern', TTF_kern)
+
+            self.idx_format = self.get_table_attr(b'head', 'index_to_loc_format')
+            self.n_glyphs = self.get_table_attr(b'maxp', 'num_glyphs', 0)
+
+            self.get_hmtx()
+            self.get_loca()
+            if b'kern' in self.tables:
+                self.get_kern_data()
+        finally:
+            # Close the file whether parsing finished, stopped early or failed.
+            self._close()
+
+    # attribute name -> (table tag, attribute or name id[, default])
+    COMMON_DATA = {
+        'font_family': (b'name', 1),
+        'name': (b'name', 6),
+        'ascender': (b'hhea', 'ascender'),
+        'descender': (b'hhea', 'descender'),
+        'units_per_em': (b'head', 'units_per_em', 1000),
+        'cap_height': (b'os2', 'cap_height', 0),
+        'bounding_box': (b'head', 'bounding_box'),
+        'italic_angle': (b'post', 'italic_angle'),
+        'underline_position': (b'post', 'underline_position'),
+        'underline_thickness': (b'post', 'underline_thickness'),
+        'weight_class': (b'os2', 'weight_class'),
+        'line_gap': (b'hhea', 'line_gap'),
+        'typo_line_gap': (b'os2', 'typo_line_gap'),
+        'win_ascent': (b'os2', 'win_ascent'),
+        'win_descent': (b'os2', 'win_descent')
+    }
+
+    def __getattr__(self, item):
+        """ Provide the COMMON_DATA values as attributes.
+        :raises AttributeError: for any name not listed in COMMON_DATA
+        """
+        if item not in self.COMMON_DATA:
+            # Returning None here would make every attribute appear to exist.
+            raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, item))
+        how = self.COMMON_DATA[item]
+        if how[0] == b'name':
+            return self.get_name_table(*how[1:])
+        return self.get_table_attr(*how[:3])
+
+    @property
+    def stemv(self):
+        """ Stem width value calculated from the weight class. """
+        return 50 + int(pow((self.weight_class / 65.0), 2))
+
+    @property
+    def italic(self):
+        """ True when the italic angle is not zero. """
+        return self.italic_angle != 0
+
+    def _get_glyph_metric(self, glyph):
+        """ Return (advance width, left side bearing) for a glyph, using glyph 0 if unknown. """
+        if not self.glyph_metrics:
+            return 0, 0
+        if not 0 <= glyph < len(self.glyph_metrics):
+            glyph = 0
+        return self.glyph_metrics[glyph]
+
+    def get_string_width(self, string):
+        """ Calculate the width of a string in font units.
+
+        The advance width of every glyph is added, the left side bearing of the
+        first glyph is removed and kerning is applied between each pair of
+        adjacent glyphs.
+
+        :param string: the text to measure
+        :return: total width
+        """
+        glyphs = [self.char_to_glyph(ord(ch)) for ch in string]
+        width = 0
+        for n, glyph in enumerate(glyphs):
+            aw, lsb = self._get_glyph_metric(glyph)
+            width += aw
+            if n == 0:
+                width -= lsb
+            # Kerning applies to every adjacent pair, including the first one.
+            if n + 1 < len(glyphs):
+                width += self.glyph_kern.get((glyph, glyphs[n + 1]), 0)
+        return width
+
+    def get_char_width(self, char):
+        """ Return the advance width for a character.
+        :param char: a single character or its code point
+        :return: advance width, that of glyph 0 when the glyph is not known
+        """
+        if isinstance(char, str):
+            char = ord(char)
+        return self._get_glyph_metric(self.char_to_glyph(char))[0]
+
+    # Internal Table Functions
+    def get_table(self, tag, obj_class=None):
+        """ Return the object for a table, parsing it first when a class is supplied.
+        :param tag: table tag
+        :param obj_class: class used to parse the table if it has not been read yet
+        :return: the table object or None
+        """
+        tbl_obj = self.tables.get(tag)
+        if tbl_obj is None and obj_class is not None:
+            tbl = self.header.get_tag(tag)
+            if tbl is None:
+                return None
+            orig_pos = self._seek(tbl.offset)
+            tbl_obj = self._read_class(obj_class, tbl.length)
+            self.tables[tag] = tbl_obj
+            self._seek(orig_pos)
+        return tbl_obj
+
+    def get_table_attr(self, tbl, attr, default=None):
+        """ Return an attribute of a parsed table, or the default if either is missing. """
+        if tbl not in self.tables:
+            return default
+        return getattr(self.tables[tbl], attr, default)
+
+    def get_name_table(self, n_attr, default=None):
+        """ Return an entry from the name table, or the default if there is no name table. """
+        if b'name' not in self.tables:
+            return default
+        return self.tables[b'name'].get_name(n_attr, default)
+
+    def copy_table(self, tag):
+        """ Return a shallow copy of a parsed table. """
+        tbl = self.get_table(tag)
+        return copy(tbl)
+
+    def _get_table_offset(self, tag):
+        """ Return the offset recorded in the header for a table, 0 if it is not listed. """
+        tbl = self.header.get_tag(tag)
+        return tbl.offset if tbl is not None else 0
+
+    def get_hmtx(self):
+        """ Read the glyph metrics.
+        :return: True if the metrics were read, False if the tables needed are missing
+        """
+        n_metrics = self.get_table_attr(b'hhea', 'number_of_metrics')
+        offset = self._get_table_offset(b'hmtx')
+        if n_metrics is None or offset == 0:
+            return False
+        self._seek(offset)
+        aw = 0
+        for _ in range(n_metrics):
+            aw, lsb = unpack(">Hh", self.file_handle.read(4))
+            self.glyph_metrics.append((aw, lsb))
+        # Now we have read the aw and lsb for specific glyphs, we need to read additional
+        # lsb data. These glyphs all use the last advance width that was read.
+        extra = self.n_glyphs - n_metrics
+        if extra > 0:
+            for lsb in self._read_list_int16(extra):
+                self.glyph_metrics.append((aw, lsb))
+        return True
+
+    def get_loca(self):
+        """ Read the glyph locations, stored as byte offsets in self.tables[b'loca']. """
+        start = self._get_table_offset(b'loca')
+        if start == 0:
+            # No loca table listed, so there is nothing sensible to read.
+            return
+        self._seek(start)
+        if self.idx_format == 0:
+            # The short format stores each offset divided by 2.
+            self.tables[b'loca'] = [n * 2 for n in self._read_list_uint16(self.n_glyphs + 1)]
+        elif self.idx_format == 1:
+            self.tables[b'loca'] = self._read_list_uint32(self.n_glyphs + 1)
+
+    def get_kern_data(self):
+        """ Read the kerning pairs from the subtables we understand into self.glyph_kern. """
+        kern = self.get_table(b'kern')
+        for st in kern.subtables:
+            if st.coverage != 1 or st.version != 0:
+                print("coverage = {}, version = {}  - skipping".format(st.coverage, st.version))
+                continue
+            self._seek(st.offset + len(st))
+            # Only the pair count is needed from the 4 values at the start.
+            npairs = self._read_list_uint16(4)[0]
+            for _ in range(npairs):
+                (l, r, diff) = unpack(">HHh", self.file_handle.read(6))
+                self.glyph_kern[(l, r)] = diff
+
+    def char_to_glyph(self, char):
+        """ Return the glyph for a character code, 0 when there is none. """
+        self._open()
+        cmap = self.get_table(b'cmap')
+        if cmap is None:
+            return 0
+        glyph = cmap.char_to_glyph(char, self.file_handle)
+        return glyph or 0
+
+    def get_glyph_position(self, glyph):
+        """ Return the offset of a glyph from the start of the glyf table. """
+        loca = self.get_table(b'loca')
+        return loca[glyph]
+
+    def get_glyph_components(self, glyph):
+        """ Return a list of any component glyphs required. """
+        # Valid glyphs are 0 to n_glyphs - 1.
+        if glyph < 0 or glyph >= self.n_glyphs:
+            print("Missing glyph!!! {}".format(glyph))
+            return []
+        glyph_start = self.get_glyph_position(glyph)
+        if self.get_glyph_position(glyph + 1) == glyph_start:
+            # An empty glyph has no data of its own, reading here would parse
+            # whatever glyph comes next.
+            return []
+        pos = self._get_table_offset(b'glyf') + glyph_start
+        # NOTE(review): the glyph number is passed as the length argument - confirm
+        # this is what TTF_glyf expects.
+        glyf = self._read_class(TTF_glyf, offset=pos, length=glyph)
+        required = list(glyf.required)
+        for g in glyf.required:
+            for extra_glyph in self.get_glyph_components(g):
+                if extra_glyph not in required:
+                    required.append(extra_glyph)
+        return sorted(required)
+
+    def get_glyph_data(self, glyph):
+        """ Return the raw bytes for a glyph, b'' if the glyph has no data. """
+        data_start = self._get_table_offset(b'glyf')
+        glyph_start = self.get_glyph_position(glyph)
+        glyph_length = self.get_glyph_position(glyph + 1) - glyph_start
+        if glyph_length == 0:
+            print("Zero length glyph @ {}".format(glyph))
+            return b''
+        self._open()
+        self.file_handle.seek(data_start + glyph_start)
+        return self.file_handle.read(glyph_length)
+
+    def get_binary_table(self, tag):
+        """ Return the raw bytes of a table, b'' if the table is not listed in the header. """
+        tbl = self.header.get_tag(tag)
+        if tbl is None:
+            return b''
+        self._open()
+        self._seek(tbl.offset)
+        return self.file_handle.read(tbl.length)
+
+    def make_subset(self, subset):
+        """ Given a subset of characters, create a subset of the full TTF file suitable for
+            inclusion in a PDF.
+        :param subset: List of characters to include.
+        :return: TTFSubset object
+        """
+        return TTFSubset(self, subset)
+
+    # File functions.
+    def _open(self):
+        """ Make sure the file is open and move to the start of this face. """
+        if self.file_handle is None:
+            self.file_handle = open(self.filename, 'rb')
+        self.file_handle.seek(self.start_pos)
+
+    def _close(self):
+        """ Close the file if it is open. """
+        if self.file_handle is not None:
+            self.file_handle.close()
+            self.file_handle = None
+
+    def _seek(self, offset, whence=0):
+        """ Move within the file.
+        :return: the position before the move
+        """
+        # Only open when needed: _open() rewinds to the start of the face, which
+        # would lose the position we are asked to report.
+        if self.file_handle is None:
+            self._open()
+        pos = self.file_handle.tell()
+        self.file_handle.seek(offset, whence)
+        return pos
+
+    def _read_class(self, cls, length=None, offset=None):
+        """ Create an object of the class given from the file, seeking to offset first if given. """
+        if offset is not None:
+            self._seek(offset)
+        elif self.file_handle is None:
+            self._open()
+        if length is not None:
+            return cls(self.file_handle, length)
+        return cls(self.file_handle)
+
+    def _skip(self, offset):
+        """ Move relative to the current position, does nothing if the file is closed. """
+        if self.file_handle is not None:
+            self.file_handle.seek(offset, 1)
+
+    def _read_list_int16(self, n):
+        """ Read n big endian signed 16 bit values. """
+        _fmt = ">{}h".format(n)
+        return unpack(_fmt, self.file_handle.read(calcsize(_fmt)))
+
+    def _read_list_uint16(self, n):
+        """ Read n big endian unsigned 16 bit values. """
+        return read_list_uint16(self.file_handle, n)
+
+    def _read_uint16(self):
+        """ Read a single big endian unsigned 16 bit value. """
+        return unpack(">H", self.file_handle.read(2))[0]
+
+    def _read_int16(self):
+        """ Read a single big endian signed 16 bit value. """
+        return unpack(">h", self.file_handle.read(2))[0]
+
+    def _read_list_uint32(self, n):
+        """ Read n big endian unsigned 32 bit values. """
+        return read_list_uint32(self.file_handle, n)
--- a/zttf/ttfile.py
+++ b/zttf/ttfile.py
@@ -0,0 +1,22 @@
+from os.path import exists, getsize
+
+from zttf.objects import TTFCollectionHeader
+from zttf.ttf import TTFont
+
+
+class TTFile(object):
+    """ A font file on disk, holding one TTFont face per offset in the collection header. """
+
+    def __init__(self, filename):
+        """ Open the file and create a face for every offset that is listed.
+        :param filename: path of the font file
+        :raises IOError: when the file does not exist or is empty
+        """
+        self.filename = filename
+        self.faces = []
+
+        usable = exists(filename) and getsize(filename) != 0
+        if not usable:
+            raise IOError("The file '{}' does not exist or is empty".format(filename))
+
+        with open(self.filename, 'rb') as fh:
+            collection = TTFCollectionHeader(fh)
+            for face_offset in collection.offsets:
+                face = TTFont(filename, face_offset)
+                self.faces.append(face)
+
+    @property
+    def is_valid(self):
+        """ True when at least one face was found. """
+        return bool(self.faces)
--- a/zttf/utils.py
+++ b/zttf/utils.py
@@ -0,0 +1,247 @@
+from struct import calcsize, pack, unpack
+
+
+class PackedFormat:
+    """ Class to allow simpler extraction of data from a stream into an object with
+        named attributes.
+        All child classes need a FORMAT list of dicts describing the data to be extracted.
+
+        Each dict may have these keys:
+          'format'   struct format for the field (without the byte order), entries
+                     without one are ignored
+          'name'     attribute to store the value in, fields without a name are
+                     read but not stored
+          'convert'  callable, or name of a method, used to convert the value read.
+                     The value as read is kept in the attribute <name>_raw.
+    """
+    FORMAT = []
+
+    def __init__(self, fh=None, data=None, endian='>'):
+        """ Create the object, parsing from a file or from bytes if either is given.
+        :param fh: file object to read from
+        :param data: bytes to parse, only used when fh is None
+        :param endian: struct byte order character
+        """
+        self.endian = endian
+        self.parsed = False
+        if fh is not None:
+            self.from_file(fh)
+        elif data is not None:
+            self.from_data(data)
+
+    def _field_format(self, field):
+        """ Return the struct format for a field, including the byte order. """
+        return '{}{}'.format(self.endian, field['format'])
+
+    def _store(self, field, value):
+        """ Store the value read for a field, applying any conversion. """
+        if 'name' not in field:
+            return
+        if 'convert' in field:
+            setattr(self, field['name'] + '_raw', value)
+            _fn = field['convert'] if callable(field['convert']) else getattr(self, field['convert'])
+            if _fn is not None and callable(_fn):
+                value = _fn(value)
+        setattr(self, field['name'], value)
+
+    def from_file(self, fh):
+        """ Read every field in FORMAT from the current position of a file. """
+        for _f in self.FORMAT:
+            if 'format' not in _f:
+                continue
+            _fmt = self._field_format(_f)
+            self._store(_f, unpack(_fmt, fh.read(calcsize(_fmt)))[0])
+        self.parsed = True
+
+    def from_data(self, data):
+        """ Read every field in FORMAT from bytes, handling fields as from_file does. """
+        offset = 0
+        for _f in self.FORMAT:
+            if 'format' not in _f:
+                continue
+            _fmt = self._field_format(_f)
+            _sz = calcsize(_fmt)
+            self._store(_f, unpack(_fmt, data[offset: offset + _sz])[0])
+            # Fields without a name still take up space.
+            offset += _sz
+        self.parsed = True
+
+    def as_bytes(self):
+        """ Pack the fields in FORMAT order.
+
+        Fields with a conversion are packed from their <name>_raw value. Fields
+        that have no name or no value are packed as zero (or empty bytes).
+
+        :return: bytes
+        """
+        parts = []
+        for _f in self.FORMAT:
+            if 'format' not in _f:
+                continue
+            # pack() needs bytes for 's' fields.
+            _val = b'' if 's' in _f['format'] else 0
+            if 'name' in _f:
+                _attr = _f['name'] + '_raw' if 'convert' in _f else _f['name']
+                _val = getattr(self, _attr, _val)
+            parts.append(pack(self._field_format(_f), _val))
+        return b''.join(parts)
+
+    def as_string(self):
+        """ Return one line of text for each named field. """
+        def _name_to_string(n):
+            return n.replace('_', ' ').capitalize()
+        ss = ''
+        for _f in self.FORMAT:
+            if 'name' not in _f:
+                continue
+            ss += '  {}: {}\n'.format(_name_to_string(_f['name']), getattr(self, _f['name'], None))
+        return ss
+
+    def as_table_string(self):
+        """ Return one line per field showing offset, format and size, plus name and value if set. """
+        def _name_to_string(n):
+            return n.replace('_', ' ').capitalize()
+        ss = ''
+        offset = 0
+        for _f in self.FORMAT:
+            if 'format' not in _f:
+                continue
+            # Include the byte order so no native alignment is applied to the size.
+            _sz = calcsize(self._field_format(_f))
+            ss += ' {:04X} {:4s} {:>3d} '.format(offset, _f['format'], _sz)
+            if 'name' in _f and getattr(self, _f['name'], None) is not None:
+                ss += '{:30s}  {}'.format(_name_to_string(_f['name']), getattr(self, _f['name']))
+            offset += _sz
+            ss += '\n'
+        return ss
+
+    def __len__(self):
+        """ Size in bytes of all the fields when packed. """
+        fmt = "{}".format(self.endian)
+        for _f in self.FORMAT:
+            fmt += _f.get('format', '')
+        return calcsize(fmt)
+
+
+def fixed_version(num):
+    """ Translate the raw 32-bit version values we know about into a version number.
+    :param num: version as read from the file (32-bit unsigned integer)
+    :return: version number (float) for known values, otherwise num unchanged
+    """
+    known_versions = {
+        0x00005000: 0.5,
+        0x00010000: 1.0,
+        0x00020000: 2.0,
+        0x00025000: 2.5,
+        0x00030000: 3.0,
+    }
+    return known_versions.get(num, num)
+
+
+def binary_search_parameters(length):
+    """ Calculate the values used to binary search a list of the given length.
+    :param length: number of entries
+    :return: tuple of (entry selector, search range, range shift) where search range is
+             the largest power of 2 not above length (1 at minimum), entry selector is
+             its exponent and range shift is length less the search range
+    """
+    exponent = 0
+    while (2 << exponent) <= length:
+        exponent += 1
+    power = 1 << exponent
+    return exponent, power, length - power
+
+
+class Range:
+    """ A run of character codes that map onto glyphs, as used for cmap segments.
+
+    When offset is 0 the glyph is the character code plus iddelta. Otherwise
+    offset is where the glyph ids for the range are stored in the font file.
+    """
+    # NOTE(review): expand() sets end to one past the last character, while
+    # get_glyph() and get_offset() also accept char == end - confirm whether
+    # end is meant to be inclusive.
+
+    def __init__(self, start=0, glyph=0):
+        """ Create a range holding a single character.
+        :param start: first character code
+        :param glyph: glyph for the first character
+        """
+        self.start = start
+        self.expand(start)
+        self.start_glyph = glyph
+        self.iddelta = glyph - start
+        self.offset = 0
+
+    def is_consecutive(self, n, g):
+        """ Would character n with glyph g directly continue this range? """
+        return n == self.end and g == self.start_glyph + n - self.start
+
+    def expand(self, n):
+        """ Extend the range so that it includes character n. """
+        self.end = (n + 1) & 0xffff
+
+    def __str__(self):
+        return "CMAP: {} - {}  @  {}".format(self.start, self.end, self.iddelta)
+
+    def as_map(self):
+        """ Return a dict of character code to glyph for the range. """
+        # debugging....
+        return {n: n + self.iddelta for n in range(self.start, self.end)}
+
+    def char_list(self):
+        """ Return the character codes in the range. """
+        return range(self.start, self.end)
+
+    def char_to_glyph(self, char, fh):
+        """ Return the glyph for a character.
+        :param char: character code
+        :param fh: font file, only read when the range has an offset
+        :return: glyph, 0 if the character is outside the range or has no glyph
+        """
+        if self.offset == 0:
+            return self.get_glyph(char)
+        if char < self.start or char > self.end:
+            # Do not read from the file for characters we do not cover.
+            return 0
+        fh.seek(self.get_offset(char))
+        glyph = unpack(">H", fh.read(2))[0]
+        if glyph == 0:
+            # A stored 0 means the glyph is missing, iddelta is not applied.
+            return 0
+        # The stored value is a glyph id rather than a character code, so it must
+        # not be checked against the character range.
+        return (glyph + self.iddelta) & 0xffff
+
+    def get_glyph(self, char):
+        """ Return the character code plus iddelta, 0 if the character is outside the range. """
+        if char < self.start or char > self.end:
+            return 0
+        return (char + self.iddelta) & 0xffff
+
+    def get_offset(self, char):
+        """ Return where the glyph id for a character is stored, 0 if outside the range. """
+        if char < self.start or char > self.end:
+            return 0
+        return self.offset + 2 * (char - self.start)
+
+
+def read_list_int16(fh, n):
+    """ Read n big endian signed 16 bit values from a file, returned as a tuple. """
+    layout = ">" + str(n) + "h"
+    raw = fh.read(calcsize(layout))
+    return unpack(layout, raw)
+
+
+def read_list_uint16(fh, n):
+    """ Read n big endian unsigned 16 bit values from a file, returned as a tuple. """
+    layout = ">" + str(n) + "H"
+    raw = fh.read(calcsize(layout))
+    return unpack(layout, raw)
+
+
+def read_list_uint32(fh, n):
+    """ Read n big endian unsigned 32 bit values from a file, returned as a tuple. """
+    layout = ">" + str(n) + "I"
+    raw = fh.read(calcsize(layout))
+    return unpack(layout, raw)
+
+
+def ttf_checksum(data):
+    """ Calculate the checksum of a block of data.
+
+    The data is treated as a list of big endian unsigned 32 bit values, with
+    zero bytes added at the end to complete the last value if needed.
+
+    :param data: bytes to checksum
+    :return: sum of the values as an unsigned 32 bit integer
+    """
+    # Pad up to the next multiple of 4 (0 to 3 bytes). A new object is created
+    # so a bytearray passed in by the caller is not changed.
+    data = data + b'\0' * (-len(data) % 4)
+    values = unpack(">{}I".format(len(data) // 4), data)
+    return sum(values) & 0xFFFFFFFF
+
+
+#############################################################################
+###
+### Glyph Utilities...
+###
+#############################################################################
+
+# Flag Constants - one bit each, tested against the flags value of a glyph component
+GF_ARG_1_AND_2_ARE_WORDS = 0x0001
+GF_ARGS_ARE_XY_VALUES = 0x0002
+GF_ROUND_XY_TO_GRID = 0x0004
+GF_WE_HAVE_A_SCALE = 0x0008
+GF_RESERVED = 0x0010
+GF_MORE_COMPONENTS = 0x0020
+GF_WE_HAVE_AN_X_AND_Y_SCALE = 0x0040
+GF_WE_HAVE_A_TWO_BY_TWO = 0x0080
+GF_WE_HAVE_INSTRUCTIONS = 0x0100
+GF_USE_MY_METRICS = 0x0200
+GF_OVERLAP_COMPOUND = 0x0400
+GF_SCALED_COMPONENT_OFFSET = 0x0800
+GF_UNSCALED_COMPONENT_OFFSET = 0x1000
+
+
+def glyf_skip_format(flags):
+    """ Build the struct format covering the data to skip for the flags given.
+    :param flags: flags value to test against the GF_ constants
+    :return: struct format, '>I' or '>H' depending on GF_ARG_1_AND_2_ARE_WORDS, followed
+             by the format for the first of the scale flags that is set
+    """
+    fmt = '>H'
+    if flags & GF_ARG_1_AND_2_ARE_WORDS:
+        fmt = '>I'
+    # Checked in this order, only the first flag that is set is used.
+    scale_formats = (
+        (GF_WE_HAVE_A_SCALE, 'H'),
+        (GF_WE_HAVE_AN_X_AND_Y_SCALE, 'I'),
+        (GF_WE_HAVE_A_TWO_BY_TWO, 'II'),
+    )
+    for mask, suffix in scale_formats:
+        if flags & mask:
+            return fmt + suffix
+    return fmt
+
+
+def glyph_more_components(flag):
+    """ Return the GF_MORE_COMPONENTS bit of flag, which is 0 when it is not set. """
+    more = flag & GF_MORE_COMPONENTS
+    return more
+
+
+def glyph_flags_decode(flag):
+    """ Debug helper: print the flag value in hex, then a label for each flag that is set. """
+    print("Glyph flag = {:04X}".format(flag))
+    # Kept in the order that the labels are to be printed.
+    labels = (
+        (GF_ARG_1_AND_2_ARE_WORDS, "GF_ARG_1_AND_2_ARE_WORDS"),
+        (GF_ARGS_ARE_XY_VALUES, "GF_ARGS_ARE_XY_VALUES"),
+        (GF_ROUND_XY_TO_GRID, "GF_ARGS_ROUND_XY_TO_GRID"),
+        (GF_WE_HAVE_A_SCALE, "GF_WE_HAVE_A_SCALE"),
+        (GF_RESERVED, "GF_RESERVED"),
+        (GF_MORE_COMPONENTS, "GF_MORE_COMPONENTS"),
+        (GF_WE_HAVE_AN_X_AND_Y_SCALE, "GF_WE_HAVE_AN_X_AND_Y_SCALE"),
+        (GF_WE_HAVE_A_TWO_BY_TWO, "GF_WE_HAVE_A_TWO_BY_TWO"),
+        (GF_WE_HAVE_INSTRUCTIONS, "GF_WE_HAVE_INSTRUCTIONS"),
+        (GF_USE_MY_METRICS, "GF_USE_MY_METRICS"),
+        (GF_OVERLAP_COMPOUND, "GF_OVERLAP_COMPOUND"),
+        (GF_SCALED_COMPONENT_OFFSET, "GF_SCALED_COMPONENT_OFFSET"),
+        (GF_UNSCALED_COMPONENT_OFFSET, "GF_UNSCALED_COMPONENT_OFFSET"),
+    )
+    for mask, label in labels:
+        if flag & mask:
+            print(label)