from io import BytesIO from struct import pack, unpack, calcsize, error as struct_error from zttf.objects import TTF_post, TTFHeader, TTFOffsetTable, TTF_kern, TTF_kern_subtable from zttf.utils import Range, glyph_more_components, glyf_skip_format, ttf_checksum, binary_search_parameters class TTFSubset: def __init__(self, parent, subset): self.parent = parent self.subset = subset self.tables = {} # We need to build 2 maps, one for character -> glyph and one # for glyph -> character self.orig_char_to_glyph = {} self.orig_glyph_to_char = {} self.glyph_map = {} self.char_to_glyph = {} self.glyph_to_char = {} self.cmap_ranges = [] self.required_glyphs = [0] self.metrics = [] self.fh = None def start_table(self, tag, data=None): b = BytesIO() if data is not None: b.write(data) self.tables[tag] = b return b def find_glyph_subset(self): for s in self.subset: self.parent.char_to_glyph(s) char_to_glyphs = self.parent.get_table(b'cmap').char_map() rqd = [] for code in self.subset: glyph = char_to_glyphs.get(code) if glyph is None: print("Unknown character in parent mapping: {}".format(code)) continue # print("character {} is glyph {}".format(code, glyph)) self.orig_char_to_glyph[code] = glyph self.orig_glyph_to_char.setdefault(glyph, []).append(code) if glyph not in rqd: rqd.append(glyph) for glyph in rqd: self.required_glyphs.append(glyph) self.required_glyphs.extend(self.parent.get_glyph_components(glyph)) self.required_glyphs.sort() self.glyph_map = {} for rg in self.required_glyphs: glyph = len(self.glyph_map) + 1 self.glyph_map[rg] = glyph if rg in self.orig_glyph_to_char: for cc in self.orig_glyph_to_char[rg]: self.char_to_glyph[cc] = glyph self.glyph_to_char[glyph] = self.orig_glyph_to_char[rg] def copy_tables(self): for tag in [b'name', b'cvt', b'fpgm', b'prep', b'gasp']: if tag in self.parent.tables: buff = self.start_table(tag) tbl = self.parent.header.get_tag(tag) self.fh.seek(tbl.offset) buff.write(self.fh.read(tbl.length)) new_post = TTF_post() for f in ['italic_angle', 'underline_position', 'Underline_thickness', 'is_fixed_pitch']: setattr(new_post, f, self.parent.get_table_attr(b'post', f)) self.start_table(b'post', new_post.as_bytes()) head = self.parent.copy_table(b'head') head.checksum_adj = 0 head.index_to_loc_format = 0 self.start_table(b'head', head.as_bytes()) hhea = self.parent.copy_table(b'hhea') hhea.number_of_metrics = len(self.metrics) self.start_table(b'hhea', hhea.as_bytes()) maxp = self.parent.copy_table(b'maxp') maxp.b_glyphs = len(self.required_glyphs) self.start_table(b'maxp', maxp.as_bytes()) self.start_table(b'os2', self.parent.copy_table(b'os2').as_bytes()) # todo - is it worth finding a way to subset the GPOS and LTSH tables? def build_cmap_ranges(self): # As we will likely have a scattered map we will use CMAP Format 4. # We take the character mappings we have and build 4 lists... # start code # end code # id delta # range offset self.cmap_ranges = [] for cc, glyph in sorted(self.char_to_glyph.items()): try: current = self.cmap_ranges[-1] if current is None or not current.is_consecutive(cc, glyph): self.cmap_ranges.append(Range(cc, glyph)) else: current.expand(cc) except IndexError: self.cmap_ranges.append(Range(cc, glyph)) def add_cmap_table(self): if self.cmap_ranges == []: self.build_cmap_ranges() self.cmap_ranges.append(Range(0xffff, 0)) self.cmap_ranges[-1].iddelta = 0 seg_count = len(self.cmap_ranges) src_range, entry_selector, range_shift = binary_search_parameters(seg_count * 2) length = 16 + 8 * seg_count + len(self.glyph_to_char) + 1 data = [ 0, # version 1, # number of subtables 3, # platform id (MS) 1, # endocing id (Unicode) 0, 12, # subtable location # subtable 4, # format length, # length 0, # language seg_count * 2, # seg count * 2 src_range, # search range (2 ** floor(log2(seg_count))) entry_selector, # entry selector log2(src_range / 2) seg_count * 2 - src_range, # range shift ( 2 * seg_count - search_range) ] data.extend([r.end for r in self.cmap_ranges]) data.append(0) data.extend([r.start for r in self.cmap_ranges]) buff = self.start_table(b'cmap') buff.write(pack(">{}H".format(len(data)), *data)) buff.write(pack(">{}h".format(len(self.cmap_ranges)), *[r.iddelta for r in self.cmap_ranges])) buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.offset for r in self.cmap_ranges])) buff.write(pack(">{}H".format(len(self.cmap_ranges)), *[r.start_glyph for r in self.cmap_ranges])) def get_glyphs(self): locations = [] self.metrics = [] buff = self.start_table(b'glyf') for g in self.required_glyphs: locations.append(int(buff.tell() / 2)) data = self.parent.get_glyph_data(g) if data == b'': continue if unpack(">h", data[:2])[0] == -1: # need to adjust glyph index... pos = 10 while True: flags, next_glyph = unpack(">HH", data[pos: pos + 4]) data = data[:pos + 2] + pack(">H", self.glyph_map[next_glyph]) + data[pos+4:] pos += 4 + calcsize(glyf_skip_format(flags)) if not glyph_more_components(flags): break buff.write(data) self.metrics.append(self.parent.glyph_metrics[g]) loca = self.start_table(b'loca') loca.write(pack(">{}H".format(len(locations)), *locations)) hmtx = self.start_table(b'hmtx') for m in self.metrics: hmtx.write(pack(">Hh", *m)) def add_kern_data(self): entries = {} for k, diff in self.parent.glyph_kern.items(): if k[0] not in self.required_glyphs or k[1] not in self.required_glyphs: continue # print("mapping {} to ({}, {})".format(k, self.glyph_map[k[0]], self.glyph_map[k[1]])) entries[(self.glyph_map[k[0]], self.glyph_map[k[1]])] = diff if len(entries) == 0: return kern = self.start_table(b'kern') kh = TTF_kern() kh.version = 0 kh.num_tables = 1 kern.write(kh.as_bytes()) st = TTF_kern_subtable() st.length = len(st) + 6 * len(entries) + 8 st.version = 0 st.coverage = 1 kern.write(st.as_bytes()) kern.write(pack(">H", len(entries))) kern.write(pack(">HHH", *binary_search_parameters(len(entries)))) for key, diff in entries.items(): kern.write(pack(">HHh", key[0], key[1], diff)) # Put the TTF file together def output(self): """ Generate a binary based on the subset we have been given. """ self.fh = open(self.parent.filename, 'rb') self.fh.seek(self.parent.start_pos) self.find_glyph_subset() self.add_kern_data() self.copy_tables() self.add_cmap_table() self.get_glyphs() # self.dump_tables() self.fh.close() header = TTFHeader() header.num_tables = len(self.tables) header.version_raw = 0x00010000 output = BytesIO() header.entry_selector, header.search_range, header.range_shift = binary_search_parameters(len(self.tables)) output.write(header.as_bytes()) head_offset = 0 offset = output.tell() + 16 * len(self.tables) sorted_tables = sorted(self.tables.keys()) for tag in sorted_tables: if tag == b'head': head_offset = offset tbl = TTFOffsetTable() tbl.tag = tag tbl.offset = offset data = self.tables[tag].getvalue() tbl.length = len(data) tbl.calculate_checksum(data) offset += tbl.padded_length() output.write(tbl.as_bytes()) for tag in sorted_tables: data = self.tables[tag].getvalue() data += b'\0' * (len(data) % 4) output.write(data) checksum = 0xB1B0AFBA - ttf_checksum(output.getvalue()) data = output.getvalue() try: data = data[:head_offset + 8] + pack(">I", checksum) + data[head_offset + 12:] except struct_error: data = data[:head_offset + 8] + pack(">i", checksum) + data[head_offset + 12:] return data def dump_tables(self): for n in sorted(self.tables): print("{} {} bytes".format(n, self.tables[n].tell()))