Added Python script to convert MASM to ACME syntax.
git-svn-id: https://svn.code.sf.net/p/acme-crossass/code-0/trunk@319 4df02467-bbd4-4a76-a152-e7ce94205b78
This commit is contained in:
		
							
								
								
									
										277
									
								
								contrib/toacme/masm2acme.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										277
									
								
								contrib/toacme/masm2acme.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,277 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  | import sys | ||||||
|  |  | ||||||
def line_preprocess(line):
    """Split one source line into code parts, string literals and comment.

    The line is scanned character by character:

    * everything from the first unquoted ';' to the end of the line is
      collected as the comment,
    * each quoted string becomes its own entry in the parts list,
    * the text between them is kept as-is, except that runs of blanks are
      collapsed to a single blank and a part consisting of one lone blank
      in front of a string or comment is dropped.

    String literals are rewritten on the way:

    * a literal that starts directly after the closing quote of another
      literal using the same quote character is a doubled quote, so both
      are joined into one literal with a backslash-escaped quote in between
      ("a""b" stands for a"b),
    * one-character strings are emitted in single quotes,
    * a lone backslash or quote character is backslash-escaped.

    Returns a tuple (parts, comment); comment is None if the line has none.
    Raises ValueError if a string is still open at the end of the line.
    """
    result = []
    part = ""
    comment = None
    quotes = None         # quote character of the literal being scanned
    last_raw = ""         # previous literal, still in its own quotes
    just_closed = False   # True only directly after a closing quote
    merge = False         # current literal continues the previous one
    for char in line:
        # are we inside comment?
        if comment:
            comment += char
            continue
        # are we inside quotes?
        if quotes:
            part += char
            if char == quotes:
                # end of quotes
                if merge:
                    # doubled quote: glue onto the previous literal, which
                    # is the last entry because nothing was appended since
                    part = last_raw[:-1] + "\\" + part
                    result.pop()
                # remember the literal in its own quotes; the rewriting
                # below may switch to single quotes, which must not stop a
                # following doubled quote from being recognised
                last_raw = part
                # use singlequotes for 1-char strings
                if len(part) == 3:
                    if part[1] == "\\":
                        # keep the backslash escaped if this gets merged
                        last_raw = part[0] + "\\\\" + part[2]
                    part = "'" + part[1] + "'"
                # escape backslash, singlequote, doublequote
                if part == "'\\'":
                    part = "'\\\\'"
                elif part == "'''":
                    part = "'\\''"
                elif part == '"\\""':
                    part = "'\"'"
                result.append(part)  # move quoted string to result list
                part = ""
                quotes = None
                just_closed = True
            continue
        # not in quotes; adjacency only survives until the next character
        adjacent = just_closed
        just_closed = False
        if char == '"' or char == "'":
            # new quotes, so finish old part
            if part and part != ' ':
                result.append(part)
            # only a literal opened right behind a closing quote of the
            # same kind is a doubled quote
            merge = adjacent and last_raw.endswith(char)
            # ...and start new one
            part = char
            quotes = char
            continue
        # comment?
        if char == ';':
            # finish old part
            if part and part != ' ':
                result.append(part)
            part = ""
            # ...and start comment
            comment = char
            continue
        # skip blanks after blank, keep all other characters
        if not (char == ' ' and part.endswith(' ')):
            part += char
    # quotes still open at end of line?
    if quotes:
        raise ValueError("Unterminated string constant in input data")
    # append last part
    if part:
        result.append(part)
    return result, comment
|  |  | ||||||
def single_out(items, substring):
    """Isolate every occurrence of substring as an item of its own.

    Each item is cut at every occurrence of substring; the pieces and the
    separators are returned in their original order as one flat list.
    Empty pieces (and empty items) are dropped.
    """
    separated = []
    for item in items:
        for index, piece in enumerate(item.split(substring)):
            # every piece but the first was preceded by a separator
            if index:
                separated.append(substring)
            if piece:
                separated.append(piece)
    return separated
|  |  | ||||||
def unquoted_tokenize(part):
    """Break a piece of code into tokens (never pass string literals!).

    Whitespace separates tokens and is discarded; each of the characters
    , / = + - * becomes a token of its own.
    """
    tokens = part.split()
    # same order as before: comma, slash, equals, plus, minus, asterisk
    for delimiter in (',', '/', '=', '+', '-', '*'):
        tokens = single_out(tokens, delimiter)
    return tokens
|  |  | ||||||
# mnemonics without operand; convert_opcodes() only prefixes them with a tab
opcodes_to_keep = (
    # std 6502:
    "brk rti rts nop "
    "php plp pha pla "
    "bpl bmi bvc bvs bcc bcs bne beq "
    "clc sec cli sei clv cld sed "
    "dex dey inx iny "
    "tax tay txa tya tsx txs "
    # new in 65c02 (inc, dec are listed with the operand mnemonics):
    "phx plx phy ply bra"
).split()

# mnemonics taking an operand; also emitted with a leading tab
opcodes_with_arg = (
    # std 6502:
    "ora and eor adc sta lda cmp sbc "
    "asl rol lsr ror dec inc "
    "ldx stx cpx ldy sty cpy "
    "jsr jmp bit "
    # new in 65c02:
    "tsb trb stz"
).split()

# tokens that are exchanged wherever they appear outside of string literals
token_substitutions = {
    # program counter
    ".": "*",
    # operators
    ":not:": "not",
    ":eor:": "xor",
    ":msb:": ">",   # operator?
}

# first token of a statement: replaced by the given text, operands are kept
opcodes_to_replace = {
    # pseudo opcodes
    "org": "*=",
    "cpu": ";!cpu",         # TODO: support properly!
    "=": "!tx",
    # actually & instead of $, but substitution was done earlier
    "$": "!wo",
    "end": "!eof",
    "assert": "+assert",
    "lnk": ";!source",      # TODO: support properly!
    # accumulator addressing is written as the plain mnemonic
    "asla": "\tasl",
    "lsra": "\tlsr",
    "rola": "\trol",
    "rora": "\tror",
    # 65c02
    "dea": "\tdec",
    "ina": "\tinc",
}

# mnemonics that only get a different name (looked up after the addressing
# mode suffix has been stripped)
opcodes_to_rename = {
    "clr": "stz"    # 65c02
}
|  |  | ||||||
def convert_opcodes(parts):
    """Convert the mnemonic or pseudo opcode at the start of a statement.

    parts is the token list of one statement; parts[0] is taken to be the
    (lower-case) opcode and the remaining tokens its operand.  The caller's
    list is not modified.

    * opcodes from opcodes_to_keep are only prefixed with a tab,
    * opcodes from opcodes_to_replace are exchanged for their replacement,
    * otherwise everything after the third character is treated as an
      addressing mode suffix (im, ax, zx, ay, zy, xi, iy, i); if it is one
      of these, the suffix is removed and the operand is decorated with
      '#', ', x', ', y' and/or parentheses instead.  An unknown suffix
      leaves the opcode alone,
    * finally opcodes_to_rename is applied and operand mnemonics get a
      leading tab.

    A suffixed opcode without any operand cannot be rewritten and is
    passed through unchanged instead of failing.

    Returns the converted token list (the empty input list is returned
    as-is).
    """
    if not parts:
        return parts
    op = parts[0]
    args = parts[1:]
    if op in opcodes_to_keep:
        return ["\t" + op] + args
    if op in opcodes_to_replace:
        return [opcodes_to_replace[op]] + args
    # these two do not follow the "three letters plus suffix" scheme, so
    # expand them to a form the suffix handling below understands
    if op == "jmi":
        op = "jmpi"
    elif op == "jmix":
        op = "jmpxi"
    # convert addressing modes; every rewrite edits the operand, so there
    # has to be one
    if len(op) > 3 and args:
        stem = op[:3]
        mode = op[3:]
        if mode == "im":
            first = args[0]
            if first == '/':
                # '/' selects the high byte
                args[0] = "#>"
            elif 'a' <= first[0] <= 'z':
                # operand starts with a letter: take the low byte
                args[0] = "#< " + first
            else:
                args[0] = "#" + first
        elif mode in ("ax", "zx"):
            args[-1] = args[-1] + ", x"
        elif mode in ("ay", "zy"):
            args[-1] = args[-1] + ", y"
        elif mode == "xi":
            args[0] = "(" + args[0]
            args[-1] = args[-1] + ", x)"
        elif mode == "iy":
            args[0] = "(" + args[0]
            args[-1] = args[-1] + "), y"
        elif mode == "i":
            args[0] = "(" + args[0]
            args[-1] = args[-1] + ")"
        else:
            # not an addressing mode suffix: keep the whole token
            stem = op
        op = stem
    # convert
    if op in opcodes_to_rename:
        op = opcodes_to_rename[op]
    if op in opcodes_with_arg:
        return ["\t" + op] + args
    return [op] + args
|  |  | ||||||
def process_code(parts):
    """Convert the code portion of one line.

    parts is the list produced by line_preprocess(): pieces of code and
    quoted string literals in source order.  String literals are passed
    through untouched; everything else is lower-cased, '&' is replaced by
    '$', and the text is split into tokens, which are then run through
    token_substitutions.

    A line whose first character is whitespace (blank or tab) holds an
    instruction.  Any other line starts with a label; if the second token
    is '*' the line is a symbol definition and becomes 'name<TAB>= value',
    with the value tokens copied verbatim (they are an expression, not an
    instruction, so no opcode conversion is applied to them).

    Returns (prefix, tokens): prefix is the label (possibly empty) followed
    by a tab if any tokens remain; tokens is the converted statement.  If
    the code portion contains no tokens at all, ("", []) is returned.
    """
    if not parts:
        return "", []
    # remember if line starts with whitespace, because the tokenizer
    # drops all of it
    indented = parts[0][0].isspace()
    tokens = []
    for part in parts:
        # do not process quoted strings any further
        if part.startswith(("'", '"')):
            tokens.append(part)
            continue
        # lower case, and '&' becomes '$'
        part = part.lower().replace("&", "$")
        # all other parts are split up into tokens
        tokens.extend(unquoted_tokenize(part))
    # convert some tokens (string literals are not in danger, as they
    # include quotes)
    tokens = [token_substitutions.get(token, token) for token in tokens]
    # nothing but whitespace: there is neither a label nor a statement
    if not tokens:
        return "", []
    label = ""
    if indented:
        # code
        result = convert_opcodes(tokens)
    elif len(tokens) > 1 and tokens[1] == "*":
        # symbol definition
        result = [tokens[0] + "\t="] + tokens[2:]
    else:
        # label, optionally followed by code
        label = tokens[0]
        result = convert_opcodes(tokens[1:])
    if result:
        label = label + "\t"
    return label, result
|  |  | ||||||
def process_line(line):
    """Convert a single input line and return it terminated by a newline.

    Any trailing CR/LF characters of the input are removed first.  The
    code portion (if there is one) is converted and its tokens are joined
    with single blanks; the comment (if there is one) is appended
    unchanged.
    """
    # don't care if NL or CR or any combination of them
    stripped = line.rstrip("\r\n")
    # step 1: split into strings, comment and everything else
    codeparts, comment = line_preprocess(stripped)
    # step 2: convert whatever precedes the comment
    converted = ""
    if codeparts:
        prefix, tokens = process_code(codeparts)
        converted = prefix + " ".join(tokens)
    # step 3: put the comment back
    if comment:
        converted += comment
    return converted + "\n"
|  |  | ||||||
def convert_file(input, output):
    """Read the file named input line by line and write the converted
    lines to the file named output, preceded by a ';ACME 0.97' line."""
    with open(input, "rt") as source, open(output, "wt") as target:
        target.write(";ACME 0.97\n")
        target.writelines(process_line(raw) for raw in source)
|  |  | ||||||
if __name__ == '__main__':
    # exactly two arguments are expected: input file and output file
    arguments = sys.argv[1:]
    if len(arguments) == 2:
        source_path, target_path = arguments
        convert_file(source_path, target_path)
    else:
        # the string handed to sys.exit() is the error/usage message
        sys.exit(
            "Error: wrong number of arguments\n"
            "\n"
            "masm2acme.py converts a file from MASM to ACME syntax.\n"
            "Usage: masm2acme.py INPUTFILE OUTPUTFILE\n"
        )
		Reference in New Issue
	
	Block a user