# @file ConvertMasmToNasm.py # This script assists with conversion of MASM assembly syntax to NASM # # Copyright (c) 2007 - 2016, Intel Corporation. All rights reserved.
# # This program and the accompanying materials # are licensed and made available under the terms and conditions of the BSD License # which accompanies this distribution. The full text of the license may be found at # http://opensource.org/licenses/bsd-license.php # # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. # # # Import Modules # import argparse import os.path import re import StringIO import subprocess import sys class UnsupportedConversion(Exception): pass class NoSourceFile(Exception): pass class UnsupportedArch(Exception): unsupported = ('aarch64', 'arm', 'ebc', 'ipf') class CommonUtils: # Version and Copyright VersionNumber = "0.01" __version__ = "%prog Version " + VersionNumber __copyright__ = "Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved." __usage__ = "%prog [options] source.asm [destination.nasm]" def __init__(self, clone=None): if clone is None: self.args = self.ProcessCommandLine() else: self.args = clone.args self.unsupportedSyntaxSeen = False self.src = self.args.source assert(os.path.exists(self.src)) self.dirmode = os.path.isdir(self.src) srcExt = os.path.splitext(self.src)[1] assert (self.dirmode or srcExt != '.nasm') self.infmode = not self.dirmode and srcExt == '.inf' self.diff = self.args.diff self.git = self.args.git self.force = self.args.force if clone is None: self.rootdir = os.getcwd() self.DetectGit() else: self.rootdir = clone.rootdir self.gitdir = clone.gitdir self.gitemail = clone.gitemail def ProcessCommandLine(self): parser = argparse.ArgumentParser(description=self.__copyright__) parser.add_argument('--version', action='version', version='%(prog)s ' + self.VersionNumber) parser.add_argument("-q", "--quiet", action="store_true", help="Disable all messages except FATAL ERRORS.") parser.add_argument("--git", action="store_true", help="Use git to create commits for each file converted") parser.add_argument("--diff", action="store_true", help="Show diff of conversion") parser.add_argument("-f", "--force", action="store_true", help="Force conversion even if unsupported") parser.add_argument('source', help='MASM input file') parser.add_argument('dest', nargs='?', help='NASM output file (default=input.nasm; - for stdout)') return parser.parse_args() def RootRelative(self, path): result = path if result.startswith(self.rootdir): result = result[len(self.rootdir):] while len(result) > 0 and result[0] in '/\\': result = result[1:] return result def MatchAndSetMo(self, regexp, string): self.mo = regexp.match(string) return self.mo is not None def SearchAndSetMo(self, regexp, string): self.mo = regexp.search(string) return self.mo is not None def ReplacePreserveSpacing(self, string, find, replace): if len(find) >= len(replace): padded = replace + (' ' * (len(find) - len(replace))) return string.replace(find, padded) elif find.find(replace) >= 0: return string.replace(find, replace) else: lenDiff = len(replace) - len(find) result = string for i in range(lenDiff, -1, -1): padded = find + (' ' * i) result = result.replace(padded, replace) return result def DetectGit(self): lastpath = os.path.realpath(self.src) self.gitdir = None while True: path = os.path.split(lastpath)[0] if path == lastpath: self.gitemail = None return candidate = os.path.join(path, '.git') if os.path.isdir(candidate): self.gitdir = candidate self.gitemail = self.FormatGitEmailAddress() return lastpath = path def FormatGitEmailAddress(self): if not self.git or not self.gitdir: return '' cmd = ('git', 'config', 'user.name') name = self.RunAndCaptureOutput(cmd).strip() cmd = ('git', 'config', 'user.email') email = self.RunAndCaptureOutput(cmd).strip() if name.find(',') >= 0: name = '"' + name + '"' return name + ' <' + email + '>' def RunAndCaptureOutput(self, cmd, checkExitCode=True, pipeIn=None): if pipeIn: subpStdin = subprocess.PIPE else: subpStdin = None p = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stdin=subpStdin) (stdout, stderr) = p.communicate(pipeIn) if checkExitCode: if p.returncode != 0: print 'command:', ' '.join(cmd) print 'stdout:', stdout print 'stderr:', stderr print 'return:', p.returncode assert p.returncode == 0 return stdout def FileUpdated(self, path): if not self.git or not self.gitdir: return cmd = ('git', 'add', path) self.RunAndCaptureOutput(cmd) def FileAdded(self, path): self.FileUpdated(path) def RemoveFile(self, path): if not self.git or not self.gitdir: return cmd = ('git', 'rm', path) self.RunAndCaptureOutput(cmd) def FileConversionFinished(self, pkg, module, src, dst): if not self.git or not self.gitdir: return if not self.args.quiet: print 'Committing: Conversion of', dst prefix = ' '.join(filter(lambda a: a, [pkg, module])) message = '' if self.unsupportedSyntaxSeen: message += 'ERROR! ' message += '%s: Convert %s to NASM\n' % (prefix, src) message += '\n' message += 'The %s script was used to convert\n' % sys.argv[0] message += '%s to %s\n' % (src, dst) message += '\n' message += 'Contributed-under: TianoCore Contribution Agreement 1.0\n' assert(self.gitemail is not None) message += 'Signed-off-by: %s\n' % self.gitemail cmd = ('git', 'commit', '-F', '-') self.RunAndCaptureOutput(cmd, pipeIn=message) class ConvertAsmFile(CommonUtils): def __init__(self, src, dst, clone): CommonUtils.__init__(self, clone) self.ConvertAsmFile(src, dst) self.FileAdded(dst) self.RemoveFile(src) def ConvertAsmFile(self, inputFile, outputFile=None): self.globals = set() self.unsupportedSyntaxSeen = False self.inputFilename = inputFile if not outputFile: outputFile = os.path.splitext(inputFile)[0] + '.nasm' self.outputFilename = outputFile fullSrc = os.path.realpath(inputFile) srcParentDir = os.path.basename(os.path.split(fullSrc)[0]) maybeArch = srcParentDir.lower() if maybeArch in UnsupportedArch.unsupported: raise UnsupportedArch self.ia32 = maybeArch == 'ia32' self.x64 = maybeArch == 'x64' self.inputFileBase = os.path.basename(self.inputFilename) self.outputFileBase = os.path.basename(self.outputFilename) if self.outputFilename == '-' and not self.diff: self.output = sys.stdout else: self.output = StringIO.StringIO() if not self.args.quiet: dirpath, src = os.path.split(self.inputFilename) dirpath = self.RootRelative(dirpath) dst = os.path.basename(self.outputFilename) print 'Converting:', dirpath, src, '->', dst lines = open(self.inputFilename).readlines() self.Convert(lines) if self.outputFilename == '-': if self.diff: sys.stdout.write(self.output.getvalue()) self.output.close() else: f = open(self.outputFilename, 'wb') f.write(self.output.getvalue()) f.close() self.output.close() endOfLineRe = re.compile(r''' \s* ( ; .* )? \n $ ''', re.VERBOSE | re.MULTILINE ) begOfLineRe = re.compile(r''' \s* ''', re.VERBOSE ) def Convert(self, lines): self.proc = None self.anonLabelCount = -1 output = self.output self.oldAsmEmptyLineCount = 0 self.newAsmEmptyLineCount = 0 for line in lines: mo = self.begOfLineRe.search(line) assert mo is not None self.indent = mo.group() lineWithoutBeginning = line[len(self.indent):] mo = self.endOfLineRe.search(lineWithoutBeginning) if mo is None: endOfLine = '' else: endOfLine = mo.group() oldAsm = line[len(self.indent):len(line) - len(endOfLine)] self.originalLine = line.rstrip() if line.strip() == '': self.oldAsmEmptyLineCount += 1 self.TranslateAsm(oldAsm, endOfLine) if line.strip() != '': self.oldAsmEmptyLineCount = 0 procDeclRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ PROC (?: \s+ NEAR | FAR )? (?: \s+ C )? (?: \s+ (PUBLIC | PRIVATE) )? (?: \s+ USES ( (?: \s+ \w[\w0-9]* )+ ) )? \s* $ ''', re.VERBOSE | re.IGNORECASE ) procEndRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ ENDP \s* $ ''', re.VERBOSE | re.IGNORECASE ) varAndTypeSubRe = r' (?: [\w@][\w@0-9]* ) (?: \s* : \s* \w+ )? ' publicRe = re.compile(r''' PUBLIC \s+ ( %s (?: \s* , \s* %s )* ) \s* $ ''' % (varAndTypeSubRe, varAndTypeSubRe), re.VERBOSE | re.IGNORECASE ) varAndTypeSubRe = re.compile(varAndTypeSubRe, re.VERBOSE | re.IGNORECASE) macroDeclRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ MACRO \s* $ ''', re.VERBOSE | re.IGNORECASE ) sectionDeclRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ ( SECTION | ENDS ) \s* $ ''', re.VERBOSE | re.IGNORECASE ) externRe = re.compile(r''' EXTE?RN \s+ (?: C \s+ )? ([\w@][\w@0-9]*) \s* : \s* (\w+) \s* $ ''', re.VERBOSE | re.IGNORECASE ) externdefRe = re.compile(r''' EXTERNDEF \s+ (?: C \s+ )? ([\w@][\w@0-9]*) \s* : \s* (\w+) \s* $ ''', re.VERBOSE | re.IGNORECASE ) protoRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ PROTO (?: \s+ .* )? \s* $ ''', re.VERBOSE | re.IGNORECASE ) defineDataRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ ( db | dw | dd | dq ) \s+ ( .*? ) \s* $ ''', re.VERBOSE | re.IGNORECASE ) equRe = re.compile(r''' ([\w@][\w@0-9]*) \s+ EQU \s+ (\S.*?) \s* $ ''', re.VERBOSE | re.IGNORECASE ) ignoreRe = re.compile(r''' \. (?: const | mmx | model | xmm | x?list | [3-6]86p? ) | page (?: \s+ .* )? \s* $ ''', re.VERBOSE | re.IGNORECASE ) whitespaceRe = re.compile(r'\s+', re.MULTILINE) def TranslateAsm(self, oldAsm, endOfLine): assert(oldAsm.strip() == oldAsm) endOfLine = endOfLine.replace(self.inputFileBase, self.outputFileBase) oldOp = oldAsm.split() if len(oldOp) >= 1: oldOp = oldOp[0] else: oldOp = '' if oldAsm == '': newAsm = oldAsm self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif oldOp in ('#include', ): newAsm = oldAsm self.EmitLine(oldAsm + endOfLine) elif oldOp.lower() in ('end', 'title', 'text'): newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif oldAsm.lower() == '@@:': self.anonLabelCount += 1 self.EmitLine(self.anonLabel(self.anonLabelCount) + ':') elif self.MatchAndSetMo(self.ignoreRe, oldAsm): newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif oldAsm.lower() == 'ret': for i in range(len(self.uses) - 1, -1, -1): register = self.uses[i] self.EmitNewContent('pop ' + register) newAsm = 'ret' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) self.uses = tuple() elif oldOp.lower() == 'lea': newAsm = self.ConvertLea(oldAsm) self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif oldAsm.lower() == 'end': newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) self.uses = tuple() elif self.MatchAndSetMo(self.equRe, oldAsm): equ = self.mo.group(1) newAsm = '%%define %s %s' % (equ, self.mo.group(2)) self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.externRe, oldAsm) or \ self.MatchAndSetMo(self.protoRe, oldAsm): extern = self.mo.group(1) self.NewGlobal(extern) newAsm = 'extern ' + extern self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.externdefRe, oldAsm): newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.macroDeclRe, oldAsm): newAsm = '%%macro %s 0' % self.mo.group(1) self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif oldOp.lower() == 'endm': newAsm = r'%endmacro' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.sectionDeclRe, oldAsm): name = self.mo.group(1) ty = self.mo.group(2) if ty.lower() == 'section': newAsm = '.' + name else: newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.procDeclRe, oldAsm): proc = self.proc = self.mo.group(1) visibility = self.mo.group(2) if visibility is None: visibility = '' else: visibility = visibility.lower() if visibility != 'private': self.NewGlobal(self.proc) proc = 'ASM_PFX(' + proc + ')' self.EmitNewContent('global ' + proc) newAsm = proc + ':' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) uses = self.mo.group(3) if uses is not None: uses = filter(None, uses.split()) else: uses = tuple() self.uses = uses for register in self.uses: self.EmitNewContent(' push ' + register) elif self.MatchAndSetMo(self.procEndRe, oldAsm): newAsm = '' self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.publicRe, oldAsm): publics = re.findall(self.varAndTypeSubRe, self.mo.group(1)) publics = map(lambda p: p.split(':')[0].strip(), publics) for i in range(len(publics) - 1): name = publics[i] self.EmitNewContent('global ASM_PFX(%s)' % publics[i]) self.NewGlobal(name) name = publics[-1] self.NewGlobal(name) newAsm = 'global ASM_PFX(%s)' % name self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) elif self.MatchAndSetMo(self.defineDataRe, oldAsm): name = self.mo.group(1) ty = self.mo.group(2) value = self.mo.group(3) if value == '?': value = 0 newAsm = '%s: %s %s' % (name, ty, value) newAsm = self.CommonConversions(newAsm) self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) else: newAsm = self.CommonConversions(oldAsm) self.EmitAsmWithComment(oldAsm, newAsm, endOfLine) def NewGlobal(self, name): regex = re.compile(r'(? 1: emitNewLine = False if emitNewLine: self.EmitLine(newLine.rstrip()) elif self.diff: print '-%s' % self.originalLine leaRe = re.compile(r''' (lea \s+) ([\w@][\w@0-9]*) \s* , \s* (\S (?:.*\S)?) \s* $ ''', re.VERBOSE | re.IGNORECASE ) def ConvertLea(self, oldAsm): newAsm = oldAsm if self.MatchAndSetMo(self.leaRe, oldAsm): lea = self.mo.group(1) dst = self.mo.group(2) src = self.mo.group(3) if src.find('[') < 0: src = '[' + src + ']' newAsm = lea + dst + ', ' + src newAsm = self.CommonConversions(newAsm) return newAsm ptrRe = re.compile(r''' (?>', 'or': '|', } def ConvertBitwiseOp(self, oldAsm): newAsm = oldAsm while self.SearchAndSetMo(self.binaryBitwiseOpRe, newAsm): prefix = self.mo.group(1) op = self.bitwiseOpReplacements[self.mo.group(2).lower()] newAsm = newAsm[:self.mo.start(0)] + prefix + op + \ newAsm[self.mo.end(0):] while self.SearchAndSetMo(self.unaryBitwiseOpRe, newAsm): op = self.bitwiseOpReplacements[self.mo.group(1).lower()] newAsm = newAsm[:self.mo.start(0)] + op + newAsm[self.mo.end(0):] return newAsm sectionRe = re.compile(r''' \. ( code | data ) (?: \s+ .* )? \s* $ ''', re.VERBOSE | re.IGNORECASE ) segmentRe = re.compile(r''' ( code | data ) (?: \s+ SEGMENT ) (?: \s+ .* )? \s* $ ''', re.VERBOSE | re.IGNORECASE ) def ConvertSection(self, oldAsm): newAsm = oldAsm if self.MatchAndSetMo(self.sectionRe, newAsm) or \ self.MatchAndSetMo(self.segmentRe, newAsm): name = self.mo.group(1).lower() if name == 'code': if self.x64: self.EmitLine('DEFAULT REL') name = 'text' newAsm = 'SECTION .' + name return newAsm fwordRe = re.compile(r''' (?NASM conversion unsupported for', dst notConverted.append(dst) except NoSourceFile: if not self.args.quiet: print 'Source file missing for', reldst notConverted.append(dst) except UnsupportedArch: unsupportedArchCount += 1 else: if didSomething: self.ConversionFinished(dst) if len(notConverted) > 0 and not self.args.quiet: for dst in notConverted: reldst = self.RootRelative(dst) print 'Unabled to convert', reldst if unsupportedArchCount > 0 and not self.args.quiet: print 'Skipped', unsupportedArchCount, 'files based on architecture' def UpdateInfAsmFile(self, dst, IgnoreMissingAsm=False): infPath = os.path.split(os.path.realpath(self.inf))[0] asmSrc = os.path.splitext(dst)[0] + '.asm' fullSrc = os.path.join(infPath, asmSrc) fullDst = os.path.join(infPath, dst) srcParentDir = os.path.basename(os.path.split(fullSrc)[0]) if srcParentDir.lower() in UnsupportedArch.unsupported: raise UnsupportedArch elif not os.path.exists(fullSrc): if not IgnoreMissingAsm: raise NoSourceFile else: # not os.path.exists(fullDst): conv = ConvertAsmFile(fullSrc, fullDst, self) self.unsupportedSyntaxSeen = conv.unsupportedSyntaxSeen lastLine = '' fileChanged = False for i in range(len(self.lines)): line = self.lines[i].rstrip() updatedLine = line for src in self.dstToSrc[dst]: assert self.srcToDst[src] == dst updatedLine = self.ReplacePreserveSpacing( updatedLine, src, dst) lineChanged = updatedLine != line if lineChanged: if lastLine.strip() == updatedLine.strip(): self.lines[i] = None else: self.lines[i] = updatedLine + '\r\n' if self.diff: if lineChanged: print '-%s' % line if self.lines[i] is not None: print '+%s' % updatedLine else: print '', line fileChanged |= lineChanged if self.lines[i] is not None: lastLine = self.lines[i] if fileChanged: self.lines = filter(lambda l: l is not None, self.lines) for src in self.dstToSrc[dst]: if not src.endswith('.asm'): fullSrc = os.path.join(infPath, src) if os.path.exists(fullSrc): self.RemoveFile(fullSrc) if fileChanged: f = open(self.inf, 'wb') f.writelines(self.lines) f.close() self.FileUpdated(self.inf) def ConversionFinished(self, dst): asmSrc = os.path.splitext(dst)[0] + '.asm' self.FileConversionFinished( self.packageName, self.moduleName, asmSrc, dst) class ConvertInfFiles(CommonUtils): def __init__(self, infs, clone): CommonUtils.__init__(self, clone) infs = map(lambda i: ConvertInfFile(i, self), infs) infs = filter(lambda i: len(i) > 0, infs) dstToInfs = {'order': []} for inf in infs: for dst in inf: fulldst = os.path.realpath(os.path.join(inf.dir, dst)) pair = (inf, dst) if fulldst in dstToInfs: dstToInfs[fulldst].append(pair) else: dstToInfs['order'].append(fulldst) dstToInfs[fulldst] = [pair] notConverted = [] unsupportedArchCount = 0 for dst in dstToInfs['order']: didSomething = False try: for inf, reldst in dstToInfs[dst]: inf.UpdateInfAsmFile(reldst, IgnoreMissingAsm=didSomething) didSomething = True except UnsupportedConversion: if not self.args.quiet: print 'MASM=>NASM conversion unsupported for', reldst notConverted.append(dst) except NoSourceFile: if not self.args.quiet: print 'Source file missing for', reldst notConverted.append(dst) except UnsupportedArch: unsupportedArchCount += 1 else: if didSomething: inf.ConversionFinished(reldst) if len(notConverted) > 0 and not self.args.quiet: for dst in notConverted: reldst = self.RootRelative(dst) print 'Unabled to convert', reldst if unsupportedArchCount > 0 and not self.args.quiet: print 'Skipped', unsupportedArchCount, 'files based on architecture' class ConvertDirectories(CommonUtils): def __init__(self, paths, clone): CommonUtils.__init__(self, clone) self.paths = paths self.ConvertInfAndAsmFiles() def ConvertInfAndAsmFiles(self): infs = list() for path in self.paths: assert(os.path.exists(path)) for path in self.paths: for root, dirs, files in os.walk(path): for d in ('.svn', '.git'): if d in dirs: dirs.remove(d) for f in files: if f.lower().endswith('.inf'): inf = os.path.realpath(os.path.join(root, f)) infs.append(inf) ConvertInfFiles(infs, self) class ConvertAsmApp(CommonUtils): def __init__(self): CommonUtils.__init__(self) src = self.args.source dst = self.args.dest if self.infmode: ConvertInfFiles(src, self) elif self.dirmode: ConvertDirectories((src,), self) elif not self.dirmode: ConvertAsmFile(src, dst, self) ConvertAsmApp()