summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Binkert <nate@binkert.org>2011-04-15 10:43:06 -0700
committerNathan Binkert <nate@binkert.org>2011-04-15 10:43:06 -0700
commite5ecfde222d6b76de7320750c219960e6f6ec3ca (patch)
treeabc2266d778fe470dcbabd090cc353f42c53127f
parent07815c3379d26a5d132696b41a5f1efc618cb0e6 (diff)
downloadgem5-e5ecfde222d6b76de7320750c219960e6f6ec3ca.tar.xz
util: python implementation of a routine that will sort includes
I didn't realize that the perl version existed when I started this. This version has many more features than the previous one: it sorts python, system, and m5 headers into separate groups, removes duplicates, and converts c headers to stl headers.
-rw-r--r--util/file_types.py82
-rwxr-xr-xutil/sort-includes91
-rw-r--r--util/sort_includes.py220
3 files changed, 302 insertions, 91 deletions
diff --git a/util/file_types.py b/util/file_types.py
index 8fc2b1af4..85e058db0 100644
--- a/util/file_types.py
+++ b/util/file_types.py
@@ -87,3 +87,85 @@ def lang_type(filename, firstline=None, openok=True):
# sorry, we couldn't detect the language
return None
+
# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))

def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Find the files of the wanted languages below a directory.

    Walk base and all of its subdirectories.  Directories whose name
    is in dir_ignore are not descended into, and files whose name is
    in file_ignore are skipped.  The language of every remaining file
    is looked up with lang_type(); if that language is in languages,
    a (path, language) tuple is yielded, where path is the file name
    joined onto the directory that os.walk() reported for it.

    An empty base is simply handed to os.walk() instead of being
    indexed, so it no longer raises IndexError.'''

    for root, dirs, files in os.walk(base):
        # Prune ignored directories.  The list must be modified in
        # place (slice assignment) because os.walk() consults this
        # very list object to decide where to descend next.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # root already starts with base, so there is no need to
            # strip the prefix off and join it back on again
            fullpath = os.path.join(root, filename)

            # try to figure out the language of the specified file
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, return
            # its name and the language
            if language in languages:
                yield fullpath, language
+
def update_file(dst, src, language, mutator):
    '''Rewrite a file of the given language through a mutator.

    src is either a file name or an open file object to read from.
    dst is either a file name or an object with a write() method.  If
    dst and src compare equal the file is updated in place, and it is
    left untouched when the mutator produces no change.

    mutator is called as mutator(lines, name, language), where lines
    is the list of source lines with their line endings stripped and
    name is the name of the source file (None if the source object
    has no name attribute).  It must return an iterable of output
    lines, again without line endings; each one is written followed
    by a single newline.

    Files opened by this function are closed before it returns, even
    if the mutator raises.  Objects passed in by the caller are left
    open.  Nothing is returned.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    opened_src = isinstance(src, str)
    if opened_src:
        # if a filename was provided, open the file; in-place updates
        # need write access on the same handle
        src = open(src, 'r+' if inplace else 'r')

    opened_dst = False
    try:
        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, getattr(src, 'name', None),
                                 language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, rewind and throw away the old contents
            dst = src
            dst.seek(0)
            dst.truncate(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened_dst = True

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # only close what was opened here
        if opened_dst:
            dst.close()
        if opened_src:
            src.close()
diff --git a/util/sort-includes b/util/sort-includes
deleted file mode 100755
index 8ae40be52..000000000
--- a/util/sort-includes
+++ /dev/null
@@ -1,91 +0,0 @@
-#! /usr/bin/env perl
-# Copyright (c) 2003 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Steve Reinhardt
-
use strict;
use warnings;

# Usage: sort-includes FILE
#
# Print FILE on stdout with its first block of #include lines sorted.
# If another #include shows up after that block, the file is not
# printed; a one-line "FILE: <line>" report is printed instead.

@ARGV or die "usage: $0 FILE\n";
my $path = $ARGV[0];

# Three-argument open with a lexical handle: the file name is taken
# literally and can never be read as a mode or pipe specification.
open(my $in, '<', $path) or die "$0: cannot open '$path': $!\n";

my (@before, @includes, @after);
my $line;

# Suck in everything before the first include
# (line-by-line into @before list).  Testing defined() rather than
# truth keeps a final line consisting of "0" from ending the input.
while (defined($line = <$in>) && $line !~ /^#include/) {
    push @before, $line;
}

# Suck in include lines into @includes list.
# Skip blank lines (keep processing, but don't put in @includes).
# End on first non-blank, non-include line.
# Note that this means that files with comments or #ifdefs
# interspersed among their #includes will only get the initial
# set of #includes sorted.
while (defined($line) && $line =~ /^#include|^\s*$/) {
    push @includes, $line unless $line =~ /^\s*$/;
    $line = <$in>;
}

# Now sort the includes.  This simple ordering function
# puts system includes first, followed by non-system includes.
# Within each group the sort is alphabetical.
# We may want something a little more sophisticated.
# Personally, I'd like to see something like:
# <sys/*.h> - header files from sys subdir
# <*.h> - other system headers
# <*> - STL headers
# "base/*" - M5 base headers
# "sim/*" - M5 sim headers
# "*" - other M5 headers
# ...but I didn't have the energy to code that up.
sub sortorder {
    my $sysa = ($a =~ /<.*>/);
    my $sysb = ($b =~ /<.*>/);
    return -1 if ($sysa && !$sysb);
    return 1 if ($sysb && !$sysa);
    return $a cmp $b;
}

@includes = sort sortorder @includes;

# Put everything after the includes in the @after list.
while (defined $line) {
    push @after, $line;
    if ($line =~ /^#include/) {
        # A later #include means the includes are not in one block;
        # report the file and bail out without printing it.
        print "$path: ", $after[0];
        exit 0;
    }
    $line = <$in>;
}

close($in);

# Print out the file with sorted includes.

print join("", @before, @includes, @after);
-
diff --git a/util/sort_includes.py b/util/sort_includes.py
new file mode 100644
index 000000000..15d1f2788
--- /dev/null
+++ b/util/sort_includes.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+
+from file_types import *
+
# Map every C standard library header '<name>.h' onto the C++ header
# 'c<name>' that replaces it.
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
)
+
# Matches an include/import directive and captures the directive
# character, the keyword and the included name.  The name is the text
# between the FIRST opening delimiter and the next closing delimiter,
# so quotes or angle brackets in a trailing comment cannot leak into
# the sort key.
include_re = re.compile(r'([#%])(include|import)[^<"]*[<"]([^">]*)[">]')
def include_key(line):
    '''Return the sort key for an include line.

    The key is the included path with a space prepended to every
    directory component, so that directories sort before files.

    line must be an include or import directive; anything else
    trips an assertion.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)
+
class SortIncludes(object):
    '''Mutator that sorts every block of include lines in a file.

    An instance is called with the lines of a file (without line
    endings), the file name and the language, and generates the new
    lines.  Within each run of consecutive include lines the includes
    are grouped by type, the groups are separated by one blank line,
    and each group is sorted with include_key() with duplicates
    removed.  All other lines are passed through unchanged.'''

    # different types of includes for different sorting of headers
    # <Python.h> - Python header needs to be first if it exists
    # <*.h> - system headers (directories before files)
    # <*> - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*" - M5 headers (directories before files)
    #
    # Each entry is (type, delimiters, regex).  Every regex captures
    # the keyword, the included name and whatever trails the closing
    # delimiter.  The name may not contain its own closing delimiter,
    # so a '>' or '"' in a trailing comment is never swallowed, and
    # any amount of blanks may follow the keyword.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python[^>]*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<([^>]+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>',
         r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(?:hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"([^"]+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<([^>]+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<([^>]+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"([^"]+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"([^"]+)"(.*)'),
    )

    # compile the regexes
    includes_re = tuple((a, b, re.compile(c)) for a,b,c in includes_re)

    def __init__(self):
        self.reset()

    def reset(self):
        '''clear all stored headers'''
        self.includes = {}
        for include_type,_,_ in self.includes_re:
            self.includes[include_type] = []

    def dump_block(self):
        '''Generate the stored includes, one group per include type in
        the order of includes_re, with an empty line between groups.'''
        first = True
        for include_type,_,_ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # print a newline between groups of
                # include types
                yield ''
            first = False

            # print out the includes in the current group sorted
            # according to include_key().  Lines with equal keys are
            # not necessarily adjacent, so remember what has already
            # been emitted instead of comparing with the previous line.
            seen = set()
            for line in sorted(group, key=include_key):
                if line not in seen:
                    seen.add(line)
                    yield line

    def _store_include(self, line, language):
        '''If line is an include, store its cleaned up form in the
        proper category and return True; otherwise return False.'''
        for include_type,(ldelim,rdelim),regex in self.includes_re:
            match = regex.match(line)
            if not match:
                continue

            keyword, include, extra = match.groups()

            # fix up c headers that have an stl replacement
            if include_type == 'c' and language == 'C++':
                stl_inc = cpp_c_headers.get(include, None)
                if stl_inc:
                    include = stl_inc
                    include_type = 'stl'

            # normalize to exactly one space after the keyword
            line = keyword + ' ' + ldelim + include + rdelim + extra
            self.includes[include_type].append(line)
            return True

        return False

    def __call__(self, lines, filename, language):
        '''Generate the lines of the file with its include blocks
        sorted.  filename is accepted for the mutator interface but is
        not used.'''
        blanks = 0
        block = False

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline; otherwise, skip it since we're going to
                    # control newlines within an include block
                    yield ''
                continue

            if self._store_include(line, language):
                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                continue

            if block:
                # this line did not match an include, so we've exited
                # an include block
                for block_line in self.dump_block():
                    yield block_line

                # if there are any newlines after the include block,
                # emit a single newline (removing extras)
                if blanks:
                    yield ''

                blanks = 0
                block = False
                self.reset()

            # emit the line that ended the block, or that was never
            # part of one
            yield line

        if block:
            # the file ended inside an include block
            for block_line in self.dump_block():
                yield block_line

            # don't carry these includes over into another call
            self.reset()
+
+
+
# default language types to try to apply our sorting rules to
default_languages = frozenset(['C', 'C++', 'isa', 'python', 'scons', 'swig'])

def options():
    '''Build and return the command line parser for this script.'''
    from optparse import OptionParser

    parser = OptionParser()

    # The comma separated list options all have the same shape:
    # (short flag, long flag, metavar, default set, help text)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_flag, long_flag, metavar, default, help_text in list_options:
        parser.add_option(short_flag, long_flag, metavar=metavar,
                          type='string', default=','.join(default),
                          help=help_text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
+
def parse_args(parser):
    '''Parse the command line with parser and return (opts, args),
    with the comma separated dir_ignore, file_ignore and languages
    options each turned into a frozenset of its items.'''
    parsed = parser.parse_args()
    opts = parsed[0]

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        items = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(items))

    return opts, parsed[1]
+
if __name__ == '__main__':
    # sort the includes of every matching file below each directory
    # given on the command line; with --dry-run just list the files
    opts, args = parse_args(options())

    for base in args:
        matches = find_files(base, languages=opts.languages,
                             file_ignore=opts.file_ignore,
                             dir_ignore=opts.dir_ignore)
        for filename, language in matches:
            if not opts.dry_run:
                update_file(filename, filename, language, SortIncludes())
                continue

            print("%s: %s" % (filename, language))