summaryrefslogtreecommitdiff
path: root/util/find_copyrights.py
diff options
context:
space:
mode:
authorNathan Binkert <nate@binkert.org>2011-06-02 17:36:07 -0700
committerNathan Binkert <nate@binkert.org>2011-06-02 17:36:07 -0700
commit0c424344facaa2b388cba814f08ef5622f8e122b (patch)
tree5386a973bf1f9a4dcd77a6ae6a89e6b2fb716fdc /util/find_copyrights.py
parentf656787edb2f7f6d95f267fdd91bbcd67f7305ac (diff)
downloadgem5-0c424344facaa2b388cba814f08ef5622f8e122b.tar.xz
copyright: Add code for finding all copyright blocks and create a COPYING file
The end of the COPYING file was generated with:
    % python ./util/find_copyrights.py configs src system tests util
Update the -C command line option to spit out the COPYING file.
Diffstat (limited to 'util/find_copyrights.py')
-rw-r--r--util/find_copyrights.py273
1 files changed, 273 insertions, 0 deletions
diff --git a/util/find_copyrights.py b/util/find_copyrights.py
new file mode 100644
index 000000000..697f4b728
--- /dev/null
+++ b/util/find_copyrights.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+
+from file_types import lang_type, find_files
+
# Editor mode line such as "-*- mode:python -*-".  Written as a raw string so
# that "\*" is a regex escape rather than an invalid string escape.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
# A line whose first non-blank character is '#'.
shell_comment = re.compile(r'^\s*#')
# NOTE: unlike shell_comment this is not anchored; a ';' anywhere on the
# line counts as a comment line.
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')

_HASH_COMMENT_LANGS = ('python', 'make', 'shell', 'perl', 'scons')
_LISP_LANGS = ('lisp', )
_C_FAMILY_LANGS = ('C', 'C++', 'swig', 'isa', 'asm', 'slicc', 'lex', 'yacc')


def _line_comment_blocks(lines, comment_re, skip_shebang):
    """Yield (start, end) for leading runs of single-line comments.

    A run ends at the first line that does not match comment_re.  Scanning
    stops at the first non-blank, non-comment line seen while no run is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0:
            # The first line may be an interpreter or editor directive
            # rather than part of a copyright header.
            if skip_shebang and line.startswith('#!'):
                continue
            if mode_line.search(line):
                continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is not None:
            yield start, i - 1
            start = None
        elif line.strip():
            return

    # A comment run that reaches the end of the file is still a block.
    if start is not None:
        yield start, len(lines) - 1


def _c_family_blocks(lines):
    """Yield (start, end) for leading // runs and /* ... */ comments."""
    mode = None     # None, 'C' (inside /* */) or 'CPP' (inside a // run)
    start = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the comment: the header region is over.
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                # A new "Copyright" line inside a // run starts a new block.
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue

        if mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a // run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            if c_comment_end.search(line, c_match.end()):
                # The comment opens and closes on this one line.
                yield i, i
                continue
            mode = 'C'
            start = i

        if mode is None and line.strip():
            return

    # A // run that reaches the end of the file is still a block.  An
    # unterminated /* comment is deliberately not reported.
    if mode == 'CPP' and start is not None:
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate the comment blocks found at the top of a source file.

    lines     -- the file's contents as a list of strings, one per line
    lang_type -- language name selecting the comment syntax to look for
                 (this parameter shadows the imported lang_type function
                 inside this function)

    Yields (start, end) pairs of 0-based, inclusive indices into lines.
    Scanning stops once non-comment content is reached.

    Raises AttributeError for an unsupported language.  Because this is a
    generator, the error surfaces when iteration starts, not at call time.
    """
    if lang_type in _HASH_COMMENT_LANGS:
        blocks = _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in _LISP_LANGS:
        blocks = _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in _C_FAMILY_LANGS:
        blocks = _c_family_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block
+
# A year range such as "2001-2005" or "2001 - 2005".
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated list of years and year ranges into a set.

    dates -- a string such as "2001-2003, 2005"

    Returns a set of ints.  Ranges are inclusive.  Tokens that are neither a
    range nor an integer (for example the empty token left by a trailing
    comma) are ignored.
    """
    years = set()
    for token in dates.split(','):
        token = token.strip()
        match = date_range_re.match(token)
        if match:
            first, last = [int(year) for year in match.groups()]
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(token))
        except ValueError:
            # Best effort: skip anything that is not a plain year.
            pass

    return years
+
# "Copyright (c) <dates> <owner>".  Comment leaders (whitespace, '*', '#',
# '/') may sit between the dates and the owner.  The letter class is spelled
# [A-Za-z]; the range A-z would also admit the punctuation characters
# between 'Z' and 'a'.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
               re.DOTALL)

# First line of an author list, and a continuation line holding one name.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')

# Every owner string seen by get_data, accumulated across calls.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright information from the comment blocks of one file.

    lang_type -- language name passed through to find_copyright_block
    lines     -- the file's contents as a list of strings, one per line

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block containing a copyright statement.  dates is the set built
    by process_dates, authors is a list of names, and start/end are 0-based
    line indices.  Each owner is also added to the module-level all_owners.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        # The block's lines are concatenated with no separator before the
        # copyright pattern is applied.
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed, then let the caller deal with it.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
                continue

            match = more_authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
                continue

            # The author list is over.  Pull `end` back to the first blank
            # line, or to just before the first '//' line that has text.
            for j in range(i, end + 1):
                line = lines[j].strip()
                if not line:
                    end = j
                    break
                if line.startswith('//'):
                    line = line[2:].lstrip()
                    if line:
                        end = j - 1
                        break
            break

        data.append((owner, dates, authors, start, end))

    return data
+
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates -- an iterable of ints (normally the set built by process_dates)

    Consecutive years are collapsed into "first-last" ranges, so
    {2001, 2002, 2003, 2005} becomes "2001-2003,2005".  An empty collection
    gives an empty string.
    """
    years = sorted(set(dates))
    if not years:
        return ''

    pieces = []

    def emit(first, last):
        if first == last:
            pieces.append('%d' % first)
        else:
            pieces.append('%d-%d' % (first, last))

    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            # Still inside the current run of consecutive years.
            last = year
            continue
        emit(first, last)
        first = last = year

    emit(first, last)

    return ','.join(pieces)
+
# Mirrors the options the script parses ("ci:v") and the fact that any
# number of files or directories may be given.
usage_str = """usage:
%s [-c] [-i <ignore>] [-v] <file or directory> ..."""


def usage(exitcode):
    """Print the usage message, then exit unless exitcode is None.

    exitcode -- status passed to sys.exit(); None returns to the caller
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
+
# Script entry point: collect the copyright owners and years found in the
# given files and directories, then print one summary line per owner.
if __name__ == '__main__':
    import getopt

    show_counts = False
    # NOTE(review): values given with -i are collected here but nothing
    # below consults them -- confirm the intended meaning before wiring up.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}     # owner -> set of years
    counts = {}         # owner -> number of copyright blocks seen

    for filename, lang in files:
        # Close the handle promptly; many files may be scanned in one run.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # Re-detect the language now that the first line is available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Files that cannot be parsed are skipped; only -v reports them.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            if owner not in copyrights:
                copyrights[owner] = set()
            if owner not in counts:
                counts[owner] = 0

            copyrights[owner] |= dates
            counts[owner] += 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Most frequent owner first.  Ties are broken on the owner name so that
    # the sets of years are never compared with each other.
    info.sort(key=lambda entry: (entry[0], entry[2]), reverse=True)

    for count, dates, owner in info:
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))