From 0c424344facaa2b388cba814f08ef5622f8e122b Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Thu, 2 Jun 2011 17:36:07 -0700 Subject: copyright: Add code for finding all copyright blocks and create a COPYING file The end of the COPYING file was generated with: % python ./util/find_copyrights.py configs src system tests util Update -C command line option to spit out COPYING file --- util/find_copyrights.py | 273 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 util/find_copyrights.py (limited to 'util') diff --git a/util/find_copyrights.py b/util/find_copyrights.py new file mode 100644 index 000000000..697f4b728 --- /dev/null +++ b/util/find_copyrights.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python + +import os +import re +import sys + +from file_types import lang_type, find_files + +mode_line = re.compile('(-\*- *mode:.* *-\*-)') +shell_comment = re.compile(r'^\s*#') +lisp_comment = re.compile(r';') +cpp_comment = re.compile(r'//') +c_comment_start = re.compile(r'/\*') +c_comment_end = re.compile(r'\*/') +def find_copyright_block(lines, lang_type): + start = None + if lang_type in ('python', 'make', 'shell', 'perl', 'scons'): + for i,line in enumerate(lines): + if i == 0 and (line.startswith('#!') or mode_line.search(line)): + continue + + if shell_comment.search(line): + if start is None: + start = i + elif start is None: + if line.strip(): + return + else: + yield start, i-1 + start = None + + elif lang_type in ('lisp', ): + for i,line in enumerate(lines): + if i == 0 and mode_line.search(line): + continue + + if lisp_comment.search(line): + if start is None: + start = i + elif start is None: + if line.strip(): + return + else: + yield start, i-1 + start = None + + elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc', + 'lex', 'yacc'): + mode = None + for i,line in enumerate(lines): + if i == 0 and mode_line.search(line): + continue + + if mode == 'C': + assert start is not None, 'on line %d' % (i + 1) + match = c_comment_end.search(line) + if match: + yield start, i + mode = None + continue + + cpp_match = cpp_comment.search(line) + c_match = c_comment_start.search(line) + + if cpp_match: + assert not c_match, 'on line %d' % (i + 1) + if line[:cpp_match.start()].strip(): + return + if mode is None: + mode = 'CPP' + start = i + else: + text = line[cpp_match.end():].lstrip() + if text.startswith("Copyright") > 0: + yield start, i-1 + start = i + continue + elif mode == 'CPP': + assert start is not None, 'on line %d' % (i + 1) + if not line.strip(): + continue + yield start, i-1 + mode = None + if not c_match: + return + + if c_match: + assert mode is None, 'on line %d' % (i + 1) + mode = 'C' + start = i + + if mode is None and line.strip(): + return + + else: + raise AttributeError, "Could not handle language %s" % lang_type + +date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})') +def process_dates(dates): + dates = [ d.strip() for d in dates.split(',') ] + + output = set() + for date in dates: + match = date_range_re.match(date) + if match: + f,l = [ int(d) for d in match.groups() ] + for i in xrange(f, l+1): + output.add(i) + else: + try: + date = int(date) + output.add(date) + except ValueError: + pass + + return output + +copyright_re = \ + re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)', + re.DOTALL) + +authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$') +more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$') + +all_owners = set() +def get_data(lang_type, lines): + data = [] + last = None + for start,end in find_copyright_block(lines, lang_type): + joined = ''.join(lines[start:end+1]) + match = copyright_re.search(joined) + if not match: + continue + + c,dates,owner = match.groups() + dates = dates.strip() + owner = owner.strip() + + all_owners.add(owner) + try: + dates = process_dates(dates) + except Exception: + print dates + print owner + raise + + authors = [] + for i in xrange(start,end+1): + line = lines[i] + if not authors: + match = authors_re.search(line) + if match: + authors.append(match.group(1).strip()) + else: + match = more_authors_re.search(line) + if not match: + for j in xrange(i, end+1): + line = lines[j].strip() + if not line: + end = j + break + if line.startswith('//'): + line = line[2:].lstrip() + if line: + end = j - 1 + break + break + authors.append(match.group(1).strip()) + + info = (owner, dates, authors, start, end) + data.append(info) + + return data + +def datestr(dates): + dates = list(dates) + dates.sort() + + output = [] + def add_output(first, second): + if first == second: + output.append('%d' % (first)) + else: + output.append('%d-%d' % (first, second)) + + first = dates.pop(0) + second = first + while dates: + next = dates.pop(0) + if next == second + 1: + second = next + else: + add_output(first, second) + first = next + second = next + + add_output(first, second) + + return ','.join(output) + +usage_str = """usage: +%s [-v] """ + +def usage(exitcode): + print usage_str % sys.argv[0] + if exitcode is not None: + sys.exit(exitcode) + +if __name__ == '__main__': + import getopt + + show_counts = False + ignore = set() + verbose = False + try: + opts, args = getopt.getopt(sys.argv[1:], "ci:v") + except getopt.GetoptError: + usage(1) + + for o,a in opts: + if o == '-c': + show_counts = True + if o == '-i': + ignore.add(a) + if o == '-v': + verbose = True + + files = [] + + for base in args: + if os.path.isfile(base): + files += [ (base, lang_type(base)) ] + elif os.path.isdir(base): + files += find_files(base) + else: + raise AttributeError, "can't access '%s'" % base + + copyrights = {} + counts = {} + + for filename, lang in files: + f = file(filename, 'r') + lines = f.readlines() + if not lines: + continue + + lines = [ line.rstrip('\r\n') for line in lines ] + + lt = lang_type(filename, lines[0]) + try: + data = get_data(lt, lines) + except Exception, e: + if verbose: + if len(e.args) == 1: + e.args = ('%s (%s))' % (e, filename), ) + print "could not parse %s: %s" % (filename, e) + continue + + for owner, dates, authors, start, end in data: + if owner not in copyrights: + copyrights[owner] = set() + if owner not in counts: + counts[owner] = 0 + + copyrights[owner] |= dates + counts[owner] += 1 + + info = [ (counts[o], d, o) for o,d in copyrights.items() ] + + for count,dates,owner in sorted(info, reverse=True): + if show_counts: + owner = '%s (%s files)' % (owner, count) + print 'Copyright (c) %s %s' % (datestr(dates), owner) -- cgit v1.2.3