From a81c03737addc8e9a9b00cde0354e6c0ab4561af Mon Sep 17 00:00:00 2001
From: Nathan Binkert <binkertn@umich.edu>
Date: Tue, 18 Oct 2005 19:07:42 -0400
Subject: Add new function profiling stuff, wrap the pc_sample stuff into it.

SConscript:
    Get rid of the pc_sample stuff and move to the new profiling stuff
base/traceflags.py:
    DPRINTF Stack stuff
cpu/base.cc:
cpu/base.hh:
cpu/exec_context.cc:
cpu/exec_context.hh:
cpu/simple/cpu.cc:
    Add profiling stuff
kern/kernel_stats.hh:
    Use a smart pointer
sim/system.cc:
sim/system.hh:
    Create a new symbol table that has all of the symbols for a
    particular system
util/stats/categories.py:
    change around the categories, add categories for function
    profiling stuff
util/stats/profile.py:
    New profile parsing and display code to deal with function
    profiling stuff: graph, dot, and text outputs.

--HG--
extra : convert_revision : b3de0cdc8bd468e42647966e2640ae009bda9eb8
---
 util/stats/profile.py | 484 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 412 insertions(+), 72 deletions(-)

(limited to 'util/stats/profile.py')

diff --git a/util/stats/profile.py b/util/stats/profile.py
index 65a03e9aa..cf946452b 100644
--- a/util/stats/profile.py
+++ b/util/stats/profile.py
@@ -27,103 +27,227 @@
 from orderdict import orderdict
 import output
 
-class ProfileData(object):
-    def __init__(self):
-        self.data = {}
-        self.total = {}
-        self.runs = orderdict()
-        self.runlist = []
+class RunData(dict):
+    def __init__(self, filename=None):
+        self.filename = filename
 
-    def addvalue(self, run, cpu, symbol, value):
-        value = float(value)
-        self.data[run, cpu, symbol] = self.getvalue(run, cpu, symbol) + value
-        self.total[run, cpu] = self.gettotal(run, cpu) + value
-        if run not in self.runs:
-            self.runs[run] = orderdict()
+    def __getattr__(self, attr):
+        if attr == 'total':
+            total = 0.0
+            for value in self.itervalues():
+                total += value
+            return total
+        if attr == 'maxsymlen':
+            return max([ len(sym) for sym in self.iterkeys() ])
 
-        if cpu not in self.runs[run]:
-            self.runs[run][cpu] = {}
+    def display(self, output=None, limit=None, maxsymlen=None):
+        if not output:
+            import sys
+            output = sys.stdout
+        elif isinstance(output, str):
+            output = file(output, 'w')
 
-        if symbol not in self.runs[run][cpu]:
-            self.runs[run][cpu][symbol] = 0
+        total = float(self.total)
 
-        self.runs[run][cpu][symbol] += value
+        # swap (string,count) order so we can sort on count
+        symbols = [ (count,name) for name,count in self.iteritems() ]
+        symbols.sort(reverse=True)
+        if limit is not None:
+            symbols = symbols[:limit]
 
-    def getvalue(self, run, cpu, symbol):
-        return self.data.get((run, cpu, symbol), 0)
+        if not maxsymlen:
+            maxsymlen = self.maxsymlen
 
-    def gettotal(self, run, cpu):
-        return self.total.get((run, cpu), 0)
+        symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
+        for number,name in symbols:
+            print >>output, symbolf % (name, 100.0 * (float(number) / total))
 
-class Profile(object):
-    default_order = ['ste', 'hte', 'htd', 'ocm', 'occ', 'ocp']
 
-    # This list controls the order of values in stacked bar data output
-    default_categories = [ 'interrupt',
-                           'driver',
-                           'stack',
-                           'bufmgt',
-                           'copy',
-                           'user',
-                           'other',
-                           'idle']
 
-    def __init__(self, run_order=[], categories=[], stacknames=[]):
-        if not run_order:
-            run_order = Profile.default_order
-        if not categories:
-            categories = Profile.default_categories
+class PCData(RunData):
+    def __init__(self, filename=None, categorize=None, showidle=True):
+        super(PCData, self).__init__(self, filename)
+        if filename is None:
+            return
 
-        self.run_order = run_order
-        self.categories = categories
-        self.rcategories = []
-        self.rcategories.extend(categories)
-        self.rcategories.reverse()
-        self.stacknames = stacknames
-        self.prof = ProfileData()
-        self.categorize = True
-        self.showidle = True
-        self.maxsymlen = 0
-
-    def category(self, symbol):
-        from categories import categories, categories_re
-        if categories.has_key(symbol):
-            return categories[symbol]
-        for regexp, cat in categories_re:
-            if regexp.match(symbol):
-                return cat
-        return 'other'
-
-    # Parse input file and put the results in the given run and cpu
-    def parsefile(self, run, cpu, filename):
         fd = file(filename)
 
         for line in fd:
+            if line.strip() == '>>>PC data':
+                break
+
+        for line in fd:
+            if line.startswith('>>>'):
+                break
+
             (symbol, count) = line.split()
             if symbol == "0x0":
                 continue
             count = int(count)
 
-            if self.categorize:
-                symbol = self.category(symbol)
-                if symbol == 'idle' and not self.showidle:
+            if categorize is not None:
+                category = categorize(symbol)
+                if category is None:
+                    category = 'other'
+                elif category == 'idle' and not showidle:
                     continue
 
-                if symbol not in self.categories:
-                    symbol = 'other'
-
-            self.maxsymlen = max(self.maxsymlen, len(symbol))
-            self.prof.addvalue(run, cpu, symbol, count)
+                self[category] = count
 
         fd.close()
 
+class FuncNode(object):
+    def __new__(cls, filename = None):
+        if filename is None:
+            return super(FuncNode, cls).__new__(cls)
+
+        fd = file(filename, 'r')
+        fditer = iter(fd)
+        nodes = {}
+        for line in fditer:
+            if line.strip() == '>>>function data':
+                break
+
+        for line in fditer:
+            if line.startswith('>>>'):
+                break
+
+            data = line.split()
+            node_id = int(data[0], 16)
+            node = FuncNode()
+            node.symbol = data[1]
+            node.count = int(data[2])
+            node.children = [ int(child, 16) for child in data[3:] ]
+            nodes[node_id] = node
+
+        for node in nodes.itervalues():
+            children = []
+            for cid in node.children:
+                child = nodes[cid]
+                children.append(child)
+                child.parent = node
+            node.children = tuple(children)
+        if not nodes:
+            print filename
+            print nodes
+        return nodes[0]
+
+    def __init__(self, filename=None):
+        pass
+
+    def total(self):
+        total = self.count
+        for child in self.children:
+            total += child.total()
+
+        return total
+
+    def aggregate(self, dict, categorize, incategory):
+        category = None
+        if categorize:
+            category = categorize(self.symbol)
+
+        total = self.count
+        for child in self.children:
+            total += child.aggregate(dict, categorize, category or incategory)
+
+        if category:
+            dict[category] = dict.get(category, 0) + total
+            return 0
+        elif not incategory:
+            dict[self.symbol] = dict.get(self.symbol, 0) + total
+
+        return total
+
+    def dump(self):
+        kids = [ child.symbol for child in self.children]
+        print '%s %d <%s>' % (self.symbol, self.count, ', '.join(kids))
+        for child in self.children:
+            child.dump()
+
+    def _dot(self, dot, threshold, categorize, total):
+        from pydot import Dot, Edge, Node
+        self.dot_node = None
+
+        value = self.total() * 100.0 / total
+        if value < threshold:
+            return
+        if categorize:
+            category = categorize(self.symbol)
+            if category and category != 'other':
+                return
+        label = '%s %.2f%%' % (self.symbol, value)
+        self.dot_node = Node(self, label=label)
+        dot.add_node(self.dot_node)
+
+        for child in self.children:
+            child._dot(dot, threshold, categorize, total)
+            if child.dot_node is not None:
+                dot.add_edge(Edge(self, child))
+
+    def _cleandot(self):
+        for child in self.children:
+            child._cleandot()
+            self.dot_node = None
+            del self.__dict__['dot_node']
+
+    def dot(self, dot, threshold=0.1, categorize=None):
+        self._dot(dot, threshold, categorize, self.total())
+        self._cleandot()
+
+class FuncData(RunData):
+    def __init__(self, filename, categorize=None):
+        super(FuncData, self).__init__(filename)
+        self.tree = FuncNode(filename)
+        self.tree.aggregate(self, categorize, incategory=False)
+        self.total = self.tree.total()
+
+    def displayx(self, output=None, maxcount=None):
+        if output is None:
+            import sys
+            output = sys.stdout
+
+        items = [ (val,key) for key,val in self.iteritems() ]
+        items.sort(reverse=True)
+        for val,key in items:
+            if maxcount is not None:
+                if maxcount == 0:
+                    return
+                maxcount -= 1
+
+            percent = val * 100.0 / self.total
+            print >>output, '%-30s %8s' % (key, '%3.2f%%' % percent)
+
+class Profile(object):
+    # This list controls the order of values in stacked bar data output
+    default_categories = [ 'interrupt',
+                           'driver',
+                           'stack',
+                           'buffer',
+                           'copy',
+                           'syscall',
+                           'user',
+                           'other',
+                           'idle']
+
+    def __init__(self, datatype, categorize=None):
+        categories = Profile.default_categories
+
+        self.datatype = datatype
+        self.categorize = categorize
+        self.data = {}
+        self.categories = categories[:]
+        self.rcategories = categories[:]
+        self.rcategories.reverse()
+        self.cpu = 0
+
     # Read in files
     def inputdir(self, directory):
         import os, os.path, re
         from os.path import expanduser, join as joinpath
 
         directory = expanduser(directory)
-        label_ex = re.compile(r'm5prof\.(.*)')
+        label_ex = re.compile(r'profile\.(.*).dat')
         for root,dirs,files in os.walk(directory):
             for name in files:
                 match = label_ex.match(name)
@@ -133,14 +257,230 @@ class Profile(object):
                 filename = joinpath(root, name)
                 prefix = os.path.commonprefix([root, directory])
                 dirname = root[len(prefix)+1:]
-                self.parsefile(dirname, match.group(1), filename)
+                data = self.datatype(filename, self.categorize)
+                self.setdata(dirname, match.group(1), data)
+
+    def setdata(self, run, cpu, data):
+        if run not in self.data:
+            self.data[run] = {}
+
+        if cpu in self.data[run]:
+            raise AttributeError, \
+                  'data already stored for run %s and cpu %s' % (run, cpu)
+
+        self.data[run][cpu] = data
+
+    def getdata(self, run, cpu):
+        try:
+            return self.data[run][cpu]
+        except KeyError:
+            return None
+
+    def alldata(self):
+        for run,cpus in self.data.iteritems():
+            for cpu,data in cpus.iteritems():
+                yield run,cpu,data
 
     def get(self, job, stat):
         if job.system is None:
             raise AttributeError, 'The job must have a system set'
 
-        cpu =  '%s.full0' % job.system
+        data = self.getdata(job.name, '%s.full%d' % (job.system, self.cpu))
+        if not data:
+            return [ 0.0 for c in self.categories ]
+
         values = []
-        for cat in self.categories:
-            values.append(self.prof.getvalue(job.name, cpu, cat))
+        for category in self.categories:
+            values.append(data.get(category, 0.0))
         return values
+
+    def dump(self):
+        for run,cpu,data in self.alldata():
+            print 'run %s, cpu %s' % (run, cpu)
+            data.dump()
+            print
+
+    def write_dot(self, threshold, jobfile=None, jobs=None):
+        import pydot
+
+        if jobs is None:
+            jobs = [ job for job in jobfile.jobs() ]
+
+        for job in jobs:
+            cpu =  '%s.full%d' % (job.system, self.cpu)
+            symbols = self.getdata(job.name, cpu)
+            if not symbols:
+                continue
+
+            dot = pydot.Dot()
+            symbols.tree.dot(dot, threshold=threshold)
+            dot.write(symbols.filename[:-3] + 'dot')
+
+    def write_txt(self, jobfile=None, jobs=None):
+        if jobs is None:
+            jobs = [ job for job in jobfile.jobs() ]
+
+        for job in jobs:
+            cpu =  '%s.full%d' % (job.system, self.cpu)
+            symbols = self.getdata(job.name, cpu)
+            if not symbols:
+                continue
+
+            output = file(symbols.filename[:-3] + 'txt', 'w')
+            symbols.display(output)
+
+    def display(self, jobfile=None, jobs=None, limit=None):
+        if jobs is None:
+            jobs = [ job for job in jobfile.jobs() ]
+
+        maxsymlen = 0
+
+        thejobs = []
+        for job in jobs:
+            cpu =  '%s.full%d' % (job.system, self.cpu)
+            symbols = self.getdata(job.name, cpu)
+            if symbols:
+                thejobs.append(job)
+                maxsymlen = max(maxsymlen, symbols.maxsymlen)
+
+        for job in thejobs:
+            cpu =  '%s.full%d' % (job.system, self.cpu)
+            symbols = self.getdata(job.name, cpu)
+            print job.name
+            symbols.display(limit=limit, maxsymlen=maxsymlen)
+            print
+
+
+from categories import func_categorize, pc_categorize
+class PCProfile(Profile):
+    def __init__(self, categorize=pc_categorize):
+        super(PCProfile, self).__init__(PCData, categorize)
+
+
+class FuncProfile(Profile):
+    def __init__(self, categorize=func_categorize):
+        super(FuncProfile, self).__init__(FuncData, categorize)
+
+def usage(exitcode = None):
+    print '''\
+Usage: %s [-bc] [-g <dir>] [-j <jobfile>] [-n <num>]
+
+    -c           groups symbols into categories
+    -b           dumps data for bar charts
+    -d           generate dot output
+    -g <d>       draw graphs and send output to <d>
+    -j <jobfile> specify a different jobfile (default is Test.py)
+    -n <n>       selects number of top symbols to print (default 5)
+''' % sys.argv[0]
+
+    if exitcode is not None:
+        sys.exit(exitcode)
+
+if __name__ == '__main__':
+    import getopt, re, sys
+    from os.path import expanduser
+    from output import StatOutput
+    from jobfile import JobFile
+
+    # default option values
+    numsyms = 10
+    graph = None
+    cpus = [ 0 ]
+    categorize = False
+    showidle = True
+    funcdata = True
+    jobfilename = 'Test.py'
+    dodot = False
+    dotformat = 'raw'
+    textout = False
+    threshold = 0.01
+    inputfile = None
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
+    except getopt.GetoptError:
+        usage(2)
+
+    for o,a in opts:
+        if o == '-C':
+            cpus = [ int(x) for x in a.split(',') ]
+        elif o == '-c':
+            categorize = True
+        elif o == '-D':
+            dotformat = a
+        elif o == '-d':
+            dodot = True
+        elif o == '-f':
+            inputfile = expanduser(a)
+        elif o == '-g':
+            graph = a
+        elif o == '-i':
+            showidle = False
+        elif o == '-j':
+            jobfilename = a
+        elif o == '-n':
+            numsyms = int(a)
+        elif o == '-p':
+            funcdata = False
+        elif o == '-T':
+            threshold = float(a)
+        elif o == '-t':
+            textout = True
+
+    if args:
+        print "'%s'" % args, len(args)
+        usage(1)
+
+    if inputfile:
+        data = FuncData(inputfile)
+
+        if dodot:
+            import pydot
+            dot = pydot.Dot()
+            data.dot(dot, threshold=threshold)
+            #dot.orientation = 'landscape'
+            #dot.ranksep='equally'
+            #dot.rank='samerank'
+            dot.write(dotfile, format=dotformat)
+        else:
+            data.display(limit=numsyms)
+
+    else:
+        jobfile = JobFile(jobfilename)
+
+        if funcdata:
+            profile = FuncProfile()
+        else:
+            profile = PCProfile()
+
+        profile.inputdir(jobfile.rootdir)
+
+        if graph:
+            for cpu in cpus:
+                profile.cpu = cpu
+                if funcdata:
+                    name = 'funcstacks%d' % cpu
+                else:
+                    name = 'stacks%d' % cpu
+                output = StatOutput(name, jobfile, info=profile)
+                output.graph(graph)
+
+        if dodot:
+            for cpu in cpus:
+                profile.cpu = cpu
+                profile.write_dot(jobfile=jobfile, threshold=threshold)
+
+        if not categorize:
+            for cpu in cpus:
+                profile.cpu = cpu
+                profile.categorize = None
+
+        if textout:
+            for cpu in cpus:
+                profile.cpu = cpu
+                profile.write_txt(jobfile=jobfile)
+
+        if not graph and not textout and not dodot:
+            for cpu in cpus:
+                profile.cpu = cpu
+                profile.display(jobfile=jobfile, limit=numsyms)
-- 
cgit v1.2.3