summaryrefslogtreecommitdiff
path: root/util/stats/profile.py
diff options
context:
space:
mode:
author Nathan Binkert <binkertn@umich.edu> 2005-10-18 19:07:42 -0400
committer Nathan Binkert <binkertn@umich.edu> 2005-10-18 19:07:42 -0400
commit a81c03737addc8e9a9b00cde0354e6c0ab4561af (patch)
tree e7415ec9c2a97cf676d5599a1cec2c1fd775ace2 /util/stats/profile.py
parent 357ee7a845eac0bd903ed31e31eec993d54a698c (diff)
download gem5-a81c03737addc8e9a9b00cde0354e6c0ab4561af.tar.xz
Add new function profiling stuff, wrap the pc_sample stuff into it.
SConscript:
    Get rid of the pc_sample stuff and move to the new profiling stuff
base/traceflags.py:
    DPRINTF Stack stuff
cpu/base.cc:
cpu/base.hh:
cpu/exec_context.cc:
cpu/exec_context.hh:
cpu/simple/cpu.cc:
    Add profiling stuff
kern/kernel_stats.hh:
    Use a smart pointer
sim/system.cc:
sim/system.hh:
    Create a new symbol table that has all of the symbols for a particular system
util/stats/categories.py:
    Change around the categories, add categories for function profiling stuff
util/stats/profile.py:
    New profile parsing and display code to deal with function profiling stuff: graph, dot, and text outputs.

--HG--
extra : convert_revision : b3de0cdc8bd468e42647966e2640ae009bda9eb8
Diffstat (limited to 'util/stats/profile.py')
-rw-r--r-- util/stats/profile.py 484
1 files changed, 412 insertions, 72 deletions
diff --git a/util/stats/profile.py b/util/stats/profile.py
index 65a03e9aa..cf946452b 100644
--- a/util/stats/profile.py
+++ b/util/stats/profile.py
@@ -27,103 +27,227 @@
from orderdict import orderdict
import output
-class ProfileData(object):
- def __init__(self):
- self.data = {}
- self.total = {}
- self.runs = orderdict()
- self.runlist = []
+class RunData(dict):
+ def __init__(self, filename=None):
+ self.filename = filename
- def addvalue(self, run, cpu, symbol, value):
- value = float(value)
- self.data[run, cpu, symbol] = self.getvalue(run, cpu, symbol) + value
- self.total[run, cpu] = self.gettotal(run, cpu) + value
- if run not in self.runs:
- self.runs[run] = orderdict()
+ def __getattr__(self, attr):
+ if attr == 'total':
+ total = 0.0
+ for value in self.itervalues():
+ total += value
+ return total
+ if attr == 'maxsymlen':
+ return max([ len(sym) for sym in self.iterkeys() ])
- if cpu not in self.runs[run]:
- self.runs[run][cpu] = {}
+ def display(self, output=None, limit=None, maxsymlen=None):
+ if not output:
+ import sys
+ output = sys.stdout
+ elif isinstance(output, str):
+ output = file(output, 'w')
- if symbol not in self.runs[run][cpu]:
- self.runs[run][cpu][symbol] = 0
+ total = float(self.total)
- self.runs[run][cpu][symbol] += value
+ # swap (string,count) order so we can sort on count
+ symbols = [ (count,name) for name,count in self.iteritems() ]
+ symbols.sort(reverse=True)
+ if limit is not None:
+ symbols = symbols[:limit]
- def getvalue(self, run, cpu, symbol):
- return self.data.get((run, cpu, symbol), 0)
+ if not maxsymlen:
+ maxsymlen = self.maxsymlen
- def gettotal(self, run, cpu):
- return self.total.get((run, cpu), 0)
+ symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
+ for number,name in symbols:
+ print >>output, symbolf % (name, 100.0 * (float(number) / total))
-class Profile(object):
- default_order = ['ste', 'hte', 'htd', 'ocm', 'occ', 'ocp']
- # This list controls the order of values in stacked bar data output
- default_categories = [ 'interrupt',
- 'driver',
- 'stack',
- 'bufmgt',
- 'copy',
- 'user',
- 'other',
- 'idle']
- def __init__(self, run_order=[], categories=[], stacknames=[]):
- if not run_order:
- run_order = Profile.default_order
- if not categories:
- categories = Profile.default_categories
+class PCData(RunData):
+ def __init__(self, filename=None, categorize=None, showidle=True):
+ super(PCData, self).__init__(self, filename)
+ if filename is None:
+ return
- self.run_order = run_order
- self.categories = categories
- self.rcategories = []
- self.rcategories.extend(categories)
- self.rcategories.reverse()
- self.stacknames = stacknames
- self.prof = ProfileData()
- self.categorize = True
- self.showidle = True
- self.maxsymlen = 0
-
- def category(self, symbol):
- from categories import categories, categories_re
- if categories.has_key(symbol):
- return categories[symbol]
- for regexp, cat in categories_re:
- if regexp.match(symbol):
- return cat
- return 'other'
-
- # Parse input file and put the results in the given run and cpu
- def parsefile(self, run, cpu, filename):
fd = file(filename)
for line in fd:
+ if line.strip() == '>>>PC data':
+ break
+
+ for line in fd:
+ if line.startswith('>>>'):
+ break
+
(symbol, count) = line.split()
if symbol == "0x0":
continue
count = int(count)
- if self.categorize:
- symbol = self.category(symbol)
- if symbol == 'idle' and not self.showidle:
+ if categorize is not None:
+ category = categorize(symbol)
+ if category is None:
+ category = 'other'
+ elif category == 'idle' and not showidle:
continue
- if symbol not in self.categories:
- symbol = 'other'
-
- self.maxsymlen = max(self.maxsymlen, len(symbol))
- self.prof.addvalue(run, cpu, symbol, count)
+ self[category] = count
fd.close()
+class FuncNode(object):
+ def __new__(cls, filename = None):
+ if filename is None:
+ return super(FuncNode, cls).__new__(cls)
+
+ fd = file(filename, 'r')
+ fditer = iter(fd)
+ nodes = {}
+ for line in fditer:
+ if line.strip() == '>>>function data':
+ break
+
+ for line in fditer:
+ if line.startswith('>>>'):
+ break
+
+ data = line.split()
+ node_id = int(data[0], 16)
+ node = FuncNode()
+ node.symbol = data[1]
+ node.count = int(data[2])
+ node.children = [ int(child, 16) for child in data[3:] ]
+ nodes[node_id] = node
+
+ for node in nodes.itervalues():
+ children = []
+ for cid in node.children:
+ child = nodes[cid]
+ children.append(child)
+ child.parent = node
+ node.children = tuple(children)
+ if not nodes:
+ print filename
+ print nodes
+ return nodes[0]
+
+ def __init__(self, filename=None):
+ pass
+
+ def total(self):
+ total = self.count
+ for child in self.children:
+ total += child.total()
+
+ return total
+
+ def aggregate(self, dict, categorize, incategory):
+ category = None
+ if categorize:
+ category = categorize(self.symbol)
+
+ total = self.count
+ for child in self.children:
+ total += child.aggregate(dict, categorize, category or incategory)
+
+ if category:
+ dict[category] = dict.get(category, 0) + total
+ return 0
+ elif not incategory:
+ dict[self.symbol] = dict.get(self.symbol, 0) + total
+
+ return total
+
+ def dump(self):
+ kids = [ child.symbol for child in self.children]
+ print '%s %d <%s>' % (self.symbol, self.count, ', '.join(kids))
+ for child in self.children:
+ child.dump()
+
+ def _dot(self, dot, threshold, categorize, total):
+ from pydot import Dot, Edge, Node
+ self.dot_node = None
+
+ value = self.total() * 100.0 / total
+ if value < threshold:
+ return
+ if categorize:
+ category = categorize(self.symbol)
+ if category and category != 'other':
+ return
+ label = '%s %.2f%%' % (self.symbol, value)
+ self.dot_node = Node(self, label=label)
+ dot.add_node(self.dot_node)
+
+ for child in self.children:
+ child._dot(dot, threshold, categorize, total)
+ if child.dot_node is not None:
+ dot.add_edge(Edge(self, child))
+
+ def _cleandot(self):
+ for child in self.children:
+ child._cleandot()
+ self.dot_node = None
+ del self.__dict__['dot_node']
+
+ def dot(self, dot, threshold=0.1, categorize=None):
+ self._dot(dot, threshold, categorize, self.total())
+ self._cleandot()
+
+class FuncData(RunData):
+ def __init__(self, filename, categorize=None):
+ super(FuncData, self).__init__(filename)
+ self.tree = FuncNode(filename)
+ self.tree.aggregate(self, categorize, incategory=False)
+ self.total = self.tree.total()
+
+ def displayx(self, output=None, maxcount=None):
+ if output is None:
+ import sys
+ output = sys.stdout
+
+ items = [ (val,key) for key,val in self.iteritems() ]
+ items.sort(reverse=True)
+ for val,key in items:
+ if maxcount is not None:
+ if maxcount == 0:
+ return
+ maxcount -= 1
+
+ percent = val * 100.0 / self.total
+ print >>output, '%-30s %8s' % (key, '%3.2f%%' % percent)
+
+class Profile(object):
+ # This list controls the order of values in stacked bar data output
+ default_categories = [ 'interrupt',
+ 'driver',
+ 'stack',
+ 'buffer',
+ 'copy',
+ 'syscall',
+ 'user',
+ 'other',
+ 'idle']
+
+ def __init__(self, datatype, categorize=None):
+ categories = Profile.default_categories
+
+ self.datatype = datatype
+ self.categorize = categorize
+ self.data = {}
+ self.categories = categories[:]
+ self.rcategories = categories[:]
+ self.rcategories.reverse()
+ self.cpu = 0
+
# Read in files
def inputdir(self, directory):
import os, os.path, re
from os.path import expanduser, join as joinpath
directory = expanduser(directory)
- label_ex = re.compile(r'm5prof\.(.*)')
+ label_ex = re.compile(r'profile\.(.*).dat')
for root,dirs,files in os.walk(directory):
for name in files:
match = label_ex.match(name)
@@ -133,14 +257,230 @@ class Profile(object):
filename = joinpath(root, name)
prefix = os.path.commonprefix([root, directory])
dirname = root[len(prefix)+1:]
- self.parsefile(dirname, match.group(1), filename)
+ data = self.datatype(filename, self.categorize)
+ self.setdata(dirname, match.group(1), data)
+
+ def setdata(self, run, cpu, data):
+ if run not in self.data:
+ self.data[run] = {}
+
+ if cpu in self.data[run]:
+ raise AttributeError, \
+ 'data already stored for run %s and cpu %s' % (run, cpu)
+
+ self.data[run][cpu] = data
+
+ def getdata(self, run, cpu):
+ try:
+ return self.data[run][cpu]
+ except KeyError:
+ return None
+
+ def alldata(self):
+ for run,cpus in self.data.iteritems():
+ for cpu,data in cpus.iteritems():
+ yield run,cpu,data
 def get(self, job, stat):
+ # Return one value per entry of self.categories for the given job,
+ # taken from the data stored for cpu '<job.system>.full<self.cpu>'.
+ # The stat argument is not used in this method.
 if job.system is None:
 raise AttributeError, 'The job must have a system set'
- cpu = '%s.full0' % job.system
+ data = self.getdata(job.name, '%s.full%d' % (job.system, self.cpu))
+ # No data stored (or an empty data set): report zero for every category.
+ if not data:
+ return [ 0.0 for c in self.categories ]
+
 values = []
- for cat in self.categories:
- values.append(self.prof.getvalue(job.name, cpu, cat))
+ for category in self.categories:
+ # Categories missing from the data count as 0.0.
+ values.append(data.get(category, 0.0))
 return values
+
+ def dump(self):
+ for run,cpu,data in self.alldata():
+ print 'run %s, cpu %s' % (run, cpu)
+ data.dump()
+ print
+
+ def write_dot(self, threshold, jobfile=None, jobs=None):
+ import pydot
+
+ if jobs is None:
+ jobs = [ job for job in jobfile.jobs() ]
+
+ for job in jobs:
+ cpu = '%s.full%d' % (job.system, self.cpu)
+ symbols = self.getdata(job.name, cpu)
+ if not symbols:
+ continue
+
+ dot = pydot.Dot()
+ symbols.tree.dot(dot, threshold=threshold)
+ dot.write(symbols.filename[:-3] + 'dot')
+
+ def write_txt(self, jobfile=None, jobs=None):
+ if jobs is None:
+ jobs = [ job for job in jobfile.jobs() ]
+
+ for job in jobs:
+ cpu = '%s.full%d' % (job.system, self.cpu)
+ symbols = self.getdata(job.name, cpu)
+ if not symbols:
+ continue
+
+ output = file(symbols.filename[:-3] + 'txt', 'w')
+ symbols.display(output)
+
+ def display(self, jobfile=None, jobs=None, limit=None):
+ if jobs is None:
+ jobs = [ job for job in jobfile.jobs() ]
+
+ maxsymlen = 0
+
+ thejobs = []
+ for job in jobs:
+ cpu = '%s.full%d' % (job.system, self.cpu)
+ symbols = self.getdata(job.name, cpu)
+ if symbols:
+ thejobs.append(job)
+ maxsymlen = max(maxsymlen, symbols.maxsymlen)
+
+ for job in thejobs:
+ cpu = '%s.full%d' % (job.system, self.cpu)
+ symbols = self.getdata(job.name, cpu)
+ print job.name
+ symbols.display(limit=limit, maxsymlen=maxsymlen)
+ print
+
+
+from categories import func_categorize, pc_categorize
+# Profile whose files are parsed with PCData.  The default categorize
+# function is pc_categorize from the categories module.
+class PCProfile(Profile):
+ def __init__(self, categorize=pc_categorize):
+ super(PCProfile, self).__init__(PCData, categorize)
+
+
+# Profile whose files are parsed with FuncData.  The default categorize
+# function is func_categorize from the categories module.
+class FuncProfile(Profile):
+ def __init__(self, categorize=func_categorize):
+ super(FuncProfile, self).__init__(FuncData, categorize)
+
+def usage(exitcode = None):
+ print '''\
+Usage: %s [-bc] [-g <dir>] [-j <jobfile>] [-n <num>]
+
+ -c groups symbols into categories
+ -b dumps data for bar charts
+ -d generate dot output
+ -g <d> draw graphs and send output to <d>
+ -j <jobfile> specify a different jobfile (default is Test.py)
+ -n <n> selects number of top symbols to print (default 5)
+''' % sys.argv[0]
+
+ if exitcode is not None:
+ sys.exit(exitcode)
+
+if __name__ == '__main__':
+ import getopt, re, sys
+ from os.path import expanduser
+ from output import StatOutput
+ from jobfile import JobFile
+
+ # default option values
+ numsyms = 10
+ graph = None
+ cpus = [ 0 ]
+ categorize = False
+ showidle = True
+ funcdata = True
+ jobfilename = 'Test.py'
+ dodot = False
+ dotformat = 'raw'
+ textout = False
+ threshold = 0.01
+ inputfile = None
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
+ except getopt.GetoptError:
+ usage(2)
+
+ for o,a in opts:
+ if o == '-C':
+ cpus = [ int(x) for x in a.split(',') ]
+ elif o == '-c':
+ categorize = True
+ elif o == '-D':
+ dotformat = a
+ elif o == '-d':
+ dodot = True
+ elif o == '-f':
+ inputfile = expanduser(a)
+ elif o == '-g':
+ graph = a
+ elif o == '-i':
+ showidle = False
+ elif o == '-j':
+ jobfilename = a
+ elif o == '-n':
+ numsyms = int(a)
+ elif o == '-p':
+ funcdata = False
+ elif o == '-T':
+ threshold = float(a)
+ elif o == '-t':
+ textout = True
+
+ if args:
+ print "'%s'" % args, len(args)
+ usage(1)
+
+ if inputfile:
+ data = FuncData(inputfile)
+
+ if dodot:
+ import pydot
+ dot = pydot.Dot()
+ data.dot(dot, threshold=threshold)
+ #dot.orientation = 'landscape'
+ #dot.ranksep='equally'
+ #dot.rank='samerank'
+ dot.write(dotfile, format=dotformat)
+ else:
+ data.display(limit=numsyms)
+
+ else:
+ jobfile = JobFile(jobfilename)
+
+ if funcdata:
+ profile = FuncProfile()
+ else:
+ profile = PCProfile()
+
+ profile.inputdir(jobfile.rootdir)
+
+ if graph:
+ for cpu in cpus:
+ profile.cpu = cpu
+ if funcdata:
+ name = 'funcstacks%d' % cpu
+ else:
+ name = 'stacks%d' % cpu
+ output = StatOutput(name, jobfile, info=profile)
+ output.graph(graph)
+
+ if dodot:
+ for cpu in cpus:
+ profile.cpu = cpu
+ profile.write_dot(jobfile=jobfile, threshold=threshold)
+
+ if not categorize:
+ for cpu in cpus:
+ profile.cpu = cpu
+ profile.categorize = None
+
+ if textout:
+ for cpu in cpus:
+ profile.cpu = cpu
+ profile.write_txt(jobfile=jobfile)
+
+ if not graph and not textout and not dodot:
+ for cpu in cpus:
+ profile.cpu = cpu
+ profile.display(jobfile=jobfile, limit=numsyms)