summaryrefslogtreecommitdiff
path: root/util/stats
diff options
context:
space:
mode:
Diffstat (limited to 'util/stats')
-rw-r--r--util/stats/db.py415
-rw-r--r--util/stats/dbinit.py388
-rw-r--r--util/stats/display.py124
-rw-r--r--util/stats/flags.py36
-rw-r--r--util/stats/info.py724
-rw-r--r--util/stats/print.py127
-rwxr-xr-xutil/stats/stats.py478
7 files changed, 2292 insertions, 0 deletions
diff --git a/util/stats/db.py b/util/stats/db.py
new file mode 100644
index 000000000..4cba82446
--- /dev/null
+++ b/util/stats/db.py
@@ -0,0 +1,415 @@
+import MySQLdb, re, string
+
def statcmp(a, b):
    """Three-way comparison of two dotted stat names, for use as a sort cmp.

    The names are split on '.'.  All components before the last component
    of the shorter name are compared pairwise first.  If those are equal,
    names with the same number of components are ordered by the component
    at that final position; otherwise the name with fewer components
    sorts first.

    Returns a negative, zero or positive integer.
    """
    def three_way(x, y):
        # Same result as the builtin cmp() for ordinary comparable values.
        return (x > y) - (x < y)

    left = a.split('.')
    right = b.split('.')

    last = min(len(left), len(right)) - 1
    for pos in range(last):
        if left[pos] != right[pos]:
            return three_way(left[pos], right[pos])

    # Special compare for last element.
    if len(left) != len(right):
        return three_way(len(left), len(right))
    return three_way(left[last], right[last])
+
class RunData:
    """One row of the ``runs`` table.

    Database.connect() selects five columns
    (rn_id, rn_name, rn_sample, rn_user, rn_project), so for such rows the
    user and project live at indexes 3 and 4.  Rows with only four columns
    are still read with the older (id, name, user, project) layout.

    Attributes:
      run     -- integer run id
      name    -- run name
      sample  -- sample column of a five-column row, otherwise None
      user    -- user that generated the run
      project -- project the run belongs to
    """
    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        if len(row) >= 5:
            # Layout produced by the query in Database.connect().
            self.sample = row[2]
            self.user = row[3]
            self.project = row[4]
        else:
            self.sample = None
            self.user = row[2]
            self.project = row[3]
+
class SubData:
    """Name and description of one (stat, x, y) element of a stat.

    ``row`` is indexed as (stat id, x, y, name, description); the first
    three entries are converted to int, the last two are stored as given.
    """
    def __init__(self, row):
        stat_id, x_index, y_index = int(row[0]), int(row[1]), int(row[2])
        self.stat = stat_id
        self.x = x_index
        self.y = y_index
        self.name, self.descr = row[3], row[4]
+
class Data:
    """One result row of a data query: (stat, run, x, y, data).

    Raises ValueError if ``row`` does not have exactly five entries.
    """
    def __init__(self, row):
        if len(row) != 5:
            # String exceptions are not raisable on current interpreters;
            # use a real exception class carrying the same message.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        # Written so that the text looks like the constructor call.
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % (
            self.stat, self.run, self.x, self.y, self.data)
+
class StatData(object):
    """Description of one stat, built from a row of the ``stats`` table.

    Columns are read by position: 0 id, 1 name, 2 description, 3 type,
    4 print, 5 prereq, 6 precision, 7 nozero, 8 nonan, 9 total, 10 pdf,
    11 cdf and, for DIST / VECTORDIST stats only, 12 min, 13 max,
    14 bucket size, 15 size.

    For FORMULA stats the formula text is looked up through the class
    attribute ``db``, which Database.connect() assigns before creating
    any StatData.
    """
    def __init__(self, row):
        self.stat = int(row[0])
        self.name, self.desc, self.type = row[1], row[2], row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        import flags
        # Each boolean column contributes one bit to the flag word.
        flag_columns = (
            (4, flags.printable),
            (7, flags.nozero),
            (8, flags.nonan),
            (9, flags.total),
            (10, flags.pdf),
            (11, flags.cdf),
        )
        bits = 0
        for column, bit in flag_columns:
            if int(row[column]):
                bits |= bit
        self.flags = bits

        if self.type in ('DIST', 'VECTORDIST'):
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]
+
class Node(object):
    """Intermediate component of a dotted stat name.

    Database.append() hangs child Nodes and stats off a Node as
    attributes; ``name`` holds the dotted path leading to this node.
    """
    def __init__(self, name):
        self.name = name

    def __str__(self):
        # Must read the instance attribute; a bare ``name`` is undefined here.
        return self.name
+
+class Database(object):
+ def __init__(self):
+ self.host = 'zizzer.pool'
+ self.user = ''
+ self.passwd = ''
+ self.db = 'm5stats'
+ self.cursor = None
+
+ self.allStats = []
+ self.allStatIds = {}
+ self.allStatNames = {}
+
+ self.allSubData = {}
+
+ self.allRuns = []
+ self.allRunIds = {}
+ self.allRunNames = {}
+
+ self.allBins = []
+ self.allBinIds = {}
+ self.allBinNames = {}
+
+ self.allFormulas = {}
+
+ self.stattop = {}
+ self.statdict = {}
+ self.statlist = []
+
+ self.mode = 'sum';
+ self.runs = None
+ self.bins = None
+ self.ticks = None
+ self.__dict__['get'] = type(self).sum
+
+ def query(self, sql):
+ self.cursor.execute(sql)
+
+ def update_dict(self, dict):
+ dict.update(self.stattop)
+
+ def append(self, stat):
+ statname = re.sub(':', '__', stat.name)
+ path = string.split(statname, '.')
+ pathtop = path[0]
+ fullname = ''
+
+ x = self
+ while len(path) > 1:
+ name = path.pop(0)
+ if not x.__dict__.has_key(name):
+ x.__dict__[name] = Node(fullname + name)
+ x = x.__dict__[name]
+ fullname = '%s%s.' % (fullname, name)
+
+ name = path.pop(0)
+ x.__dict__[name] = stat
+
+ self.stattop[pathtop] = self.__dict__[pathtop]
+ self.statdict[statname] = stat
+ self.statlist.append(statname)
+
+ def connect(self):
+ # connect
+ self.thedb = MySQLdb.connect(db=self.db,
+ host=self.host,
+ user=self.user,
+ passwd=self.passwd)
+
+ # create a cursor
+ self.cursor = self.thedb.cursor()
+
+ self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
+ from runs''')
+ for result in self.cursor.fetchall():
+ run = RunData(result);
+ self.allRuns.append(run)
+ self.allRunIds[run.run] = run
+ self.allRunNames[run.name] = run
+
+ self.query('select * from bins')
+ for id,name in self.cursor.fetchall():
+ self.allBinIds[int(id)] = name
+ self.allBinNames[name] = int(id)
+
+ self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
+ for result in self.cursor.fetchall():
+ subdata = SubData(result)
+ if self.allSubData.has_key(subdata.stat):
+ self.allSubData[subdata.stat].append(subdata)
+ else:
+ self.allSubData[subdata.stat] = [ subdata ]
+
+ self.query('select * from formulas')
+ for id,formula in self.cursor.fetchall():
+ self.allFormulas[int(id)] = formula
+
+ StatData.db = self
+ self.query('select * from stats')
+ import info
+ for result in self.cursor.fetchall():
+ stat = info.NewStat(StatData(result))
+ self.append(stat)
+ self.allStats.append(stat)
+ self.allStatIds[stat.stat] = stat
+ self.allStatNames[stat.name] = stat
+
+ # Name: listbins
+ # Desc: Prints all bins matching regex argument, if no argument
+ # is given all bins are returned
+ def listBins(self, regex='.*'):
+ print '%-50s %-10s' % ('bin name', 'id')
+ print '-' * 61
+ names = self.allBinNames.keys()
+ names.sort()
+ for name in names:
+ id = self.allBinNames[name]
+ print '%-50s %-10d' % (name, id)
+
+ # Name: listruns
+ # Desc: Prints all runs matching a given user, if no argument
+ # is given all runs are returned
+ def listRuns(self, user=None):
+ print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
+ print '-' * 62
+ for run in self.allRuns:
+ if user == None or user == run.user:
+ print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
+
+ # Name: listTicks
+ # Desc: Prints all samples for a given run
+ def listTicks(self, run=None):
+ print "tick"
+ print "----------------------------------------"
+ sql = 'select distinct dt_tick from data where dt_stat=1950'
+ #if run != None:
+ # sql += ' where dt_run=%d' % run
+ self.query(sql)
+ for r in self.cursor.fetchall():
+ print r[0]
+
+ # Name: liststats
+ # Desc: Prints all statistics that appear in the database,
+ # the optional argument is a regular expression that can
+ # be used to prune the result set
+ def listStats(self, regex=None):
+ print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
+ print '-' * 80
+
+ rx = None
+ if regex != None:
+ rx = re.compile(regex)
+
+ stats = [ stat.name for stat in self.allStats ]
+ stats.sort(statcmp)
+ for stat in stats:
+ stat = self.allStatNames[stat]
+ if rx == None or rx.match(stat.name):
+ print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
+
+ # Name: listFormulas
+ # Desc: Prints the name and formula of every FORMULA statistic in
+ # the database, the optional argument is a regular expression that
+ # can be used to prune the result set
+ def listFormulas(self, regex=None):
+ print '%-60s %s' % ('formula name', 'formula')
+ print '-' * 80
+
+ rx = None
+ if regex != None:
+ rx = re.compile(regex)
+
+ stats = [ stat.name for stat in self.allStats ]
+ stats.sort(statcmp)
+ for stat in stats:
+ stat = self.allStatNames[stat]
+ if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
+ print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
+
+ def getStat(self, stats):
+ if type(stats) is not list:
+ stats = [ stats ]
+
+ ret = []
+ for stat in stats:
+ if type(stat) is int:
+ ret.append(self.allStatIds[stat])
+
+ if type(stat) is str:
+ rx = re.compile(stat)
+ for stat in self.allStats:
+ if rx.match(stat.name):
+ ret.append(stat)
+ return ret
+
+ def getBin(self, bins):
+ if type(bins) is not list:
+ bins = [ bins ]
+
+ ret = []
+ for bin in bins:
+ if type(bin) is int:
+ ret.append(bin)
+ elif type(bin) is str:
+ ret.append(self.allBinNames[bin])
+ else:
+ for name,id in self.allBinNames.items():
+ if bin.match(name):
+ ret.append(id)
+
+ return ret
+
+ def getNotBin(self, bin):
+ map = {}
+ for bin in getBin(bin):
+ map[bin] = 1
+
+ ret = []
+ for bin in self.allBinIds.keys():
+ if not map.has_key(bin):
+ ret.append(bin)
+
+ return ret
+
+ #########################################
+ # get the data
+ #
+ def inner(self, op, stat, bins, ticks, group=False):
+ sql = 'select '
+ sql += 'dt_stat as stat, '
+ sql += 'dt_run as run, '
+ sql += 'dt_x as x, '
+ sql += 'dt_y as y, '
+ if group:
+ sql += 'dt_tick as tick, '
+ sql += '%s(dt_data) as data ' % op
+ sql += 'from data '
+ sql += 'where '
+
+ if isinstance(stat, list):
+ val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
+ sql += ' (%s)' % val
+ else:
+ sql += ' dt_stat=%d' % stat.stat
+
+ if self.runs != None and len(self.runs):
+ val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
+ sql += ' and (%s)' % val
+
+ if bins != None and len(bins):
+ val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
+ sql += ' and (%s)' % val
+
+ if ticks != None and len(ticks):
+ val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
+ sql += ' and (%s)' % val
+
+ sql += ' group by dt_stat,dt_run,dt_x,dt_y'
+ if group:
+ sql += ',dt_tick'
+ return sql
+
+ def outer(self, op_out, op_in, stat, bins, ticks):
+ sql = self.inner(op_in, stat, bins, ticks, True)
+ sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
+ sql += 'group by stat,run,x,y'
+ return sql
+
+ # Name: sum
+ # Desc: given a stat and arrays of bins and ticks, build the query
+ # that sums the data over all selected bins and ticks for each
+ # run. The bin and tick arguments must still be passed, but may be
+ # None or empty to select everything
+ def sum(self, stat, bins, ticks):
+ return self.inner('sum', stat, bins, ticks)
+
+ # Name: avg
+ # Desc: given a stat and arrays of bins and ticks, build the query
+ # that sums the selected bins within each tick and then averages
+ # those per-tick sums for each run. The bin and tick arguments must
+ # still be passed, but may be None or empty to select everything
+ def avg(self, stat, bins, ticks):
+ return self.outer('avg', 'sum', stat, bins, ticks)
+
+ # Name: stdev
+ # Desc: given a stat and arrays of bins and ticks, build the query
+ # that sums the selected bins within each tick and then takes the
+ # standard deviation of those per-tick sums for each run. The bin
+ # and tick arguments must still be passed, but may be None or empty
+ def stdev(self, stat, bins, ticks):
+ return self.outer('stddev', 'sum', stat, bins, ticks)
+
+ def __getattribute__(self, attr):
+ if attr != 'get':
+ return super(Database, self).__getattribute__(attr)
+
+ if self.__dict__['get'] == type(self).sum:
+ return 'sum'
+ elif self.__dict__['get'] == type(self).avg:
+ return 'avg'
+ elif self.__dict__['get'] == type(self).stdev:
+ return 'stdev'
+ else:
+ return ''
+
+ def __setattr__(self, attr, value):
+ if attr != 'get':
+ super(Database, self).__setattr__(attr, value)
+ return
+
+ if value == 'sum':
+ self.__dict__['get'] = type(self).sum
+ elif value == 'avg':
+ self.__dict__['get'] = type(self).avg
+ elif value == 'stdev':
+ self.__dict__['get'] = type(self).stdev
+ else:
+ raise AttributeError, "can only set get to: sum | avg | stdev"
+
+ def data(self, stat, bins=None, ticks=None):
+ if bins is None:
+ bins = self.bins
+ if ticks is None:
+ ticks = self.ticks
+ sql = self.__dict__['get'](self, stat, bins, ticks)
+ self.query(sql)
+
+ runs = {}
+ for x in self.cursor.fetchall():
+ data = Data(x)
+ if not runs.has_key(data.run):
+ runs[data.run] = {}
+ if not runs[data.run].has_key(data.x):
+ runs[data.run][data.x] = {}
+
+ runs[data.run][data.x][data.y] = data.data
+ return runs
diff --git a/util/stats/dbinit.py b/util/stats/dbinit.py
new file mode 100644
index 000000000..686f55c98
--- /dev/null
+++ b/util/stats/dbinit.py
@@ -0,0 +1,388 @@
+import MySQLdb
+
+class MyDB(object):
+ def __init__(self, options):
+ self.name = options.db
+ self.host = options.host
+ self.user = options.user
+ self.passwd = options.passwd
+ self.mydb = None
+ self.cursor = None
+
+ def admin(self):
+ self.close()
+ self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
+ passwd=self.passwd)
+ self.cursor = self.mydb.cursor()
+
+ def connect(self):
+ self.close()
+ self.mydb = MySQLdb.connect(db=self.name, host=self.host,
+ user=self.user, passwd=self.passwd)
+ self.cursor = self.mydb.cursor()
+
+ def close(self):
+ if self.mydb is not None:
+ self.mydb.close()
+ self.cursor = None
+
+ def query(self, sql):
+ self.cursor.execute(sql)
+
+ def drop(self):
+ self.query('DROP DATABASE IF EXISTS %s' % self.name)
+
+ def create(self):
+ self.query('CREATE DATABASE %s' % self.name)
+
+ def populate(self):
+ #
+ # Each run (or simulation) gets its own entry in the runs table to
+ # group stats by where they were generated
+ #
+ # COLUMNS:
+ # 'id' is a unique identifier for each run to be used in other
+ # tables.
+ # 'name' is the user designated name for the data generated. It is
+ # configured in the simulator.
+ # 'user' identifies the user that generated the data for the given
+ # run.
+ # 'project' another name to identify runs for a specific goal
+ # 'date' is a timestamp for when the data was generated. It can be
+ # used to easily expire data that was generated in the past.
+ # 'expire' is a timestamp for when the data should be removed from
+ # the database so we don't have years worth of junk.
+ #
+ # INDEXES:
+ # 'run' is indexed so you can find out details of a run if the run
+ # was retreived from the data table.
+ # 'name' is indexed so that two all run names are forced to be unique
+ #
+ self.query('''
+ CREATE TABLE runs(
+ rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ rn_name VARCHAR(200) NOT NULL,
+ rn_sample VARCHAR(32) NOT NULL,
+ rn_user VARCHAR(32) NOT NULL,
+ rn_project VARCHAR(100) NOT NULL,
+ rn_date TIMESTAMP NOT NULL,
+ rn_expire TIMESTAMP NOT NULL,
+ PRIMARY KEY (rn_id),
+ UNIQUE (rn_name,rn_sample)
+ ) TYPE=InnoDB''')
+
+ #
+ # We keep the bin names separate so that the data table doesn't get
+ # huge since bin names are frequently repeated.
+ #
+ # COLUMNS:
+ # 'id' is the unique bin identifer.
+ # 'name' is the string name for the bin.
+ #
+ # INDEXES:
+ # 'bin' is indexed to get the name of a bin when data is retrieved
+ # via the data table.
+ # 'name' is indexed to get the bin id for a named bin when you want
+ # to search the data table based on a specific bin.
+ #
+ self.query('''
+ CREATE TABLE bins(
+ bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ bn_name VARCHAR(255) NOT NULL,
+ PRIMARY KEY(bn_id),
+ UNIQUE (bn_name)
+ ) TYPE=InnoDB''')
+
+ #
+ # The stat table gives us all of the data for a particular stat.
+ #
+ # COLUMNS:
+ # 'stat' is a unique identifier for each stat to be used in other
+ # tables for references.
+ # 'name' is simply the simulator derived name for a given
+ # statistic.
+ # 'descr' is the description of the statistic and what it tells
+ # you.
+ # 'type' defines what the stat tells you. Types are:
+ # SCALAR: A simple scalar statistic that holds one value
+ # VECTOR: An array of statistic values. Such a something that
+ # is generated per-thread. Vectors exist to give averages,
+ # pdfs, cdfs, means, standard deviations, etc across the
+ # stat values.
+ # DIST: Is a distribution of data. When the statistic value is
+ # sampled, its value is counted in a particular bucket.
+ # Useful for keeping track of utilization of a resource.
+ # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
+ # VECTORDIST: Can be used when the distribution needs to be
+ # factored out into a per-thread distribution of data for
+ # example. It can still be summed across threads to find
+ # the total distribution.
+ # VECTOR2D: Can be used when you have a stat that is not only
+ # per-thread, but it is per-something else. Like
+ # per-message type.
+ # FORMULA: This statistic is a formula, and its data must be
+ # looked up in the formula table, for indicating how to
+ # present its values.
+ # 'subdata' is potentially used by any of the vector types to
+ # give a specific name to all of the data elements within a
+ # stat.
+ # 'print' indicates whether this stat should be printed ever.
+ # (Unnamed stats don't usually get printed)
+ # 'prereq' only print the stat if the prereq is not zero.
+ # 'prec' number of decimal places to print
+ # 'nozero' don't print zero values
+ # 'nonan' don't print NaN values
+ # 'total' for vector type stats, print the total.
+ # 'pdf' for vector type stats, print the pdf.
+ # 'cdf' for vector type stats, print the cdf.
+ #
+ # The Following are for dist type stats:
+ # 'min' is the minimum bucket value. Anything less is an underflow.
+ # 'max' is the maximum bucket value. Anything more is an overflow.
+ # 'bktsize' is the approximate number of entries in each bucket.
+ # 'size' is the number of buckets. equal to (min/max)/bktsize.
+ #
+ # INDEXES:
+ # 'stat' is indexed so that you can find out details about a stat
+ # if the stat id was retrieved from the data table.
+ # 'name' is indexed so that you can simply look up data about a
+ # named stat.
+ #
+ self.query('''
+ CREATE TABLE stats(
+ st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ st_name VARCHAR(255) NOT NULL,
+ st_descr TEXT NOT NULL,
+ st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
+ "VECTOR2D", "FORMULA") NOT NULL,
+ st_print BOOL NOT NULL,
+ st_prereq SMALLINT UNSIGNED NOT NULL,
+ st_prec TINYINT NOT NULL,
+ st_nozero BOOL NOT NULL,
+ st_nonan BOOL NOT NULL,
+ st_total BOOL NOT NULL,
+ st_pdf BOOL NOT NULL,
+ st_cdf BOOL NOT NULL,
+ st_min DOUBLE NOT NULL,
+ st_max DOUBLE NOT NULL,
+ st_bktsize DOUBLE NOT NULL,
+ st_size SMALLINT UNSIGNED NOT NULL,
+ PRIMARY KEY (st_id),
+ UNIQUE (st_name)
+ ) TYPE=InnoDB''')
+
+ #
+ # This is the main table of data for stats.
+ #
+ # COLUMNS:
+ # 'stat' refers to the stat field given in the stat table.
+ #
+ # 'x' referrs to the first dimension of a multi-dimensional stat. For
+ # a vector, x will start at 0 and increase for each vector
+ # element.
+ # For a distribution:
+ # -1: sum (for calculating standard deviation)
+ # -2: sum of squares (for calculating standard deviation)
+ # -3: total number of samples taken (for calculating
+ # standard deviation)
+ # -4: minimum value
+ # -5: maximum value
+ # -6: underflow
+ # -7: overflow
+ # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
+ # dimension.
+ # 'run' is the run that the data was generated from. Details up in
+ # the run table
+ # 'tick' is a timestamp generated by the simulator.
+ # 'bin' is the name of the bin that the data was generated in, if
+ # any.
+ # 'data' is the actual stat value.
+ #
+ # INDEXES:
+ # 'stat' is indexed so that a user can find all of the data for a
+ # particular stat. It is not unique, because that specific stat
+ # can be found in many runs, bins, and samples, in addition to
+ # having entries for the mulidimensional cases.
+ # 'run' is indexed to allow a user to remove all of the data for a
+ # particular execution run. It can also be used to allow the
+ # user to print out all of the data for a given run.
+ #
+ self.query('''
+ CREATE TABLE data(
+ dt_stat SMALLINT UNSIGNED NOT NULL,
+ dt_x SMALLINT NOT NULL,
+ dt_y SMALLINT NOT NULL,
+ dt_run SMALLINT UNSIGNED NOT NULL,
+ dt_tick BIGINT UNSIGNED NOT NULL,
+ dt_bin SMALLINT UNSIGNED NOT NULL,
+ dt_data DOUBLE NOT NULL,
+ INDEX (dt_stat),
+ INDEX (dt_run),
+ UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
+ ) TYPE=InnoDB;''')
+
+ #
+ # Names and descriptions for multi-dimensional stats (vectors, etc.)
+ # are stored here instead of having their own entry in the statistics
+ # table. This allows all parts of a single stat to easily share a
+ # single id.
+ #
+ # COLUMNS:
+ # 'stat' is the unique stat identifier from the stat table.
+ # 'x' is the first dimension for multi-dimensional stats
+ # corresponding to the data table above.
+ # 'y' is the second dimension for multi-dimensional stats
+ # corresponding to the data table above.
+ # 'name' is the specific subname for the unique stat,x,y combination.
+ # 'descr' is the specific description for the uniqe stat,x,y
+ # combination.
+ #
+ # INDEXES:
+ # 'stat' is indexed so you can get the subdata for a specific stat.
+ #
+ self.query('''
+ CREATE TABLE subdata(
+ sd_stat SMALLINT UNSIGNED NOT NULL,
+ sd_x SMALLINT NOT NULL,
+ sd_y SMALLINT NOT NULL,
+ sd_name VARCHAR(255) NOT NULL,
+ sd_descr TEXT,
+ UNIQUE (sd_stat,sd_x,sd_y)
+ ) TYPE=InnoDB''')
+
+
+ #
+ # The formula table is maintained separately from the data table
+ # because formula data, unlike other stat data cannot be represented
+ # there.
+ #
+ # COLUMNS:
+ # 'stat' refers to the stat field generated in the stat table.
+ # 'formula' is the actual string representation of the formula
+ # itself.
+ #
+ # INDEXES:
+ # 'stat' is indexed so that you can just look up a formula.
+ #
+ self.query('''
+ CREATE TABLE formulas(
+ fm_stat SMALLINT UNSIGNED NOT NULL,
+ fm_formula BLOB NOT NULL,
+ PRIMARY KEY(fm_stat)
+ ) TYPE=InnoDB''')
+
+ #
+ # Each stat used in each formula is kept in this table. This way, if
+ # you want to print out a particular formula, you can simply find out
+ # which stats you need by looking in this table. Additionally, when
+ # you remove a stat from the stats table and data table, you remove
+ # any references to the formula in this table. When a formula is no
+ # longer referred to, you remove its entry.
+ #
+ # COLUMNS:
+ # 'stat' is the stat id from the stat table above.
+ # 'child' is the stat id of a stat that is used for this formula.
+ # There may be many children for any given 'stat' (formula)
+ #
+ # INDEXES:
+ # 'stat' is indexed so you can look up all of the children for a
+ # particular stat.
+ # 'child' is indexed so that you can remove an entry when a stat is
+ # removed.
+ #
+ self.query('''
+ CREATE TABLE formula_ref(
+ fr_stat SMALLINT UNSIGNED NOT NULL,
+ fr_run SMALLINT UNSIGNED NOT NULL,
+ UNIQUE (fr_stat,fr_run),
+ INDEX (fr_stat),
+ INDEX (fr_run)
+ ) TYPE=InnoDB''')
+
+ # COLUMNS:
+ # 'event' is the unique event id from the event_desc table
+ # 'run' is simulation run id that this event took place in
+ # 'tick' is the tick when the event happened
+ #
+ # INDEXES:
+ # 'event' is indexed so you can look up all occurences of a
+ # specific event
+ # 'run' is indexed so you can find all events in a run
+ # 'tick' is indexed because we want the unique thing anyway
+ # 'event,run,tick' is unique combination
+ self.query('''
+ CREATE TABLE events(
+ ev_event SMALLINT UNSIGNED NOT NULL,
+ ev_run SMALLINT UNSIGNED NOT NULL,
+ ev_tick BIGINT UNSIGNED NOT NULL,
+ INDEX(ev_event),
+ INDEX(ev_run),
+ INDEX(ev_tick),
+ UNIQUE(ev_event,ev_run,ev_tick)
+ ) TYPE=InnoDB''')
+
+ # COLUMNS:
+ # 'id' is the unique description id
+ # 'name' is the name of the event that occurred
+ #
+ # INDEXES:
+ # 'id' is indexed because it is the primary key and is what you use
+ # to look up the descriptions
+ # 'name' is indexed so one can find the event based on name
+ #
+ self.query('''
+ CREATE TABLE event_names(
+ en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ en_name VARCHAR(255) NOT NULL,
+ PRIMARY KEY (en_id),
+ UNIQUE (en_name)
+ ) TYPE=InnoDB''')
+
+ def clean(self):
+ self.query('''
+ DELETE data
+ FROM data
+ LEFT JOIN runs ON dt_run=rn_id
+ WHERE rn_id IS NULL''')
+
+ self.query('''
+ DELETE formula_ref
+ FROM formula_ref
+ LEFT JOIN runs ON fr_run=rn_id
+ WHERE rn_id IS NULL''')
+
+ self.query('''
+ DELETE formulas
+ FROM formulas
+ LEFT JOIN formula_ref ON fm_stat=fr_stat
+ WHERE fr_stat IS NULL''')
+
+ self.query('''
+ DELETE stats
+ FROM stats
+ LEFT JOIN data ON st_id=dt_stat
+ WHERE dt_stat IS NULL''')
+
+ self.query('''
+ DELETE subdata
+ FROM subdata
+ LEFT JOIN data ON sd_stat=dt_stat
+ WHERE dt_stat IS NULL''')
+
+ self.query('''
+ DELETE bins
+ FROM bins
+ LEFT JOIN data ON bn_id=dt_bin
+ WHERE dt_bin IS NULL''')
+
+ self.query('''
+ DELETE events
+ FROM events
+ LEFT JOIN runs ON ev_run=rn_id
+ WHERE rn_id IS NULL''')
+
+ self.query('''
+ DELETE event_names
+ FROM event_names
+ LEFT JOIN events ON en_id=ev_event
+ WHERE ev_event IS NULL''')
diff --git a/util/stats/display.py b/util/stats/display.py
new file mode 100644
index 000000000..68a26852d
--- /dev/null
+++ b/util/stats/display.py
@@ -0,0 +1,124 @@
class Value:
    """A stat value together with the rules for turning it into text.

    value     -- a number, or a string such as 'nan' / 'inf'
    precision -- number of decimal places; a negative precision prints
                 whole numbers without decimals and everything else
                 with '%f'
    percent   -- when true, numbers are multiplied by 100 and a '%' sign
                 is appended to the text
    """
    def __init__(self, value, precision, percent = False):
        self.value = value
        self.precision = precision
        self.percent = percent

    def __str__(self):
        if isinstance(self.value, str):
            lowered = self.value.lower()
            if lowered == 'nan':
                text = 'NaN'
            elif lowered == 'inf':
                text = 'Inf'
            else:
                # Any other string is shown unchanged instead of failing
                # on an unassigned local.
                text = self.value
        else:
            if self.precision >= 0:
                fmt = "%%.%df" % self.precision
            elif self.value == 0.0:
                fmt = "%.0f"
            elif self.value % 1.0 == 0.0:
                fmt = "%.0f"
            else:
                fmt = "%f"
            number = self.value
            if self.percent:
                number = number * 100.0
            text = fmt % number

        if self.percent:
            text = text + "%"

        return text
+
class Print:
    """One printable output line for a stat value.

    Keyword arguments given to the constructor become attributes.
    Formatting reads name, value and precision, plus pdf, cdf and desc
    when they are present on the instance; doprint() additionally reads
    flags.  The module-level names descriptions, display_all,
    flags_nozero and flags_nonan are expected to be defined by the time
    the methods run.
    """
    def __init__(self, **vals):
        self.__dict__.update(vals)

    def __str__(self):
        attrs = self.__dict__
        value = Value(self.value, self.precision)

        pdf = ''
        if 'pdf' in attrs:
            pdf = Value(self.pdf, 2, True)

        cdf = ''
        if 'cdf' in attrs:
            cdf = Value(self.cdf, 2, True)

        line = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)

        if descriptions and 'desc' in attrs and self.desc:
            line = "%s # %s" % (line, self.desc)

        return line

    def doprint(self):
        """Return whether this line should be shown."""
        if display_all:
            return True
        # Zero values are hidden when the nozero flag is set.
        if self.value == 0.0 and (self.flags & flags_nozero):
            return False
        # 'NaN' values are hidden when the nonan flag is set.
        if isinstance(self.value, str):
            if self.value == 'NaN' and (self.flags & flags_nonan):
                return False
        return True

    def display(self):
        """Print the line if doprint() allows it."""
        if not self.doprint():
            return
        print(self)
+
class VectorDisplay:
    """Prints a scalar or vector stat through Print lines.

    display() reads name, desc, flags, precision and value from the
    instance, plus subnames and subdescs when they are present.
    """
    def display(self):
        line = Print()
        line.flags = self.flags
        line.precision = self.precision

        if not issequence(self.value):
            # Scalar: a single line.
            line.name = self.name
            line.desc = self.desc
            line.value = self.value
            line.display()
            return

        values = self.value
        if not len(values):
            return

        total = reduce(lambda x, y: float(x) + float(y), values)
        running = 0.0
        count = len(values)

        # Elements without a name are only shown when everything is shown.
        if display_all:
            subnames = ['[%d]' % i for i in range(count)]
        else:
            subnames = [''] * count

        attrs = self.__dict__
        if 'subnames' in attrs:
            for i, each in enumerate(self.subnames):
                if len(each) > 0:
                    subnames[i] = '.%s' % each

        subdescs = [self.desc] * count
        if 'subdescs' in attrs:
            for i in range(min(count, len(self.subdescs))):
                subdescs[i] = self.subdescs[i]

        # All three lists have the same length here.
        for val, sname, sdesc in zip(values, subnames, subdescs):
            # NOTE(review): the pdf/cdf update is placed inside the
            # total > 0 guard; the source lost its indentation, so
            # confirm against the original nesting.
            if total > 0.0:
                share = float(val) / float(total)
                running += share
                if (self.flags & flags_pdf):
                    line.pdf = share
                    line.cdf = running

            if len(sname) == 0:
                continue

            line.name = self.name + sname
            line.desc = sdesc
            line.value = val
            line.display()

        if (self.flags & flags_total):
            # The total line carries no pdf/cdf columns.
            for key in ('pdf', 'cdf'):
                if key in line.__dict__:
                    del line.__dict__[key]
            line.name = self.name + '.total'
            line.desc = self.desc
            line.value = total
            line.display()
+
diff --git a/util/stats/flags.py b/util/stats/flags.py
new file mode 100644
index 000000000..7a57e722b
--- /dev/null
+++ b/util/stats/flags.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2004 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
# Stat flag bits.  Each flag has its own bit so several flags can be
# OR-ed together into one flag word.  Bits 2 and 3 are not assigned here.
init      = 1 << 0
printable = 1 << 1
total     = 1 << 4
pdf       = 1 << 5
cdf       = 1 << 6
dist      = 1 << 7
nozero    = 1 << 8
nonan     = 1 << 9
diff --git a/util/stats/info.py b/util/stats/info.py
new file mode 100644
index 000000000..a94563cf9
--- /dev/null
+++ b/util/stats/info.py
@@ -0,0 +1,724 @@
+from __future__ import division
+import operator, re, types
+
+# Object the stats fetch their data from (source.data(), source.stattop);
+# stats.py assigns a db.Database instance here.
+source = None
+# Id of the run whose values FormulaStat.__float__ and FormulaStat.display
+# report; stats.py rebinds it while iterating over runs.
+display_run = 0
+
+def issequence(t):
+    """Return True when *t* is a tuple or a list instance."""
+    return isinstance(t, (types.TupleType, types.ListType))
+
+def total(f):
+    """Return a new FormulaStat whose value is the sum of *f*'s elements.
+
+    *f* may be a FormulaStat (its ``value`` is used) or a plain value.
+    A tuple/list value is folded with ``+``; anything else is stored
+    unchanged.
+    """
+    v = f
+    if isinstance(f, FormulaStat):
+        v = f.value
+
+    if issequence(v):
+        v = reduce(operator.add, v)
+
+    result = FormulaStat()
+    result.value = v
+    return result
+
+def unaryop(op, f):
+    """Apply the one-argument callable *op* to *f*.
+
+    A FormulaStat is unwrapped to its ``value`` first.  For a tuple or
+    list the result is a list with *op* applied to every element;
+    otherwise it is ``op(value)``.
+    """
+    operand = f
+    if isinstance(f, FormulaStat):
+        operand = f.value
+
+    if not issequence(operand):
+        return op(operand)
+    return [op(item) for item in operand]
+
+def zerodiv(lv, rv):
+    """True-divide *lv* by *rv*, yielding 0.0 when *rv* equals zero."""
+    if rv == 0.0:
+        return 0.0
+    return operator.truediv(lv, rv)
+
+def wrapop(op, lv, rv):
+    """Return ``op(lv, rv)`` unless an operand is a string.
+
+    A string operand is returned as-is instead of being passed to *op*;
+    the left operand takes precedence when both are strings.
+    """
+    for operand in (lv, rv):
+        if isinstance(operand, str):
+            return operand
+    return op(lv, rv)
+
+def same(lv, rv):
+    """Return True when *lv* and *rv* have identical run/x/y key sets.
+
+    Both arguments are nested mappings indexed as value[run][x][y].
+    Keys are compared as sorted lists at every level, so neither a
+    different number of keys nor a different dictionary ordering can
+    slip through.  On the first mismatch a short diagnostic is printed
+    and False is returned.
+    """
+    def keysmatch(label, ld, rd):
+        # Compare the sorted key lists of two dictionaries.
+        lkeys = ld.keys()
+        rkeys = rd.keys()
+        lkeys.sort()
+        rkeys.sort()
+        if lkeys == rkeys:
+            return True
+        print label
+        print lkeys
+        print rkeys
+        return False
+
+    if not keysmatch('lrun != rrun', lv, rv):
+        return False
+
+    for run in lv.keys():
+        if not keysmatch('lx != rx', lv[run], rv[run]):
+            return False
+        for x in lv[run].keys():
+            if not keysmatch('ly != ry', lv[run][x], rv[run][x]):
+                return False
+
+    return True
+
+
+def binaryop(op, lf, rf):
+    """Apply *op* element-wise and return the result[run][x][y] mapping.
+
+    Either operand may be a FormulaStat (its nested ``value`` mapping is
+    walked) or a plain value that is combined with every element of the
+    other operand.  When both are FormulaStats their run/x/y keys must
+    match, otherwise AttributeError is raised.  If neither operand is a
+    FormulaStat an empty dictionary is returned.  Each pair of elements
+    goes through wrapop(), so string elements are propagated instead of
+    being passed to *op*.
+    """
+    lstat = isinstance(lf, FormulaStat)
+    rstat = isinstance(rf, FormulaStat)
+
+    lv = None
+    rv = None
+    if lstat:
+        lv = lf.value
+    if rstat:
+        rv = rf.value
+
+    if lstat and rstat and not same(lv, rv):
+        raise AttributeError("run,x,y not identical")
+
+    # The FormulaStat operand (the left one when both are) fixes the
+    # set of keys that is walked.
+    if lstat:
+        shape = lv
+    elif rstat:
+        shape = rv
+    else:
+        return {}
+
+    result = {}
+    for run in shape.keys():
+        byx = {}
+        result[run] = byx
+        for x in shape[run].keys():
+            byy = {}
+            byx[x] = byy
+            for y in shape[run][x].keys():
+                if lstat:
+                    left = lv[run][x][y]
+                else:
+                    left = lf
+                if rstat:
+                    right = rv[run][x][y]
+                else:
+                    right = rf
+                byy[y] = wrapop(op, left, right)
+
+    return result
+
+def sums(x, y):
+    """Add *x* and *y*; tuples/lists are added element by element."""
+    if not issequence(x):
+        return x + y
+    return map(operator.add, x, y)
+
+def alltrue(list):
+    """Fold *list* with ``and``; the result is truthy iff every item is.
+
+    An empty list yields True (previously reduce() raised TypeError).
+    """
+    # Seeding with True leaves the result for non-empty input unchanged.
+    return reduce(lambda x, y: x and y, list, True)
+
+def allfalse(list):
+    """Return True when no item of *list* is truthy.
+
+    An empty list yields True (previously reduce() raised TypeError).
+    """
+    # Seeding with False leaves the result for non-empty input unchanged.
+    return not reduce(lambda x, y: x or y, list, False)
+
+def enumerate(list):
+    """Return a list of (index, item) tuples for the sequence *list*."""
+    return zip(range(len(list)), list)
+
+def cmp(a, b):
+    """Three-way compare: -1 if a < b, 0 if a == b, otherwise 1."""
+    if a < b:
+        return -1
+    if a == b:
+        return 0
+    return 1
+
+class Statistic(object):
+    """Base class for stats whose ``value`` is fetched lazily and cached.
+
+    The instance starts out with every attribute of *data*.  Reading
+    ``value`` calls getValue() the first time and caches the result;
+    assigning ``bins`` or ``ticks`` clears the cache so the next read
+    fetches again.
+    """
+    def __init__(self, data):
+        self.__dict__.update(data.__dict__)
+        for attr in ('value', 'bins', 'ticks'):
+            self.__dict__.setdefault(attr, None)
+
+    def __getattribute__(self, attr):
+        if attr != 'value':
+            return super(Statistic, self).__getattribute__(attr)
+
+        cache = self.__dict__
+        # Identity test: a cached value must never be compared via __eq__.
+        if cache['value'] is None:
+            cache['value'] = self.getValue()
+        return cache['value']
+
+    def __setattr__(self, attr, value):
+        if attr == 'bins' or attr == 'ticks':
+            if attr == 'bins' and value is not None:
+                # The original looked up an undefined global ``db``; the
+                # module-level ``source`` is the database object.
+                # NOTE(review): assumes source provides getBin() - confirm.
+                value = source.getBin(value)
+
+            self.__dict__[attr] = value
+            # Invalidate the cached value; it depends on bins/ticks.
+            self.__dict__['value'] = None
+        else:
+            super(Statistic, self).__setattr__(attr, value)
+
+    def getValue(self):
+        """Fetch the stat's value; subclasses must override this."""
+        raise AttributeError('getValue() must be defined')
+
+    def zero(self):
+        return False
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __str__(self):
+        return '%f' % (float(self))
+
+class FormulaStat(object):
+    """Arithmetic wrapper around a nested value[run][x][y] mapping.
+
+    Every binary operator returns a new FormulaStat whose value is built
+    by binaryop().  Division uses zerodiv(), so dividing by zero gives
+    0.0 instead of raising.
+    """
+    def _apply(self, op, lhs, rhs):
+        """Return a new FormulaStat holding binaryop(op, lhs, rhs)."""
+        f = FormulaStat()
+        f.value = binaryop(op, lhs, rhs)
+        return f
+
+    def __add__(self, other):
+        return self._apply(operator.add, self, other)
+    def __sub__(self, other):
+        return self._apply(operator.sub, self, other)
+    def __mul__(self, other):
+        return self._apply(operator.mul, self, other)
+    def __truediv__(self, other):
+        return self._apply(zerodiv, self, other)
+    def __mod__(self, other):
+        return self._apply(operator.mod, self, other)
+    def __radd__(self, other):
+        return self._apply(operator.add, other, self)
+    def __rsub__(self, other):
+        return self._apply(operator.sub, other, self)
+    def __rmul__(self, other):
+        return self._apply(operator.mul, other, self)
+    def __rtruediv__(self, other):
+        return self._apply(zerodiv, other, self)
+    def __rmod__(self, other):
+        return self._apply(operator.mod, other, self)
+
+    def __neg__(self):
+        f = FormulaStat()
+        if isinstance(self.value, dict):
+            # unaryop() would hand the whole dictionary to operator.neg;
+            # walk value[run][x][y] instead.  The left operand is unused.
+            f.value = binaryop(lambda unused, v: operator.neg(v), None, self)
+        else:
+            f.value = unaryop(operator.neg, self)
+        return f
+
+    def __getitem__(self, idx):
+        """Return a FormulaStat holding element [idx][0] of every run."""
+        f = FormulaStat()
+        f.value = {}
+        for key in self.value.keys():
+            f.value[key] = {}
+            f.value[key][0] = {}
+            f.value[key][0][0] = self.value[key][idx][0]
+        return f
+
+    def __float__(self):
+        """Return the value recorded for the run selected by display_run."""
+        if isinstance(self.value, FormulaStat):
+            return float(self.value)
+        if not self.value.has_key(display_run):
+            # No data for this run: report infinity (1e300*1e300 overflows).
+            return (1e300*1e300)
+        if len(self.value[display_run]) == 1:
+            return self.value[display_run][0][0]
+        else:
+            # NOTE(review): index 4 is hard-coded for multi-element values;
+            # looks like a debugging leftover - confirm the intended element.
+            return self.value[display_run][4][0]
+
+    def display(self):
+        """Print the values of the run selected by display_run."""
+        import display
+        d = display.VectorDisplay()
+        d.flags = 0
+        d.precision = 1
+        d.name = 'formula'
+        d.desc = 'formula'
+        val = self.value[display_run]
+        d.value = [ val[x][0] for x in val.keys() ]
+        d.display()
+
+
+class Scalar(Statistic,FormulaStat):
+    """Statistic whose data is read with source.data()."""
+    def getValue(self):
+        return source.data(self, self.bins)
+
+    def display(self):
+        """Print the stat if it is printable or everything is displayed."""
+        # flags is not imported at module level; import it where it is used.
+        import display, flags
+        p = display.Print()
+        p.name = self.name
+        p.desc = self.desc
+        p.value = float(self)
+        p.flags = self.flags
+        p.precision = self.precision
+        if display.all or (self.flags & flags.printable):
+            p.display()
+
+    def comparable(self, other):
+        return self.name == other.name
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    def __isub__(self, other):
+        self.value -= other.value
+        return self
+
+    def __iadd__(self, other):
+        self.value += other.value
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the stat untouched.
+        if not other:
+            return self
+        self.value /= other
+        return self
+
+class Vector(Statistic,FormulaStat):
+    """Statistic holding several values, read with source.data()."""
+    def getValue(self):
+        return source.data(self, self.bins)
+
+    def display(self):
+        """Print the stat if it is printable or everything is displayed."""
+        # flags is not imported at module level; import it where it is used.
+        import display, flags
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+        d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               len(self.value) == len(other.value)
+
+    def __eq__(self, other):
+        if issequence(self.value) != issequence(other.value):
+            # Was ``false``, which raised NameError.
+            return False
+
+        if not issequence(self.value):
+            return self.value == other.value
+
+        if len(self.value) != len(other.value):
+            return False
+        for v1,v2 in zip(self.value, other.value):
+            if v1 != v2:
+                return False
+        return True
+
+    # NOTE(review): binaryop() only walks operands that are FormulaStat
+    # instances; if self.value/other.value are plain containers the two
+    # methods below replace the value with {} - confirm what is intended.
+    def __isub__(self, other):
+        self.value = binaryop(operator.sub, self.value, other.value)
+        return self
+
+    def __iadd__(self, other):
+        self.value = binaryop(operator.add, self.value, other.value)
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the stat untouched.
+        if not other:
+            return self
+        if issequence(self.value):
+            for i in xrange(len(self.value)):
+                self.value[i] /= other
+        else:
+            self.value /= other
+        return self
+
+class Formula(Vector):
+    """Stat whose value is computed by evaluating its ``formula`` text."""
+    def getValue(self):
+        # ':' is rewritten to '__' before the text is evaluated.
+        formula = re.sub(':', '__', self.formula)
+        # NOTE(review): eval() runs the formula text as Python code with
+        # source.stattop as its globals; presumably the text comes from the
+        # stats database - make sure that input is trusted.
+        x = eval(formula, source.stattop)
+        return x.value
+
+    def comparable(self, other):
+        # The original also called an undefined compare(self.dist,
+        # other.dist), which raised NameError whenever the names matched.
+        # Compare by name only, as Scalar does.
+        return self.name == other.name
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    # Formulas are recomputed from their operands, so the in-place
+    # arithmetic operators deliberately leave the object unchanged.
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        return self
+
+class SimpleDist(object):
+    """Distribution summarised by sum, sum of squares and sample count."""
+    def __init__(self, sums, squares, samples):
+        self.sums = sums
+        self.squares = squares
+        self.samples = samples
+
+    def getValue(self):
+        return 0.0
+
+    def display(self, name, desc, flags, precision):
+        """Print <name>.mean, <name>.stdev and <name>.samples.
+
+        Mean and stdev are only printed when there is at least one
+        sample.  *desc* is accepted for interface compatibility but is
+        not used.
+        """
+        # math is not imported at module level; import it where it is used.
+        import display, math
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        if self.samples > 0:
+            p.name = name + ".mean"
+            p.value = self.sums / self.samples
+            p.display()
+
+            p.name = name + ".stdev"
+            if self.samples > 1:
+                # Sample variance from the running sums.
+                var = (self.samples * self.squares - self.sums ** 2) \
+                      / (self.samples * (self.samples - 1))
+                if var >= 0:
+                    p.value = math.sqrt(var)
+                else:
+                    p.value = 'NaN'
+            else:
+                p.value = 0.0
+            p.display()
+
+        p.name = name + ".samples"
+        p.value = self.samples
+        p.display()
+
+    def comparable(self, other):
+        return True
+
+    def __eq__(self, other):
+        return self.sums == other.sums and self.squares == other.squares and \
+               self.samples == other.samples
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+        return self
+
+    def __iadd__(self, other):
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the distribution untouched.
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+        return self
+
+class FullDist(SimpleDist):
+    """SimpleDist extended with a histogram.
+
+    ``vec`` holds the bucket counts, ``under``/``over`` the counts
+    outside the bucket range, ``minval``/``maxval`` the extreme values,
+    and ``min``/``max``/``bsize``/``size`` the bucket layout.
+    """
+    def __init__(self, sums, squares, samples, minval, maxval,
+                 under, vec, over, min, max, bsize, size):
+        self.sums = sums
+        self.squares = squares
+        self.samples = samples
+        self.minval = minval
+        self.maxval = maxval
+        self.under = under
+        self.vec = vec
+        self.over = over
+        self.min = min
+        self.max = max
+        self.bsize = bsize
+        self.size = size
+
+    def getValue(self):
+        return 0.0
+
+    def display(self, name, desc, flags, precision):
+        """Print extremes, underflow, every bucket, overflow, then the
+        SimpleDist summary."""
+        import display
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        p.name = name + '.min_val'
+        p.value = self.minval
+        p.display()
+
+        p.name = name + '.max_val'
+        p.value = self.maxval
+        p.display()
+
+        p.name = name + '.underflow'
+        p.value = self.under
+        p.display()
+
+        i = self.min
+        for val in self.vec[:-1]:
+            p.name = name + '[%d:%d]' % (i, i + self.bsize - 1)
+            p.value = val
+            p.display()
+            i += self.bsize
+
+        # The last bucket is labelled up to self.max.
+        p.name = name + '[%d:%d]' % (i, self.max)
+        p.value = self.vec[-1]
+        p.display()
+
+        p.name = name + '.overflow'
+        p.value = self.over
+        p.display()
+
+        SimpleDist.display(self, name, desc, flags, precision)
+
+    def comparable(self, other):
+        return self.min == other.min and self.max == other.max and \
+               self.bsize == other.bsize and self.size == other.size
+
+    def __eq__(self, other):
+        return self.sums == other.sums and self.squares == other.squares and \
+               self.samples == other.samples
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            # Were bare ``under``/``over``, which raised NameError.
+            self.under -= other.under
+            self.vec = map(lambda x,y: x - y, self.vec, other.vec)
+            self.over -= other.over
+        return self
+
+    def __iadd__(self, other):
+        if not self.samples and other.samples:
+            # Adopt other's data in place.  The original rebound ``self``
+            # to ``other``, which aliased the two objects and was lost by
+            # callers that iterate over a list of distributions.
+            self.__dict__.update(other.__dict__)
+            self.vec = list(other.vec)
+            return self
+
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            self.under += other.under
+            self.vec = map(lambda x,y: x + y, self.vec, other.vec)
+            self.over += other.over
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the distribution untouched.
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+
+        if self.samples:
+            self.under /= other
+            for i in xrange(len(self.vec)):
+                self.vec[i] /= other
+            self.over /= other
+        return self
+
+class Dist(Statistic):
+    """Statistic wrapping one distribution object stored in ``dist``."""
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        """Print the distribution if the stat is to be shown."""
+        # flags is not imported at module level; import it where it is used.
+        import display, flags
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        self.dist.display(self.name, self.desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        # Was ``compareable``, a method neither distribution class defines.
+        return self.name == other.name and \
+               self.dist.comparable(other.dist)
+
+    def __eq__(self, other):
+        return self.dist == other.dist
+
+    def __isub__(self, other):
+        self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the stat untouched.
+        if not other:
+            return self
+        self.dist /= other
+        return self
+
+class VectorDist(Statistic):
+    """Statistic holding a sequence of distributions in ``dist``."""
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        """Print every distribution followed by their combined total."""
+        # flags is not imported at module level; import it where it is used.
+        import display, flags
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        # A single distribution object (not a sequence) is not displayed.
+        if isinstance(self.dist, SimpleDist):
+            return
+
+        for dist,sn,sd,i in map(None, self.dist, self.subnames, self.subdescs,
+                                range(len(self.dist))):
+            if len(sn) > 0:
+                name = '%s.%s' % (self.name, sn)
+            else:
+                name = '%s[%d]' % (self.name, i)
+
+            if len(sd) > 0:
+                desc = sd
+            else:
+                desc = self.desc
+
+            dist.display(name, desc, self.flags, self.precision)
+
+        # NOTE(review): ``or 1`` forces the total to be printed whatever
+        # the flags say; kept as in the original.
+        if (self.flags & flags.total) or 1:
+            first = self.dist[0]
+            # FullDist derives from SimpleDist, so it has to be tested
+            # first; otherwise the FullDist total is never built.
+            if isinstance(first, FullDist):
+                disttotal = FullDist( \
+                    reduce(sums, [d.sums for d in self.dist]),
+                    reduce(sums, [d.squares for d in self.dist]),
+                    reduce(sums, [d.samples for d in self.dist]),
+                    min([d.minval for d in self.dist]),
+                    max([d.maxval for d in self.dist]),
+                    reduce(sums, [d.under for d in self.dist]),
+                    reduce(sums, [d.vec for d in self.dist]),
+                    reduce(sums, [d.over for d in self.dist]),
+                    # Bucket layout comes from the first distribution (the
+                    # original indexed the loop variable ``dist`` here).
+                    first.min,
+                    first.max,
+                    first.bsize,
+                    first.size)
+            else:
+                disttotal = SimpleDist( \
+                    reduce(sums, [d.sums for d in self.dist]),
+                    reduce(sums, [d.squares for d in self.dist]),
+                    reduce(sums, [d.samples for d in self.dist]))
+
+            name = '%s.total' % (self.name)
+            desc = self.desc
+            disttotal.display(name, desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               alltrue(map(lambda x, y : x.comparable(y),
+                           self.dist,
+                           other.dist))
+
+    def __eq__(self, other):
+        return alltrue(map(lambda x, y : x == y, self.dist, other.dist))
+
+    # The loops below rely on the distributions' in-place operators
+    # mutating the element objects; the rebinding of sd itself is local.
+    def __isub__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            for sd,od in zip(self.dist, other.dist):
+                sd -= od
+        else:
+            self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            for sd,od in zip(self.dist, other.dist):
+                sd += od
+        else:
+            self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        # Dividing by zero/None leaves the stat untouched.
+        if not other:
+            return self
+        if issequence(self.dist):
+            for dist in self.dist:
+                dist /= other
+        else:
+            self.dist /= other
+        return self
+
+class Vector2d(Statistic):
+    """Statistic with self.x rows of self.y values stored flat in value."""
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        """Print one vector per row and, if flagged, the column totals."""
+        # flags is not imported at module level; import it where it is used.
+        import display, flags
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+
+        if self.__dict__.has_key('ysubnames'):
+            ysubnames = list(self.ysubnames)
+            # Pad with empty names so that every row has one.
+            slack = self.x - len(ysubnames)
+            if slack > 0:
+                ysubnames.extend(['']*slack)
+        else:
+            ysubnames = range(self.x)
+
+        for x,sname in enumerate(ysubnames):
+            o = x * self.y
+            d.value = self.value[o:o+self.y]
+            d.name = '%s[%s]' % (self.name, sname)
+            d.display()
+
+        if self.flags & flags.total:
+            d.value = []
+            for y in range(self.y):
+                xtot = 0.0
+                for x in range(self.x):
+                    # Rows are self.y elements long (see the slicing
+                    # above); the original used self.x as the stride.
+                    xtot += self.value[y + x * self.y]
+                d.value.append(xtot)
+
+            d.name = self.name + '.total'
+            d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and self.x == other.x and \
+               self.y == other.y
+
+    def __eq__(self, other):
+        return True
+
+    # In-place arithmetic is not implemented for 2d vectors; the
+    # operators leave the object unchanged.
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        return self
+
+def NewStat(data):
+    """Build the Statistic subclass that matches ``data.type``.
+
+    Returns None when the type string is not one of the known kinds.
+    """
+    if data.type == 'SCALAR':
+        return Scalar(data)
+    if data.type == 'VECTOR':
+        return Vector(data)
+    if data.type == 'DIST':
+        return Dist(data)
+    if data.type == 'VECTORDIST':
+        return VectorDist(data)
+    if data.type == 'VECTOR2D':
+        return Vector2d(data)
+    if data.type == 'FORMULA':
+        return Formula(data)
+    return None
+
diff --git a/util/stats/print.py b/util/stats/print.py
new file mode 100644
index 000000000..f4492cd2b
--- /dev/null
+++ b/util/stats/print.py
@@ -0,0 +1,127 @@
+# Module-wide display switches.
+# NOTE(review): the classes below test a name ``display_all`` that is
+# never defined in this file; presumably this is the switch they mean.
+all = False
+# When true, Print.__str__ appends "# <desc>" to each output line.
+descriptions = False
+
+class Value:
+    """Formats one number (or a string such as 'NaN') for printing.
+
+    precision >= 0 selects a fixed number of decimals; a negative
+    precision prints whole numbers without decimals and everything else
+    with '%f'.  With percent set, numbers are multiplied by 100 and a
+    '%' sign is appended.
+    """
+    def __init__(self, value, precision, percent = False):
+        self.value = value
+        self.precision = precision
+        self.percent = percent
+
+    def __str__(self):
+        if isinstance(self.value, str):
+            lowered = self.value.lower()
+            if lowered == 'nan':
+                value = 'NaN'
+            elif lowered == 'inf':
+                value = 'Inf'
+            else:
+                # Any other string is printed unchanged; the original
+                # left ``value`` unbound here and raised.
+                value = self.value
+        else:
+            if self.precision >= 0:
+                fmt = "%%.%df" % self.precision
+            elif self.value == 0.0:
+                fmt = "%.0f"
+            elif self.value % 1.0 == 0.0:
+                fmt = "%.0f"
+            else:
+                fmt = "%f"
+            value = self.value
+            if self.percent:
+                value = value * 100.0
+            value = fmt % value
+
+        if self.percent:
+            value = value + "%"
+
+        return value
+
+class Print:
+    """One output line: name, value and optional pdf/cdf/description.
+
+    Attributes (name, value, precision, flags, and optionally pdf, cdf,
+    desc) are supplied as keyword arguments or assigned afterwards.
+    """
+    def __init__(self, **vals):
+        self.__dict__.update(vals)
+
+    def __str__(self):
+        value = Value(self.value, self.precision)
+        pdf = ''
+        cdf = ''
+        if 'pdf' in self.__dict__:
+            pdf = Value(self.pdf, 2, True)
+        if 'cdf' in self.__dict__:
+            cdf = Value(self.cdf, 2, True)
+
+        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)
+
+        if descriptions and 'desc' in self.__dict__ and self.desc:
+            output = "%s # %s" % (output, self.desc)
+
+        return output
+
+    def doprint(self):
+        """Return True when this line should be printed."""
+        # The original tested display_all / flags_nozero / flags_nonan,
+        # none of which exist; use the module switch ``all`` and the
+        # constants from the flags module.
+        import flags
+        if all:
+            return True
+        if self.value == 0.0 and (self.flags & flags.nozero):
+            return False
+        if isinstance(self.value, str):
+            if self.value == 'NaN' and (self.flags & flags.nonan):
+                return False
+        return True
+
+    def display(self):
+        if self.doprint():
+            print self
+
+class VectorDisplay:
+    """Prints a stat whose value is a list/tuple, one line per element.
+
+    The caller assigns name, desc, value, flags and precision (and
+    optionally subnames/subdescs) before calling display().
+    """
+    def display(self):
+        # The original used undefined names (issequence, display_all,
+        # flags_pdf, flags_total); use the module switch ``all`` and the
+        # constants from the flags module instead.
+        import flags
+        p = Print()
+        p.flags = self.flags
+        p.precision = self.precision
+
+        if not isinstance(self.value, (list, tuple)):
+            p.name = self.name
+            p.desc = self.desc
+            p.value = self.value
+            p.display()
+            return
+
+        if not len(self.value):
+            return
+
+        mytotal = reduce(lambda x,y: float(x) + float(y), self.value)
+        mycdf = 0.0
+
+        value = self.value
+
+        if all:
+            subnames = [ '[%d]' % i for i in range(len(value)) ]
+        else:
+            subnames = [''] * len(value)
+
+        if 'subnames' in self.__dict__:
+            # Bounded like the subdescs loop below so that surplus
+            # subnames cannot index past the end of the list.
+            for i in xrange(min(len(value), len(self.subnames))):
+                if len(self.subnames[i]) > 0:
+                    subnames[i] = '.%s' % self.subnames[i]
+
+        subdescs = [self.desc]*len(value)
+        if 'subdescs' in self.__dict__:
+            for i in xrange(min(len(value), len(self.subdescs))):
+                subdescs[i] = self.subdescs[i]
+
+        for val,sname,sdesc in map(None, value, subnames, subdescs):
+            # The cdf accumulates over unnamed elements too, even though
+            # those are not printed.
+            if mytotal > 0.0:
+                mypdf = float(val) / float(mytotal)
+                mycdf += mypdf
+                if (self.flags & flags.pdf):
+                    p.pdf = mypdf
+                    p.cdf = mycdf
+
+            if len(sname) == 0:
+                continue
+
+            p.name = self.name + sname
+            p.desc = sdesc
+            p.value = val
+            p.display()
+
+        if (self.flags & flags.total):
+            # The total line carries no pdf/cdf columns.
+            if 'pdf' in p.__dict__: del p.__dict__['pdf']
+            if 'cdf' in p.__dict__: del p.__dict__['cdf']
+            p.name = self.name + '.total'
+            p.desc = self.desc
+            p.value = mytotal
+            p.display()
+
diff --git a/util/stats/stats.py b/util/stats/stats.py
new file mode 100755
index 000000000..1d521fd9d
--- /dev/null
+++ b/util/stats/stats.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+from __future__ import division
+import re, sys
+
+def usage():
+    """Print the command-line synopsis and exit with status 1."""
+    # -B and -G are accepted by the option parser but were missing here.
+    print '''\
+Usage: %s [-B] [-E] [-F] [-G] [-d <db> ] [-g <get> ] [-h <host>] [-p]
+       [-s <system>] [-r <runs> ] [-u <username>] <command> [command args]
+''' % sys.argv[0]
+    sys.exit(1)
+
+def getopts(list, flags):
+    """Parse *list* with getopt; on a bad option print usage and exit.
+
+    Returns the (options, remaining arguments) pair from getopt.getopt.
+    """
+    import getopt
+    try:
+        parsed = getopt.getopt(list, flags)
+    except getopt.GetoptError:
+        usage()
+
+    return parsed
+
+def printval(name, value, invert = False):
+    """Print "name: value" in the format chosen by printval.mode.
+
+    With *invert* a non-zero value is replaced by its reciprocal.
+    Infinite values (the "no data" marker) are not printed at all.
+    """
+    if invert and value != 0.0:
+        value = 1 / value
+
+    if value == (1e300*1e300):
+        return
+
+    mode = printval.mode
+    if mode == 'G':
+        fmt = '%s: %g'
+    elif mode != 'F' and value > 1e6:
+        fmt = '%s: %0.5e'
+    else:
+        fmt = '%s: %f'
+    print fmt % (name, value)
+
+# Default output mode; the -E and -F command-line options change it.
+printval.mode = 'G'
+
+def unique(list):
+    """Return the distinct items of *list* (in dictionary key order)."""
+    seen = {}
+    for item in list:
+        seen[item] = None
+    return seen.keys()
+
+def graphdata(runs, tag, label, value,
+              datadir = '/n/ziff/z/binkertn/graph/data.ibm'):
+    """Write one graph data file per benchmark/dma/cache/system/checkpoint.
+
+    Each file, <datadir>/<tag>.<base>.dat, holds a header followed by one
+    row per clock speed with *value* evaluated for every configuration.
+    Run names are looked up in info.source.allRunNames; *runs* is
+    accepted for symmetry with printdata() but is not used.  *label*
+    becomes the y-axis label.
+    """
+    import info
+    configs = ['std', 'csa', 'ht1', 'ht4', 'htx', 'ocm', 'occ', 'ocp' ]
+    benchmarks = [ 'm', 's' ]
+    dmas = [ 'x', 'd', 'b' ]
+    caches = [ '1', '2', '3', '4', '5' ]
+    systems = [ 'M' ]
+    checkpoints = [ '1' ]
+
+    names = []
+    for bench in benchmarks:
+        for dma in dmas:
+            for cache in caches:
+                for system in systems:
+                    for cpt in checkpoints:
+                        names.append([bench, dma, cache, system, cpt])
+
+    for bench,dma,cache,system,cpt in names:
+        base = '%s.%s.%s.%s.%s' % (bench, dma, cache, system, cpt)
+        fname = '%s/%s.%s.dat' % (datadir, tag, base)
+        f = open(fname, 'w')
+        # Close the file even if a run lookup or conversion fails.
+        try:
+            print >>f, '#set TITLE = %s' % base
+            print >>f, '#set xlbl = Configuration'
+            print >>f, '#set ylbl = %s' % label
+            print >>f, '#set sublabels = %s' % ' '.join(configs)
+
+            for speed,freq in zip(['s', 'q'],['4GHz','10GHz']):
+                print >>f, '"%s"' % freq,
+                for conf in configs:
+                    name = '%s.%s.%s.%s.%s.%s.%s' % (conf, bench, dma, speed,
+                                                     cache, system, cpt)
+                    run = info.source.allRunNames[name]
+                    # float(value) reports the run selected here.
+                    info.display_run = run.run
+                    val = float(value)
+                    # Infinity marks "no data"; plot it as zero.
+                    if val == 1e300*1e300:
+                        print >>f, 0.0,
+                    else:
+                        print >>f, "%f" % val,
+                print >>f
+        finally:
+            f.close()
+
+def printdata(runs, value, invert = False):
+    """Print *value* once for every run in *runs*.
+
+    info.display_run is pointed at each run in turn so that
+    float(value) yields that run's number.  *invert* is forwarded to
+    printval() (it used to be silently dropped).
+    """
+    import info
+    for run in runs:
+        info.display_run = run.run
+        val = float(value)
+        printval(run.name, val, invert)
+
+class CommandException(Exception):
+    """Raised by commands() for an unknown command or bad arguments."""
+
+def commands(options, command, args):
+    """Execute one command of the stats tool.
+
+    options -- object carrying host, db, passwd, user, runs, system and
+               get (and optionally graph/binned) attributes
+    command -- the command name
+    args    -- the remaining command-line arguments
+
+    The 'database' command administers the database through dbinit.
+    Every other command connects through db.Database and lists
+    information or prints (or graphs) one statistic for the selected
+    runs.  CommandException is raised for an unknown command or an
+    invalid argument list.
+    """
+    # -B/-G are recorded on the options object; honour them as well as
+    # the module-level defaults.
+    graphing = graph or getattr(options, 'graph', False)
+    binning = binned or getattr(options, 'binned', False)
+
+    if command == 'database':
+        if len(args) == 0: raise CommandException
+
+        import dbinit
+        mydb = dbinit.MyDB(options)
+
+        if args[0] == 'drop':
+            if len(args) > 2: raise CommandException
+            mydb.admin()
+            mydb.drop()
+            if len(args) == 2 and args[1] == 'init':
+                mydb.create()
+                mydb.connect()
+                mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'init':
+            if len(args) > 1: raise CommandException
+            mydb.admin()
+            mydb.create()
+            mydb.connect()
+            mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'clean':
+            if len(args) > 1: raise CommandException
+            mydb.connect()
+            mydb.clean()
+            return
+
+        raise CommandException
+
+    import db, info
+    info.source = db.Database()
+    info.source.host = options.host
+    info.source.db = options.db
+    info.source.passwd = options.passwd
+    info.source.user = options.user
+    info.source.connect()
+    # Presumably this publishes the stat names used below (sim_ticks,
+    # sim_seconds, ...) as module globals - TODO confirm in db.py.
+    info.source.update_dict(globals())
+
+    system = info.source.__dict__[options.system]
+
+    if type(options.get) is str:
+        info.source.get = options.get
+
+    if options.runs is None:
+        runs = info.source.allRuns
+    else:
+        rx = re.compile(options.runs)
+        runs = []
+        for run in info.source.allRuns:
+            if rx.match(run.name):
+                runs.append(run)
+
+    # Guarded: an empty run list used to raise IndexError here.
+    if runs:
+        info.display_run = runs[0].run
+
+    if command == 'runs':
+        user = None
+        # NOTE(review): '-u' takes no argument in this option string, so
+        # ``a`` is always '' - probably meant to be 'u:'.
+        opts, args = getopts(args, '-u')
+        if len(args):
+            raise CommandException
+        for o,a in opts:
+            if o == '-u':
+                user = a
+        info.source.listRuns(user)
+        return
+
+    if command == 'stats':
+        if len(args) == 0:
+            info.source.listStats()
+        elif len(args) == 1:
+            info.source.listStats(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'stat':
+        if len(args) != 1:
+            raise CommandException
+
+        stats = info.source.getStat(args[0])
+        for stat in stats:
+            if graphing:
+                graphdata(runs, stat.name, stat.name, stat)
+            else:
+                print stat.name
+                printdata(runs, stat)
+        return
+
+    if command == 'bins':
+        if len(args) == 0:
+            info.source.listBins()
+        elif len(args) == 1:
+            info.source.listBins(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'formulas':
+        if len(args) == 0:
+            info.source.listFormulas()
+        elif len(args) == 1:
+            info.source.listFormulas(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'samples':
+        if len(args):
+            raise CommandException
+
+        info.source.listTicks(runs)
+        return
+
+    # None of the remaining commands takes arguments.
+    if len(args):
+        raise CommandException
+
+    if command == 'usertime':
+        import copy
+        kernel = copy.copy(system.full_cpu.numCycles)
+        kernel.bins = 'kernel'
+
+        user = copy.copy(system.full_cpu.numCycles)
+        user.bins = 'user'
+
+        if graphing:
+            graphdata(runs, 'usertime', 'User Fraction',
+                      user / system.full_cpu.numCycles)
+        else:
+            printdata(runs, user / system.full_cpu.numCycles)
+        return
+
+    if command == 'ticks':
+        if binning:
+            print 'kernel ticks'
+            system.full_cpu.numCycles.bins = 'kernel'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'idle ticks'
+            system.full_cpu.numCycles.bins = 'idle'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'user ticks'
+            system.full_cpu.numCycles.bins = 'user'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'total ticks'
+
+        system.full_cpu.numCycles.bins = None
+        printdata(runs, system.full_cpu.numCycles)
+        return
+
+    if command == 'packets':
+        packets = system.tsunami.nsgige.rxPackets
+        if graphing:
+            graphdata(runs, 'packets', 'Packets', packets)
+        else:
+            printdata(runs, packets)
+        return
+
+    if command == 'ppt' or command == 'tpp':
+        ppt = system.tsunami.nsgige.rxPackets / sim_ticks
+        printdata(runs, ppt, command == 'tpp')
+        return
+
+    if command == 'pps':
+        pps = system.tsunami.nsgige.rxPackets / sim_seconds
+        if graphing:
+            graphdata(runs, 'pps', 'Packets/s', pps)
+        else:
+            printdata(runs, pps)
+        return
+
+    if command == 'bpt' or command == 'tpb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+        bpt = bytes / sim_ticks * 8
+        if graphing:
+            graphdata(runs, 'bpt', 'bps / Hz', bpt)
+        else:
+            printdata(runs, bpt, command == 'tpb')
+        return
+
+    if command == 'bptb' or command == 'tpbb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        # NOTE(review): ``ticks`` is not defined in this file; the 'bpt'
+        # command divides by sim_ticks - confirm which name is meant.
+        print 'kernel stats'
+        bytes.bins = 'kernel'
+        printdata(runs, bytes / ticks)
+
+        print 'idle stats'
+        bytes.bins = 'idle'
+        printdata(runs, bytes / ticks)
+
+        print 'user stats'
+        bytes.bins = 'user'
+        printdata(runs, bytes / ticks)
+
+        return
+
+    if command == 'bytes':
+        stat = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        if binning:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+            stat.bins = None
+
+        printdata(runs, stat)
+        return
+
+    if command == 'rxbps':
+        gbps = system.tsunami.nsgige.rxBandwidth / 1e9
+        if graphing:
+            graphdata(runs, 'rxbps', 'Bandwidth (Gbps)', gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'txbps':
+        gbps = system.tsunami.nsgige.txBandwidth / 1e9
+        if graphing:
+            graphdata(runs, 'txbps', 'Bandwidth (Gbps)', gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'bps':
+        rxbps = system.tsunami.nsgige.rxBandwidth
+        txbps = system.tsunami.nsgige.txBandwidth
+        gbps = (rxbps + txbps) / 1e9
+        if graphing:
+            graphdata(runs, 'bps', 'Bandwidth (Gbps)', gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'misses':
+        stat = system.L3.overall_mshr_misses
+        if binning:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+
+        stat.bins = None
+        if graphing:
+            graphdata(runs, 'misses', 'Overall MSHR Misses', stat)
+        else:
+            printdata(runs, stat)
+        return
+
+    if command == 'mpkb':
+        misses = system.L3.overall_mshr_misses
+        rxbytes = system.tsunami.nsgige.rxBytes
+        txbytes = system.tsunami.nsgige.txBytes
+
+        if binning:
+            print 'mpkb kernel stats'
+            misses.bins = 'kernel'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb idle stats'
+            misses.bins = 'idle'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb user stats'
+            misses.bins = 'user'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb total stats'
+
+        mpkb = misses / ((rxbytes + txbytes) / 1024)
+        misses.bins = None
+        if graphing:
+            graphdata(runs, 'mpkb', 'Misses / KB', mpkb)
+        else:
+            printdata(runs, mpkb)
+        return
+
+    if command == 'execute':
+        printdata(runs, system.full_cpu.ISSUE__count)
+        return
+
+    if command == 'commit':
+        printdata(runs, system.full_cpu.COM__count)
+        return
+
+    if command == 'fetch':
+        printdata(runs, system.full_cpu.FETCH__count)
+        return
+
+    if command == 'rxbpp':
+        bpp = system.tsunami.nsgige.rxBytes / system.tsunami.nsgige.rxPackets
+        # Was ``run``: an unbound name or a single leftover run object.
+        printdata(runs, 8 * bpp)
+        return
+
+    if command == 'txbpp':
+        bpp = system.tsunami.nsgige.txBytes / system.tsunami.nsgige.txPackets
+        # Was ``run``: an unbound name or a single leftover run object.
+        printdata(runs, 8 * bpp)
+        return
+
+    raise CommandException
+
+
+# Module-level switches read by commands(): ``graph`` selects graphdata()
+# instead of printdata(), ``binned`` adds the per-bin
+# (kernel/idle/user) output.
+graph = False
+binned = False
+
+class Options:
+    """Plain attribute container for the command-line settings."""
+
+if __name__ == '__main__':
+    # Script entry point: collect the options, then run one command.
+    import getpass
+
+    options = Options()
+    options.host = 'zizzer.pool'
+    options.db = None
+    options.passwd = ''
+    options.user = getpass.getuser()
+    options.runs = None
+    options.system = 'client'
+    options.get = None
+    # Defaults so the attributes exist even without -B/-G.
+    options.binned = False
+    options.graph = False
+
+    opts, args = getopts(sys.argv[1:], '-BEFGd:g:h:pr:s:u:')
+    for o,a in opts:
+        if o == '-B':
+            options.binned = True
+            # commands() reads the module-level switch, so set it too;
+            # previously -B had no effect.
+            binned = True
+        if o == '-E':
+            printval.mode = 'E'
+        if o == '-F':
+            printval.mode = 'F'
+        if o == '-G':
+            options.graph = True
+            # Same as -B: keep the module-level switch in step.
+            graph = True
+        if o == '-d':
+            options.db = a
+        if o == '-g':
+            options.get = a
+        if o == '-h':
+            options.host = a
+        if o == '-p':
+            options.passwd = getpass.getpass()
+        if o == '-r':
+            options.runs = a
+        if o == '-u':
+            options.user = a
+        if o == '-s':
+            options.system = a
+
+    if len(args) == 0:
+        usage()
+
+    command = args[0]
+    args = args[1:]
+
+    try:
+        commands(options, command, args)
+    except CommandException:
+        usage()