diff options
Diffstat (limited to 'util/stats')
-rw-r--r-- | util/stats/db.py | 415 | ||||
-rw-r--r-- | util/stats/dbinit.py | 388 | ||||
-rw-r--r-- | util/stats/display.py | 124 | ||||
-rw-r--r-- | util/stats/flags.py | 36 | ||||
-rw-r--r-- | util/stats/info.py | 724 | ||||
-rw-r--r-- | util/stats/print.py | 127 | ||||
-rwxr-xr-x | util/stats/stats.py | 478 |
7 files changed, 2292 insertions, 0 deletions
# diff --git a/util/stats/db.py b/util/stats/db.py new file mode 100644 index 000000000..4cba82446 --- /dev/null +++ b/util/stats/db.py @@ -0,0 +1,415 @@
import MySQLdb, re, string

def statcmp(a, b):
    """cmp-style ordering for dotted stat names.

    Both names are split on '.'. All components before the last component
    of the shorter name are compared pairwise; the first difference decides.
    If those prefixes agree, names with the same number of components are
    ordered by their last component, otherwise the name with fewer
    components sorts first.

    Returns a negative, zero or positive integer, as the builtin cmp() does.
    """
    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[0:last], v2[0:last]):
        if i != j:
            return cmp(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return cmp(v1[last], v2[last])
    else:
        return cmp(len(v1), len(v2))

class RunData:
    """One row of the 'runs' table.

    Database.connect() builds these from
    (rn_id, rn_name, rn_sample, rn_user, rn_project) rows.
    """
    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        if len(row) >= 5:
            # Five-column row as selected by Database.connect(). The sample
            # column sits between name and user, so user/project live at
            # indexes 3 and 4 (reading 2 and 3 mislabels sample as user).
            self.sample = row[2]
            self.user = row[3]
            self.project = row[4]
        else:
            # Four-column row without a sample column.
            self.sample = None
            self.user = row[2]
            self.project = row[3]

class SubData:
    """One row of the 'subdata' table: (sd_stat, sd_x, sd_y, sd_name, sd_descr)."""
    def __init__(self, row):
        self.stat = int(row[0])
        self.x = int(row[1])
        self.y = int(row[2])
        self.name = row[3]
        self.descr = row[4]

class Data:
    """One aggregated result row: (stat, run, x, y, data).

    Raises ValueError when the row does not have exactly five columns.
    """
    def __init__(self, row):
        if len(row) != 5:
            # String exceptions are not valid on current interpreters;
            # raise a real exception carrying the same message.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
            self.run, self.x, self.y, self.data)

class StatData(object):
    """One row of the 'stats' table, addressed by column position.

    The boolean columns (indexes 4 and 7-11) are folded into a single
    bitmask in self.flags using the constants of the 'flags' module.
    Formula stats look their text up through the class attribute 'db',
    which Database.connect() assigns before constructing any StatData.
    """
    def __init__(self, row):
        self.stat = int(row[0])
        self.name = row[1]
        self.desc = row[2]
        self.type = row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        import flags
        self.flags = 0
        if int(row[4]): self.flags |= flags.printable
        if int(row[7]): self.flags |= flags.nozero
        if int(row[8]): self.flags |= flags.nonan
        if int(row[9]): self.flags |= flags.total
        if int(row[10]): self.flags |= flags.pdf
        if int(row[11]): self.flags |= flags.cdf

        # Distribution parameters are only read for distribution types.
        if self.type == 'DIST' or self.type == 'VECTORDIST':
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]

class Node(object):
    """Intermediate path component created by Database.append()."""
    def __init__(self, name):
        self.name = name
    def __str__(self):
        # Was 'return name', which is an undefined global here.
        return self.name

class Database(object):
    """In-memory index of a stats database plus query helpers."""
    def __init__(self):
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}

        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allBins = []
        self.allBinIds = {}
        self.allBinNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.bins = None
        self.ticks = None
        # Stored straight into the instance dict: 'get' holds the function
        # that builds the data query (sum by default).
        self.__dict__['get'] = type(self).sum

    def query(self, sql):
        """Execute sql on the current cursor; results stay on the cursor."""
        self.cursor.execute(sql)

    def update_dict(self, dict):
        """Copy the top-level stat namespace (self.stattop) into dict."""
        dict.update(self.stattop)

    def append(self, stat):
        """Register stat under its dotted name.

        ':' in the name is replaced by '__'. Every leading path component
        becomes (or reuses) an attribute holding a Node, and the final
        component is bound to the stat itself, so 'a.b.c' ends up
        reachable as self.a.b.c. The stat is also recorded in stattop,
        statdict and statlist.
        """
        statname = re.sub(':', '__', stat.name)
        path = statname.split('.')
        pathtop = path[0]
        fullname = ''

        x = self
        while len(path) > 1:
            name = path.pop(0)
            if name not in x.__dict__:
                x.__dict__[name] = Node(fullname + name)
            x = x.__dict__[name]
            fullname = '%s%s.' % (fullname, name)

        name = path.pop(0)
        x.__dict__[name] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Open the MySQL connection and load runs, bins, subdata,
        formulas and stats into the lookup tables on self."""
        # connect
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)

        # create a cursor
        self.cursor = self.thedb.cursor()

        self.query('select rn_id,rn_name,rn_sample,rn_user,rn_project '
                   'from runs')
        for result in self.cursor.fetchall():
            run = RunData(result)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        self.query('select * from bins')
        for id, name in self.cursor.fetchall():
            self.allBinIds[int(id)] = name
            self.allBinNames[name] = int(id)

        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for result in self.cursor.fetchall():
            subdata = SubData(result)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        self.query('select * from formulas')
        for id, formula in self.cursor.fetchall():
            self.allFormulas[int(id)] = formula

        # StatData resolves formula text through this class attribute.
        StatData.db = self
        self.query('select * from stats')
        import info
        for result in self.cursor.fetchall():
            stat = info.NewStat(StatData(result))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    # Name: listbins
    # Desc: Prints all bins whose name matches the regex argument
    #       (anchored at the start of the name); with the default
    #       argument all bins are printed
    def listBins(self, regex='.*'):
        rx = re.compile(regex)
        print('%-50s %-10s' % ('bin name', 'id'))
        print('-' * 61)
        names = list(self.allBinNames.keys())
        names.sort()
        for name in names:
            # The regex argument used to be accepted but never applied.
            if rx.match(name):
                id = self.allBinNames[name]
                print('%-50s %-10d' % (name, id))

    # Name: listruns
    # Desc: Prints all runs matching a given user, if no argument
    #       is given all runs are returned
    def listRuns(self, user=None):
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    # Name: listTicks
    # Desc: Prints the distinct ticks stored for stat id 1950
    def listTicks(self, run=None):
        print("tick")
        print("----------------------------------------")
        # NOTE(review): 'run' is accepted but not applied, and the stat id
        # is hard-coded. A run filter would have to be appended as
        # ' and dt_run=%d' because the statement already has a where clause.
        sql = 'select distinct dt_tick from data where dt_stat=1950'
        self.query(sql)
        for r in self.cursor.fetchall():
            print(r[0])

    # Name: liststats
    # Desc: Prints all statistics that appear in the database,
    #       the optional argument is a regular expression that can
    #       be used to prune the result set
    def listStats(self, regex=None):
        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        stats = [ stat.name for stat in self.allStats ]
        stats.sort(statcmp)
        for stat in stats:
            stat = self.allStatNames[stat]
            if rx is None or rx.match(stat.name):
                print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    # Name: listformulas
    # Desc: Prints every stat of type FORMULA together with its formula
    #       text, the optional argument is a regular expression that can
    #       be used to prune the result set
    def listFormulas(self, regex=None):
        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        stats = [ stat.name for stat in self.allStats ]
        stats.sort(statcmp)
        for stat in stats:
            stat = self.allStatNames[stat]
            if stat.type == 'FORMULA' and (rx is None or rx.match(stat.name)):
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat selectors to stat objects.

        stats may be a single selector or a list of them. An int is looked
        up by stat id (KeyError if unknown); a str is compiled as a regex
        and every stat whose name matches from its start is returned.
        Other selector types are ignored.
        """
        if type(stats) is not list:
            stats = [ stats ]

        ret = []
        for selector in stats:
            if type(selector) is int:
                ret.append(self.allStatIds[selector])
            elif type(selector) is str:
                rx = re.compile(selector)
                # Separate name for the candidate so the selector being
                # processed is not overwritten inside the loop.
                for candidate in self.allStats:
                    if rx.match(candidate.name):
                        ret.append(candidate)
        return ret

def getBin(self, bins):
    """Translate bin selectors into a flat list of bin ids.

    bins may be a single selector or a list of selectors. Each selector is
    handled by type:
      int  -- taken as a bin id and passed through unchanged
      str  -- looked up in self.allBinNames (KeyError if unknown)
      else -- treated as a compiled regex; the id of every bin whose name
              matches from its start is added
    """
    if not isinstance(bins, list):
        bins = [bins]

    ids = []
    for selector in bins:
        if isinstance(selector, int):
            ids.append(selector)
        elif isinstance(selector, str):
            ids.append(self.allBinNames[selector])
        else:
            for name, bin_id in self.allBinNames.items():
                if selector.match(name):
                    ids.append(bin_id)

    return ids

def getNotBin(self, bin):
    """Return the ids of all known bins NOT selected by bin.

    bin accepts the same selectors as getBin(). The result follows the
    iteration order of self.allBinIds.
    """
    # Must go through self: a bare getBin(...) is not resolvable from
    # inside a method and raised NameError.
    excluded = {}
    for bin_id in self.getBin(bin):
        excluded[bin_id] = 1

    return [bin_id for bin_id in self.allBinIds.keys()
            if bin_id not in excluded]

#########################################
# get the data
#
def inner(self, op, stat, bins, ticks, group=False):
    """Build the SQL that aggregates dt_data with the SQL function op.

    stat  -- one stat object, or a list of them (their .stat ids are used)
    bins  -- list of bin ids to restrict to, or None/empty for no filter
    ticks -- list of ticks to restrict to, or None/empty for no filter
    group -- when true, dt_tick is also selected and grouped on

    self.runs, when non-empty, restricts the runs. Rows are grouped by
    stat, run, x and y. op is interpolated verbatim, so it must be a
    trusted aggregate name; ids are formatted with %d.
    Returns the SQL text; nothing is executed here.
    """
    def any_of(column, values):
        # "col=v1 or col=v2 ..." for a list of integer values
        return ' or '.join(['%s=%d' % (column, v) for v in values])

    columns = ['dt_stat as stat', 'dt_run as run', 'dt_x as x', 'dt_y as y']
    if group:
        columns.append('dt_tick as tick')
    columns.append('%s(dt_data) as data' % op)

    sql = 'select %s from data where ' % ', '.join(columns)

    # The leading blank in the clauses below is kept so the statement text
    # stays identical to what this method has always produced.
    if isinstance(stat, list):
        sql += ' (%s)' % any_of('dt_stat', [s.stat for s in stat])
    else:
        sql += ' dt_stat=%d' % stat.stat

    for column, values in (('dt_run', self.runs),
                           ('dt_bin', bins),
                           ('dt_tick', ticks)):
        if values is not None and len(values):
            sql += ' and (%s)' % any_of(column, values)

    sql += ' group by dt_stat,dt_run,dt_x,dt_y'
    if group:
        sql += ',dt_tick'
    return sql

def outer(self, op_out, op_in, stat, bins, ticks):
    """Build a two-level aggregate query.

    The inner query applies op_in per (stat, run, x, y, tick); the outer
    query applies op_out to those per-tick results, grouped by
    (stat, run, x, y). Returns the SQL text.
    """
    per_tick = self.inner(op_in, stat, bins, ticks, True)
    sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, per_tick)
    sql += 'group by stat,run,x,y'
    return sql

# Name: sum
# Desc: given a run, a stat and an array of samples and bins,
#       sum all the bins and then get the standard deviation of the
#       samples for non-binned runs.
This will just return the average + # of samples, however a bin array still must be passed + def sum(self, stat, bins, ticks): + return self.inner('sum', stat, bins, ticks) + + # Name: avg + # Desc: given a run, a stat and an array of samples and bins, + # sum all the bins and then average the samples for non-binned + # runs this will just return the average of samples, however + # a bin array still must be passed + def avg(self, stat, bins, ticks): + return self.outer('avg', 'sum', stat, bins, ticks) + + # Name: stdev + # Desc: given a run, a stat and an array of samples and bins, + # sum all the bins and then get the standard deviation of the + # samples for non-binned runs. This will just return the average + # of samples, however a bin array still must be passed + def stdev(self, stat, bins, ticks): + return self.outer('stddev', 'sum', stat, bins, ticks) + + def __getattribute__(self, attr): + if attr != 'get': + return super(Database, self).__getattribute__(attr) + + if self.__dict__['get'] == type(self).sum: + return 'sum' + elif self.__dict__['get'] == type(self).avg: + return 'avg' + elif self.__dict__['get'] == type(self).stdev: + return 'stdev' + else: + return '' + + def __setattr__(self, attr, value): + if attr != 'get': + super(Database, self).__setattr__(attr, value) + return + + if value == 'sum': + self.__dict__['get'] = type(self).sum + elif value == 'avg': + self.__dict__['get'] = type(self).avg + elif value == 'stdev': + self.__dict__['get'] = type(self).stdev + else: + raise AttributeError, "can only set get to: sum | avg | stdev" + + def data(self, stat, bins=None, ticks=None): + if bins is None: + bins = self.bins + if ticks is None: + ticks = self.ticks + sql = self.__dict__['get'](self, stat, bins, ticks) + self.query(sql) + + runs = {} + for x in self.cursor.fetchall(): + data = Data(x) + if not runs.has_key(data.run): + runs[data.run] = {} + if not runs[data.run].has_key(data.x): + runs[data.run][data.x] = {} + + 
runs[data.run][data.x][data.y] = data.data + return runs diff --git a/util/stats/dbinit.py b/util/stats/dbinit.py new file mode 100644 index 000000000..686f55c98 --- /dev/null +++ b/util/stats/dbinit.py @@ -0,0 +1,388 @@ +import MySQLdb + +class MyDB(object): + def __init__(self, options): + self.name = options.db + self.host = options.host + self.user = options.user + self.passwd = options.passwd + self.mydb = None + self.cursor = None + + def admin(self): + self.close() + self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, + passwd=self.passwd) + self.cursor = self.mydb.cursor() + + def connect(self): + self.close() + self.mydb = MySQLdb.connect(db=self.name, host=self.host, + user=self.user, passwd=self.passwd) + self.cursor = self.mydb.cursor() + + def close(self): + if self.mydb is not None: + self.mydb.close() + self.cursor = None + + def query(self, sql): + self.cursor.execute(sql) + + def drop(self): + self.query('DROP DATABASE IF EXISTS %s' % self.name) + + def create(self): + self.query('CREATE DATABASE %s' % self.name) + + def populate(self): + # + # Each run (or simulation) gets its own entry in the runs table to + # group stats by where they were generated + # + # COLUMNS: + # 'id' is a unique identifier for each run to be used in other + # tables. + # 'name' is the user designated name for the data generated. It is + # configured in the simulator. + # 'user' identifies the user that generated the data for the given + # run. + # 'project' another name to identify runs for a specific goal + # 'date' is a timestamp for when the data was generated. It can be + # used to easily expire data that was generated in the past. + # 'expire' is a timestamp for when the data should be removed from + # the database so we don't have years worth of junk. + # + # INDEXES: + # 'run' is indexed so you can find out details of a run if the run + # was retreived from the data table. 
+ # 'name' is indexed so that two all run names are forced to be unique + # + self.query(''' + CREATE TABLE runs( + rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + rn_name VARCHAR(200) NOT NULL, + rn_sample VARCHAR(32) NOT NULL, + rn_user VARCHAR(32) NOT NULL, + rn_project VARCHAR(100) NOT NULL, + rn_date TIMESTAMP NOT NULL, + rn_expire TIMESTAMP NOT NULL, + PRIMARY KEY (rn_id), + UNIQUE (rn_name,rn_sample) + ) TYPE=InnoDB''') + + # + # We keep the bin names separate so that the data table doesn't get + # huge since bin names are frequently repeated. + # + # COLUMNS: + # 'id' is the unique bin identifer. + # 'name' is the string name for the bin. + # + # INDEXES: + # 'bin' is indexed to get the name of a bin when data is retrieved + # via the data table. + # 'name' is indexed to get the bin id for a named bin when you want + # to search the data table based on a specific bin. + # + self.query(''' + CREATE TABLE bins( + bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + bn_name VARCHAR(255) NOT NULL, + PRIMARY KEY(bn_id), + UNIQUE (bn_name) + ) TYPE=InnoDB''') + + # + # The stat table gives us all of the data for a particular stat. + # + # COLUMNS: + # 'stat' is a unique identifier for each stat to be used in other + # tables for references. + # 'name' is simply the simulator derived name for a given + # statistic. + # 'descr' is the description of the statistic and what it tells + # you. + # 'type' defines what the stat tells you. Types are: + # SCALAR: A simple scalar statistic that holds one value + # VECTOR: An array of statistic values. Such a something that + # is generated per-thread. Vectors exist to give averages, + # pdfs, cdfs, means, standard deviations, etc across the + # stat values. + # DIST: Is a distribution of data. When the statistic value is + # sampled, its value is counted in a particular bucket. + # Useful for keeping track of utilization of a resource. + # (e.g. fraction of time it is 25% used vs. 50% vs. 
100%) + # VECTORDIST: Can be used when the distribution needs to be + # factored out into a per-thread distribution of data for + # example. It can still be summed across threads to find + # the total distribution. + # VECTOR2D: Can be used when you have a stat that is not only + # per-thread, but it is per-something else. Like + # per-message type. + # FORMULA: This statistic is a formula, and its data must be + # looked up in the formula table, for indicating how to + # present its values. + # 'subdata' is potentially used by any of the vector types to + # give a specific name to all of the data elements within a + # stat. + # 'print' indicates whether this stat should be printed ever. + # (Unnamed stats don't usually get printed) + # 'prereq' only print the stat if the prereq is not zero. + # 'prec' number of decimal places to print + # 'nozero' don't print zero values + # 'nonan' don't print NaN values + # 'total' for vector type stats, print the total. + # 'pdf' for vector type stats, print the pdf. + # 'cdf' for vector type stats, print the cdf. + # + # The Following are for dist type stats: + # 'min' is the minimum bucket value. Anything less is an underflow. + # 'max' is the maximum bucket value. Anything more is an overflow. + # 'bktsize' is the approximate number of entries in each bucket. + # 'size' is the number of buckets. equal to (min/max)/bktsize. + # + # INDEXES: + # 'stat' is indexed so that you can find out details about a stat + # if the stat id was retrieved from the data table. + # 'name' is indexed so that you can simply look up data about a + # named stat. 
+ # + self.query(''' + CREATE TABLE stats( + st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + st_name VARCHAR(255) NOT NULL, + st_descr TEXT NOT NULL, + st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", + "VECTOR2D", "FORMULA") NOT NULL, + st_print BOOL NOT NULL, + st_prereq SMALLINT UNSIGNED NOT NULL, + st_prec TINYINT NOT NULL, + st_nozero BOOL NOT NULL, + st_nonan BOOL NOT NULL, + st_total BOOL NOT NULL, + st_pdf BOOL NOT NULL, + st_cdf BOOL NOT NULL, + st_min DOUBLE NOT NULL, + st_max DOUBLE NOT NULL, + st_bktsize DOUBLE NOT NULL, + st_size SMALLINT UNSIGNED NOT NULL, + PRIMARY KEY (st_id), + UNIQUE (st_name) + ) TYPE=InnoDB''') + + # + # This is the main table of data for stats. + # + # COLUMNS: + # 'stat' refers to the stat field given in the stat table. + # + # 'x' referrs to the first dimension of a multi-dimensional stat. For + # a vector, x will start at 0 and increase for each vector + # element. + # For a distribution: + # -1: sum (for calculating standard deviation) + # -2: sum of squares (for calculating standard deviation) + # -3: total number of samples taken (for calculating + # standard deviation) + # -4: minimum value + # -5: maximum value + # -6: underflow + # -7: overflow + # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second + # dimension. + # 'run' is the run that the data was generated from. Details up in + # the run table + # 'tick' is a timestamp generated by the simulator. + # 'bin' is the name of the bin that the data was generated in, if + # any. + # 'data' is the actual stat value. + # + # INDEXES: + # 'stat' is indexed so that a user can find all of the data for a + # particular stat. It is not unique, because that specific stat + # can be found in many runs, bins, and samples, in addition to + # having entries for the mulidimensional cases. + # 'run' is indexed to allow a user to remove all of the data for a + # particular execution run. 
It can also be used to allow the + # user to print out all of the data for a given run. + # + self.query(''' + CREATE TABLE data( + dt_stat SMALLINT UNSIGNED NOT NULL, + dt_x SMALLINT NOT NULL, + dt_y SMALLINT NOT NULL, + dt_run SMALLINT UNSIGNED NOT NULL, + dt_tick BIGINT UNSIGNED NOT NULL, + dt_bin SMALLINT UNSIGNED NOT NULL, + dt_data DOUBLE NOT NULL, + INDEX (dt_stat), + INDEX (dt_run), + UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin) + ) TYPE=InnoDB;''') + + # + # Names and descriptions for multi-dimensional stats (vectors, etc.) + # are stored here instead of having their own entry in the statistics + # table. This allows all parts of a single stat to easily share a + # single id. + # + # COLUMNS: + # 'stat' is the unique stat identifier from the stat table. + # 'x' is the first dimension for multi-dimensional stats + # corresponding to the data table above. + # 'y' is the second dimension for multi-dimensional stats + # corresponding to the data table above. + # 'name' is the specific subname for the unique stat,x,y combination. + # 'descr' is the specific description for the uniqe stat,x,y + # combination. + # + # INDEXES: + # 'stat' is indexed so you can get the subdata for a specific stat. + # + self.query(''' + CREATE TABLE subdata( + sd_stat SMALLINT UNSIGNED NOT NULL, + sd_x SMALLINT NOT NULL, + sd_y SMALLINT NOT NULL, + sd_name VARCHAR(255) NOT NULL, + sd_descr TEXT, + UNIQUE (sd_stat,sd_x,sd_y) + ) TYPE=InnoDB''') + + + # + # The formula table is maintained separately from the data table + # because formula data, unlike other stat data cannot be represented + # there. + # + # COLUMNS: + # 'stat' refers to the stat field generated in the stat table. + # 'formula' is the actual string representation of the formula + # itself. + # + # INDEXES: + # 'stat' is indexed so that you can just look up a formula. 
+ # + self.query(''' + CREATE TABLE formulas( + fm_stat SMALLINT UNSIGNED NOT NULL, + fm_formula BLOB NOT NULL, + PRIMARY KEY(fm_stat) + ) TYPE=InnoDB''') + + # + # Each stat used in each formula is kept in this table. This way, if + # you want to print out a particular formula, you can simply find out + # which stats you need by looking in this table. Additionally, when + # you remove a stat from the stats table and data table, you remove + # any references to the formula in this table. When a formula is no + # longer referred to, you remove its entry. + # + # COLUMNS: + # 'stat' is the stat id from the stat table above. + # 'child' is the stat id of a stat that is used for this formula. + # There may be many children for any given 'stat' (formula) + # + # INDEXES: + # 'stat' is indexed so you can look up all of the children for a + # particular stat. + # 'child' is indexed so that you can remove an entry when a stat is + # removed. + # + self.query(''' + CREATE TABLE formula_ref( + fr_stat SMALLINT UNSIGNED NOT NULL, + fr_run SMALLINT UNSIGNED NOT NULL, + UNIQUE (fr_stat,fr_run), + INDEX (fr_stat), + INDEX (fr_run) + ) TYPE=InnoDB''') + + # COLUMNS: + # 'event' is the unique event id from the event_desc table + # 'run' is simulation run id that this event took place in + # 'tick' is the tick when the event happened + # + # INDEXES: + # 'event' is indexed so you can look up all occurences of a + # specific event + # 'run' is indexed so you can find all events in a run + # 'tick' is indexed because we want the unique thing anyway + # 'event,run,tick' is unique combination + self.query(''' + CREATE TABLE events( + ev_event SMALLINT UNSIGNED NOT NULL, + ev_run SMALLINT UNSIGNED NOT NULL, + ev_tick BIGINT UNSIGNED NOT NULL, + INDEX(ev_event), + INDEX(ev_run), + INDEX(ev_tick), + UNIQUE(ev_event,ev_run,ev_tick) + ) TYPE=InnoDB''') + + # COLUMNS: + # 'id' is the unique description id + # 'name' is the name of the event that occurred + # + # INDEXES: + # 'id' is indexed 
because it is the primary key and is what you use + # to look up the descriptions + # 'name' is indexed so one can find the event based on name + # + self.query(''' + CREATE TABLE event_names( + en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + en_name VARCHAR(255) NOT NULL, + PRIMARY KEY (en_id), + UNIQUE (en_name) + ) TYPE=InnoDB''') + + def clean(self): + self.query(''' + DELETE data + FROM data + LEFT JOIN runs ON dt_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE formula_ref + FROM formula_ref + LEFT JOIN runs ON fr_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE formulas + FROM formulas + LEFT JOIN formula_ref ON fm_stat=fr_stat + WHERE fr_stat IS NULL''') + + self.query(''' + DELETE stats + FROM stats + LEFT JOIN data ON st_id=dt_stat + WHERE dt_stat IS NULL''') + + self.query(''' + DELETE subdata + FROM subdata + LEFT JOIN data ON sd_stat=dt_stat + WHERE dt_stat IS NULL''') + + self.query(''' + DELETE bins + FROM bins + LEFT JOIN data ON bn_id=dt_bin + WHERE dt_bin IS NULL''') + + self.query(''' + DELETE events + FROM events + LEFT JOIN runs ON ev_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE event_names + FROM event_names + LEFT JOIN events ON en_id=ev_event + WHERE ev_event IS NULL''') diff --git a/util/stats/display.py b/util/stats/display.py new file mode 100644 index 000000000..68a26852d --- /dev/null +++ b/util/stats/display.py @@ -0,0 +1,124 @@ +class Value: + def __init__(self, value, precision, percent = False): + self.value = value + self.precision = precision + self.percent = percent + def __str__(self): + if isinstance(self.value, str): + if self.value.lower() == 'nan': + value = 'NaN' + if self.value.lower() == 'inf': + value = 'Inf' + else: + if self.precision >= 0: + format = "%%.%df" % self.precision + elif self.value == 0.0: + format = "%.0f" + elif self.value % 1.0 == 0.0: + format = "%.0f" + else: + format = "%f" + value = self.value + if self.percent: + value = value * 100.0 + value = 
format % value + + if self.percent: + value = value + "%" + + return value + +class Print: + def __init__(self, **vals): + self.__dict__.update(vals) + + def __str__(self): + value = Value(self.value, self.precision) + pdf = '' + cdf = '' + if self.__dict__.has_key('pdf'): + pdf = Value(self.pdf, 2, True) + if self.__dict__.has_key('cdf'): + cdf = Value(self.cdf, 2, True) + + output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf) + + if descriptions and self.__dict__.has_key('desc') and self.desc: + output = "%s # %s" % (output, self.desc) + + return output + + def doprint(self): + if display_all: + return True + if self.value == 0.0 and (self.flags & flags_nozero): + return False + if isinstance(self.value, str): + if self.value == 'NaN' and (self.flags & flags_nonan): + return False + return True + + def display(self): + if self.doprint(): + print self + +class VectorDisplay: + def display(self): + p = Print() + p.flags = self.flags + p.precision = self.precision + + if issequence(self.value): + if not len(self.value): + return + + mytotal = reduce(lambda x,y: float(x) + float(y), self.value) + mycdf = 0.0 + + value = self.value + + if display_all: + subnames = [ '[%d]' % i for i in range(len(value)) ] + else: + subnames = [''] * len(value) + + if self.__dict__.has_key('subnames'): + for i,each in enumerate(self.subnames): + if len(each) > 0: + subnames[i] = '.%s' % each + + subdescs = [self.desc]*len(value) + if self.__dict__.has_key('subdescs'): + for i in xrange(min(len(value), len(self.subdescs))): + subdescs[i] = self.subdescs[i] + + for val,sname,sdesc in map(None, value, subnames, subdescs): + if mytotal > 0.0: + mypdf = float(val) / float(mytotal) + mycdf += mypdf + if (self.flags & flags_pdf): + p.pdf = mypdf + p.cdf = mycdf + + if len(sname) == 0: + continue + + p.name = self.name + sname + p.desc = sdesc + p.value = val + p.display() + + if (self.flags & flags_total): + if (p.__dict__.has_key('pdf')): del p.__dict__['pdf'] + if 
(p.__dict__.has_key('cdf')): del p.__dict__['cdf'] + p.name = self.name + '.total' + p.desc = self.desc + p.value = mytotal + p.display() + + else: + p.name = self.name + p.desc = self.desc + p.value = self.value + p.display() + diff --git a/util/stats/flags.py b/util/stats/flags.py new file mode 100644 index 000000000..7a57e722b --- /dev/null +++ b/util/stats/flags.py @@ -0,0 +1,36 @@ +# Copyright (c) 2004 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Nathan Binkert + +init = 0x00000001 +printable = 0x00000002 +total = 0x00000010 +pdf = 0x00000020 +cdf = 0x00000040 +dist = 0x00000080 +nozero = 0x00000100 +nonan = 0x00000200 diff --git a/util/stats/info.py b/util/stats/info.py new file mode 100644 index 000000000..a94563cf9 --- /dev/null +++ b/util/stats/info.py @@ -0,0 +1,724 @@ +from __future__ import division +import operator, re, types + +source = None +display_run = 0 + +def issequence(t): + return isinstance(t, types.TupleType) or isinstance(t, types.ListType) + +def total(f): + if isinstance(f, FormulaStat): + v = f.value + else: + v = f + + f = FormulaStat() + if issequence(v): + f.value = reduce(operator.add, v) + else: + f.value = v + + return f + +def unaryop(op, f): + if isinstance(f, FormulaStat): + v = f.value + else: + v = f + + if issequence(v): + return map(op, v) + else: + return op(v) + +def zerodiv(lv, rv): + if rv == 0.0: + return 0.0 + else: + return operator.truediv(lv, rv) + +def wrapop(op, lv, rv): + if isinstance(lv, str): + return lv + + if isinstance(rv, str): + return rv + + return op(lv, rv) + +def same(lv, rv): + for lrun,rrun in zip(lv.keys(),rv.keys()): + if lrun != rrun: + print 'lrun != rrun' + print lrun, rrun + print lv.keys() + print rv.keys() + return False + for lx,rx in zip(lv[lrun].keys(),rv[rrun].keys()): + if lx != rx: + print 'lx != rx' + print lx, rx + print lv[lrun].keys() + print rv[rrun].keys() + return False + for ly,ry in zip(lv[lrun][lx].keys(),rv[rrun][rx].keys()): + if ly != ry: + print 'ly != ry' + print ly, ry + print lv[lrun][lx].keys() + print rv[rrun][rx].keys() + return False + return True + + +def binaryop(op, lf, rf): + result = {} + + if isinstance(lf, FormulaStat) and isinstance(rf, FormulaStat): + lv = lf.value + rv = rf.value + + if not same(lv, rv): + raise AttributeError, "run,x,y not identical" + + for run in lv.keys(): + result[run] = {} + for x in lv[run].keys(): + result[run][x] = {} + for y in lv[run][x].keys(): + 
class Statistic(object):
    """Base class for a statistic whose ``value`` is computed on demand.

    The instance dictionary is seeded from ``data.__dict__``.  The
    attributes ``value``, ``bins`` and ``ticks`` are created as None when
    ``data`` does not supply them.

    Reading ``value`` calls ``getValue()`` the first time (or any time the
    cached entry is None) and caches the result.  Assigning ``bins`` or
    ``ticks`` clears the cache so the next read recomputes it.
    """

    def __init__(self, data):
        self.__dict__.update(data.__dict__)
        for attr in ('value', 'bins', 'ticks'):
            self.__dict__.setdefault(attr, None)

    def __getattribute__(self, attr):
        # Everything except 'value' uses the normal lookup.
        if attr != 'value':
            return super(Statistic, self).__getattribute__(attr)

        state = self.__dict__
        # Identity test: a cached value may define its own __eq__.
        if state['value'] is None:
            state['value'] = self.getValue()
        return state['value']

    def __setattr__(self, attr, value):
        if attr not in ('bins', 'ticks'):
            super(Statistic, self).__setattr__(attr, value)
            return

        # NOTE(review): the original also carried an
        # "elif attr == 'samples' and type(value) is str" branch here that
        # split the string into ints.  It could never run because attr is
        # always 'bins' or 'ticks' at this point, so it was removed.  If
        # string parsing was meant for 'ticks', confirm with the callers
        # before adding it back.
        if attr == 'bins' and value is not None:
            # 'db' is a module-level name that is not defined in the part
            # of the file visible here -- TODO confirm it is provided.
            value = db.getBin(value)

        self.__dict__[attr] = value
        # Changing the selection invalidates the cached value.
        self.__dict__['value'] = None

    def getValue(self):
        """Compute the statistic's value; subclasses must override."""
        raise AttributeError('getValue() must be defined')

    def zero(self):
        return False

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        return '%f' % (float(self))
class Vector(Statistic,FormulaStat):
    """Statistic whose value is fetched from ``source`` for the current bins.

    ``source``, ``flags``, ``issequence`` and ``binaryop`` are module-level
    names defined outside this class.
    """

    def getValue(self):
        return source.data(self, self.bins)

    def display(self):
        """Print the vector unless it is flagged as not printable."""
        import display
        if not display.all and not (self.flags & flags.printable):
            return

        d = display.VectorDisplay()
        # The display object reads name/desc/flags/value etc. straight
        # from a copy of this statistic's attributes.
        d.__dict__.update(self.__dict__)
        d.display()

    def comparable(self, other):
        return self.name == other.name and \
               len(self.value) == len(other.value)

    def __eq__(self, other):
        # A sequence never equals a non-sequence.  The original returned
        # the undefined name 'false' here, which raised NameError.
        if issequence(self.value) != issequence(other.value):
            return False

        if not issequence(self.value):
            return self.value == other.value

        if len(self.value) != len(other.value):
            return False
        for v1,v2 in zip(self.value, other.value):
            if v1 != v2:
                return False
        return True

    def __isub__(self, other):
        self.value = binaryop(operator.sub, self.value, other.value)
        return self

    def __iadd__(self, other):
        self.value = binaryop(operator.add, self.value, other.value)
        return self

    def __itruediv__(self, other):
        # Dividing by a false-y divisor (0, None) leaves the value untouched.
        if not other:
            return self
        if issequence(self.value):
            # Update element by element so the existing sequence object
            # is modified in place.
            for i in range(len(self.value)):
                self.value[i] /= other
        else:
            self.value /= other
        return self
class FullDist(SimpleDist):
    """Distribution with summary sums plus a bucketed vector of counts.

    On top of the ``sums`` / ``squares`` / ``samples`` fields of
    SimpleDist it stores the observed ``minval`` / ``maxval``, the
    ``under`` and ``over`` counters, the bucket list ``vec`` and the
    bucket layout (``min``, ``max``, ``bsize``, ``size``).
    """

    def __init__(self, sums, squares, samples, minval, maxval,
                 under, vec, over, min, max, bsize, size):
        self.sums = sums
        self.squares = squares
        self.samples = samples
        self.minval = minval
        self.maxval = maxval
        self.under = under
        self.vec = vec
        self.over = over
        self.min = min
        self.max = max
        self.bsize = bsize
        self.size = size

    def getValue(self):
        return 0.0

    def display(self, name, desc, flags, precision):
        """Print min/max, underflow, each bucket, overflow, then the
        SimpleDist summary lines."""
        import display
        p = display.Print()
        p.flags = flags
        p.precision = precision

        p.name = name + '.min_val'
        p.value = self.minval
        p.display()

        p.name = name + '.max_val'
        p.value = self.maxval
        p.display()

        p.name = name + '.underflow'
        p.value = self.under
        p.display()

        # Every bucket except the last is labelled [i : i + bsize - 1].
        i = self.min
        for val in self.vec[:-1]:
            p.name = name + '[%d:%d]' % (i, i + self.bsize - 1)
            p.value = val
            p.display()
            i += self.bsize

        # The last bucket is labelled up to self.max.
        p.name = name + '[%d:%d]' % (i, self.max)
        p.value = self.vec[-1]
        p.display()

        p.name = name + '.overflow'
        p.value = self.over
        p.display()

        SimpleDist.display(self, name, desc, flags, precision)

    def comparable(self, other):
        return self.min == other.min and self.max == other.max and \
               self.bsize == other.bsize and self.size == other.size

    def __eq__(self, other):
        # Only the summary fields are compared; the buckets are not.
        return self.sums == other.sums and self.squares == other.squares and \
               self.samples == other.samples

    def __isub__(self, other):
        self.sums -= other.sums
        self.squares -= other.squares
        self.samples -= other.samples

        if other.samples:
            self.minval = min(self.minval, other.minval)
            self.maxval = max(self.maxval, other.maxval)
            # The original subtracted the undefined names 'under' and
            # 'over' here, which raised NameError.
            self.under -= other.under
            self.vec = [x - y for x, y in zip(self.vec, other.vec)]
            self.over -= other.over
        return self

    def __iadd__(self, other):
        if not self.samples and other.samples:
            # Take over other's state in place.  The original rebound
            # 'self = other', so 'a += b' made a an alias of b, and
            # 'sd += od' on a loop variable had no effect on the
            # container it came from.
            self.__dict__.update(other.__dict__)
            # Copy the bucket list so later in-place edits do not leak
            # into other.
            self.vec = list(other.vec)
            return self

        self.sums += other.sums
        self.squares += other.squares
        self.samples += other.samples

        if other.samples:
            self.minval = min(self.minval, other.minval)
            self.maxval = max(self.maxval, other.maxval)
            self.under += other.under
            self.vec = [x + y for x, y in zip(self.vec, other.vec)]
            self.over += other.over
        return self

    def __itruediv__(self, other):
        # A false-y divisor (0, None) leaves the distribution untouched.
        if not other:
            return self
        self.sums /= other
        self.squares /= other
        self.samples /= other

        if self.samples:
            self.under /= other
            for i in range(len(self.vec)):
                self.vec[i] /= other
            self.over /= other
        return self
self.dist]), + reduce(sums, [d.over for d in self.dist]), + dist[0].min, + dist[0].max, + dist[0].bsize, + dist[0].size) + + name = '%s.total' % (self.name) + desc = self.desc + disttotal.display(name, desc, self.flags, self.precision) + + def comparable(self, other): + return self.name == other.name and \ + alltrue(map(lambda x, y : x.comparable(y), + self.dist, + other.dist)) + + def __eq__(self, other): + return alltrue(map(lambda x, y : x == y, self.dist, other.dist)) + + def __isub__(self, other): + if issequence(self.dist) and issequence(other.dist): + for sd,od in zip(self.dist, other.dist): + sd -= od + else: + self.dist -= other.dist + return self + + def __iadd__(self, other): + if issequence(self.dist) and issequence(other.dist): + for sd,od in zip(self.dist, other.dist): + sd += od + else: + self.dist += other.dist + return self + + def __itruediv__(self, other): + if not other: + return self + if issequence(self.dist): + for dist in self.dist: + dist /= other + else: + self.dist /= other + return self + +class Vector2d(Statistic): + def getValue(self): + return 0.0 + + def display(self): + import display + if not display.all and not (self.flags & flags.printable): + return + + d = display.VectorDisplay() + d.__dict__.update(self.__dict__) + + if self.__dict__.has_key('ysubnames'): + ysubnames = list(self.ysubnames) + slack = self.x - len(ysubnames) + if slack > 0: + ysubnames.extend(['']*slack) + else: + ysubnames = range(self.x) + + for x,sname in enumerate(ysubnames): + o = x * self.y + d.value = self.value[o:o+self.y] + d.name = '%s[%s]' % (self.name, sname) + d.display() + + if self.flags & flags.total: + d.value = [] + for y in range(self.y): + xtot = 0.0 + for x in range(self.x): + xtot += self.value[y + x * self.x] + d.value.append(xtot) + + d.name = self.name + '.total' + d.display() + + def comparable(self, other): + return self.name == other.name and self.x == other.x and \ + self.y == other.y + + def __eq__(self, other): + return True + + 
class Value:
    """Format one statistic value for printing.

    value     -- a number, or a string such as 'nan' / 'inf'
    precision -- digits after the decimal point; a negative precision
                 prints whole numbers without decimals and everything
                 else with the default '%f' format
    percent   -- when true, numbers are multiplied by 100 and a '%' is
                 appended to the result
    """

    def __init__(self, value, precision, percent = False):
        self.value = value
        self.precision = precision
        self.percent = percent

    def __str__(self):
        if isinstance(self.value, str):
            lowered = self.value.lower()
            if lowered == 'nan':
                value = 'NaN'
            elif lowered == 'inf':
                value = 'Inf'
            else:
                # Any other string is passed through unchanged.  The
                # original left 'value' unassigned in this case and
                # failed with UnboundLocalError.
                value = self.value
        else:
            if self.precision >= 0:
                fmt = "%%.%df" % self.precision
            elif self.value == 0.0:
                fmt = "%.0f"
            elif self.value % 1.0 == 0.0:
                fmt = "%.0f"
            else:
                fmt = "%f"
            value = self.value
            if self.percent:
                value = value * 100.0
            value = fmt % value

        if self.percent:
            value = value + "%"

        return value
if isinstance(self.value, str): + if self.value == 'NaN' and (self.flags & flags_nonan): + return False + return True + + def display(self): + if self.doprint(): + print self + +class VectorDisplay: + def display(self): + p = Print() + p.flags = self.flags + p.precision = self.precision + + if issequence(self.value): + if not len(self.value): + return + + mytotal = reduce(lambda x,y: float(x) + float(y), self.value) + mycdf = 0.0 + + value = self.value + + if display_all: + subnames = [ '[%d]' % i for i in range(len(value)) ] + else: + subnames = [''] * len(value) + + if self.__dict__.has_key('subnames'): + for i,each in enumerate(self.subnames): + if len(each) > 0: + subnames[i] = '.%s' % each + + subdescs = [self.desc]*len(value) + if self.__dict__.has_key('subdescs'): + for i in xrange(min(len(value), len(self.subdescs))): + subdescs[i] = self.subdescs[i] + + for val,sname,sdesc in map(None, value, subnames, subdescs): + if mytotal > 0.0: + mypdf = float(val) / float(mytotal) + mycdf += mypdf + if (self.flags & flags_pdf): + p.pdf = mypdf + p.cdf = mycdf + + if len(sname) == 0: + continue + + p.name = self.name + sname + p.desc = sdesc + p.value = val + p.display() + + if (self.flags & flags_total): + if (p.__dict__.has_key('pdf')): del p.__dict__['pdf'] + if (p.__dict__.has_key('cdf')): del p.__dict__['cdf'] + p.name = self.name + '.total' + p.desc = self.desc + p.value = mytotal + p.display() + + else: + p.name = self.name + p.desc = self.desc + p.value = self.value + p.display() + diff --git a/util/stats/stats.py b/util/stats/stats.py new file mode 100755 index 000000000..1d521fd9d --- /dev/null +++ b/util/stats/stats.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python +from __future__ import division +import re, sys + +def usage(): + print '''\ +Usage: %s [-E] [-F] [-d <db> ] [-g <get> ] [-h <host>] [-p] + [-s <system>] [-r <runs> ] [-u <username>] <command> [command args] +''' % sys.argv[0] + sys.exit(1) + +def getopts(list, flags): + import getopt + try: + opts, 
def printdata(runs, value, invert = False):
    """Print ``value`` once for every run in ``runs``.

    runs   -- iterable of run objects; ``run.run`` and ``run.name`` are read
    value  -- object converted with float() after each run is selected
    invert -- passed on to printval(), which prints 1/value for
              non-zero values when it is true
    """
    import info
    for run in runs:
        # Select the run before converting: float(value) is evaluated
        # again for each run, after display_run has been switched.
        info.display_run = run.run
        val = float(value)
        # The original dropped 'invert' here, so callers that asked for
        # inverted output got the plain value.
        printval(run.name, val, invert)
raise CommandException + + import dbinit + mydb = dbinit.MyDB(options) + + if args[0] == 'drop': + if len(args) > 2: raise CommandException + mydb.admin() + mydb.drop() + if len(args) == 2 and args[1] == 'init': + mydb.create() + mydb.connect() + mydb.populate() + mydb.close() + return + + if args[0] == 'init': + if len(args) > 1: raise CommandException + mydb.admin() + mydb.create() + mydb.connect() + mydb.populate() + mydb.close() + return + + if args[0] == 'clean': + if len(args) > 1: raise CommandException + mydb.connect() + mydb.clean() + return + + raise CommandException + + import db, info + info.source = db.Database() + info.source.host = options.host + info.source.db = options.db + info.source.passwd = options.passwd + info.source.user = options.user + info.source.connect() + info.source.update_dict(globals()) + + system = info.source.__dict__[options.system] + + if type(options.get) is str: + info.source.get = options.get + + if options.runs is None: + runs = info.source.allRuns + else: + rx = re.compile(options.runs) + runs = [] + for run in info.source.allRuns: + if rx.match(run.name): + runs.append(run) + + info.display_run = runs[0].run + + if command == 'runs': + user = None + opts, args = getopts(args, '-u') + if len(args): + raise CommandException + for o,a in opts: + if o == '-u': + user = a + info.source.listRuns(user) + return + + if command == 'stats': + if len(args) == 0: + info.source.listStats() + elif len(args) == 1: + info.source.listStats(args[0]) + else: + raise CommandException + + return + + if command == 'stat': + if len(args) != 1: + raise CommandException + + stats = info.source.getStat(args[0]) + for stat in stats: + if graph: + graphdata(runs, stat.name, stat.name, stat) + else: + print stat.name + printdata(runs, stat) + return + + if command == 'bins': + if len(args) == 0: + info.source.listBins() + elif len(args) == 1: + info.source.listBins(args[0]) + else: + raise CommandException + + return + + if command == 'formulas': + if 
len(args) == 0: + info.source.listFormulas() + elif len(args) == 1: + info.source.listFormulas(args[0]) + else: + raise CommandException + + return + + if command == 'samples': + if len(args): + raise CommandException + + info.source.listTicks(runs) + return + + if len(args): + raise CommandException + + if command == 'usertime': + import copy + kernel = copy.copy(system.full_cpu.numCycles) + kernel.bins = 'kernel' + + user = copy.copy(system.full_cpu.numCycles) + user.bins = 'user' + + if graph: + graphdata(runs, 'usertime', 'User Fraction', + user / system.full_cpu.numCycles) + else: + printdata(runs, user / system.full_cpu.numCycles) + return + + if command == 'ticks': + if binned: + print 'kernel ticks' + system.full_cpu.numCycles.bins = 'kernel' + printdata(runs, system.full_cpu.numCycles) + + print 'idle ticks' + system.full_cpu.numCycles.bins = 'idle' + printdata(runs, system.full_cpu.numCycles) + + print 'user ticks' + system.full_cpu.numCycles.bins = 'user' + printdata(runs, system.full_cpu.numCycles) + + print 'total ticks' + + system.full_cpu.numCycles.bins = None + printdata(runs, system.full_cpu.numCycles) + return + + if command == 'packets': + packets = system.tsunami.nsgige.rxPackets + if graph: + graphdata(runs, 'packets', 'Packets', packets) + else: + printdata(runs, packets) + return + + if command == 'ppt' or command == 'tpp': + ppt = system.tsunami.nsgige.rxPackets / sim_ticks + printdata(runs, ppt, command == 'tpp') + return + + if command == 'pps': + pps = system.tsunami.nsgige.rxPackets / sim_seconds + if graph: + graphdata(runs, 'pps', 'Packets/s', pps) + else: + printdata(runs, pps) + return + + if command == 'bpt' or command == 'tpb': + bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes + bpt = bytes / sim_ticks * 8 + if graph: + graphdata(runs, 'bpt', 'bps / Hz', bpt) + else: + printdata(runs, bpt, command == 'tpb') + return + + if command == 'bptb' or command == 'tpbb': + bytes = system.tsunami.nsgige.rxBytes + 
system.tsunami.nsgige.txBytes + + print 'kernel stats' + bytes.bins = 'kernel' + printdata(runs, bytes / ticks) + + print 'idle stats' + bytes.bins = 'idle' + printdata(runs, bytes / ticks) + + print 'user stats' + bytes.bins = 'user' + printdata(runs, bytes / ticks) + + return + + if command == 'bytes': + stat = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes + + if binned: + print '%s kernel stats' % stat.name + stat.bins = 'kernel' + printdata(runs, stat) + + print '%s idle stats' % stat.name + stat.bins = 'idle' + printdata(runs, stat) + + print '%s user stats' % stat.name + stat.bins = 'user' + printdata(runs, stat) + + print '%s total stats' % stat.name + stat.bins = None + + printdata(runs, stat) + return + + if command == 'rxbps': + gbps = system.tsunami.nsgige.rxBandwidth / 1e9 + if graph: + graphdata(runs, 'rxbps', 'Bandwidth (Gbps)', gbps) + else: + printdata(runs, gbps) + return + + if command == 'txbps': + gbps = system.tsunami.nsgige.txBandwidth / 1e9 + if graph: + graphdata(runs, 'txbps', 'Bandwidth (Gbps)', gbps) + else: + printdata(runs, gbps) + return + + if command == 'bps': + rxbps = system.tsunami.nsgige.rxBandwidth + txbps = system.tsunami.nsgige.txBandwidth + gbps = (rxbps + txbps) / 1e9 + if graph: + graphdata(runs, 'bps', 'Bandwidth (Gbps)', gbps) + else: + printdata(runs, gbps) + return + + if command == 'misses': + stat = system.L3.overall_mshr_misses + if binned: + print '%s kernel stats' % stat.name + stat.bins = 'kernel' + printdata(runs, stat) + + print '%s idle stats' % stat.name + stat.bins = 'idle' + printdata(runs, stat) + + print '%s user stats' % stat.name + stat.bins = 'user' + printdata(runs, stat) + + print '%s total stats' % stat.name + + stat.bins = None + if graph: + graphdata(runs, 'misses', 'Overall MSHR Misses', stat) + else: + printdata(runs, stat) + return + + if command == 'mpkb': + misses = system.L3.overall_mshr_misses + rxbytes = system.tsunami.nsgige.rxBytes + txbytes = system.tsunami.nsgige.txBytes 
if __name__ == '__main__':
    # Script entry point: fill in default options, apply the command
    # line flags, then dispatch the requested command.
    import getpass

    options = Options()
    options.host = 'zizzer.pool'
    options.db = None
    options.passwd = ''
    options.user = getpass.getuser()
    options.runs = None
    options.system = 'client'
    options.get = None

    opts, args = getopts(sys.argv[1:], '-BEFGd:g:h:pr:s:u:')
    for o,a in opts:
        if o == '-B':
            options.binned = True
            # commands() reads the module-level 'binned', not
            # options.binned, so the global has to be set as well.
            binned = True
        elif o == '-E':
            printval.mode = 'E'
        elif o == '-F':
            printval.mode = 'F'
        elif o == '-G':
            options.graph = True
            # Same as -B: commands() reads the module-level 'graph'.
            graph = True
        elif o == '-d':
            options.db = a
        elif o == '-g':
            options.get = a
        elif o == '-h':
            options.host = a
        elif o == '-p':
            options.passwd = getpass.getpass()
        elif o == '-r':
            options.runs = a
        elif o == '-u':
            options.user = a
        elif o == '-s':
            options.system = a

    if len(args) == 0:
        usage()

    command = args[0]
    args = args[1:]

    try:
        commands(options, command, args)
    except CommandException:
        usage()