summaryrefslogtreecommitdiff
path: root/util/stats/db.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/stats/db.py')
-rw-r--r--util/stats/db.py415
1 files changed, 415 insertions, 0 deletions
diff --git a/util/stats/db.py b/util/stats/db.py
new file mode 100644
index 000000000..4cba82446
--- /dev/null
+++ b/util/stats/db.py
@@ -0,0 +1,415 @@
+import MySQLdb, re, string
+
+def statcmp(a, b):
+ v1 = a.split('.')
+ v2 = b.split('.')
+
+ last = min(len(v1), len(v2)) - 1
+ for i,j in zip(v1[0:last], v2[0:last]):
+ if i != j:
+ return cmp(i, j)
+
+ # Special compare for last element.
+ if len(v1) == len(v2):
+ return cmp(v1[last], v2[last])
+ else:
+ return cmp(len(v1), len(v2))
+
+class RunData:
+ def __init__(self, row):
+ self.run = int(row[0])
+ self.name = row[1]
+ self.user = row[2]
+ self.project = row[3]
+
+class SubData:
+ def __init__(self, row):
+ self.stat = int(row[0])
+ self.x = int(row[1])
+ self.y = int(row[2])
+ self.name = row[3]
+ self.descr = row[4]
+
+class Data:
+ def __init__(self, row):
+ if len(row) != 5:
+ raise 'stat db error'
+ self.stat = int(row[0])
+ self.run = int(row[1])
+ self.x = int(row[2])
+ self.y = int(row[3])
+ self.data = float(row[4])
+
+ def __repr__(self):
+ return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
+ self.run, self.x, self.y, self.data)
+
+class StatData(object):
+ def __init__(self, row):
+ self.stat = int(row[0])
+ self.name = row[1]
+ self.desc = row[2]
+ self.type = row[3]
+ self.prereq = int(row[5])
+ self.precision = int(row[6])
+
+ import flags
+ self.flags = 0
+ if int(row[4]): self.flags |= flags.printable
+ if int(row[7]): self.flags |= flags.nozero
+ if int(row[8]): self.flags |= flags.nonan
+ if int(row[9]): self.flags |= flags.total
+ if int(row[10]): self.flags |= flags.pdf
+ if int(row[11]): self.flags |= flags.cdf
+
+ if self.type == 'DIST' or self.type == 'VECTORDIST':
+ self.min = float(row[12])
+ self.max = float(row[13])
+ self.bktsize = float(row[14])
+ self.size = int(row[15])
+
+ if self.type == 'FORMULA':
+ self.formula = self.db.allFormulas[self.stat]
+
+class Node(object):
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return name
+
+class Database(object):
+ def __init__(self):
+ self.host = 'zizzer.pool'
+ self.user = ''
+ self.passwd = ''
+ self.db = 'm5stats'
+ self.cursor = None
+
+ self.allStats = []
+ self.allStatIds = {}
+ self.allStatNames = {}
+
+ self.allSubData = {}
+
+ self.allRuns = []
+ self.allRunIds = {}
+ self.allRunNames = {}
+
+ self.allBins = []
+ self.allBinIds = {}
+ self.allBinNames = {}
+
+ self.allFormulas = {}
+
+ self.stattop = {}
+ self.statdict = {}
+ self.statlist = []
+
+ self.mode = 'sum';
+ self.runs = None
+ self.bins = None
+ self.ticks = None
+ self.__dict__['get'] = type(self).sum
+
+ def query(self, sql):
+ self.cursor.execute(sql)
+
+ def update_dict(self, dict):
+ dict.update(self.stattop)
+
+ def append(self, stat):
+ statname = re.sub(':', '__', stat.name)
+ path = string.split(statname, '.')
+ pathtop = path[0]
+ fullname = ''
+
+ x = self
+ while len(path) > 1:
+ name = path.pop(0)
+ if not x.__dict__.has_key(name):
+ x.__dict__[name] = Node(fullname + name)
+ x = x.__dict__[name]
+ fullname = '%s%s.' % (fullname, name)
+
+ name = path.pop(0)
+ x.__dict__[name] = stat
+
+ self.stattop[pathtop] = self.__dict__[pathtop]
+ self.statdict[statname] = stat
+ self.statlist.append(statname)
+
+ def connect(self):
+ # connect
+ self.thedb = MySQLdb.connect(db=self.db,
+ host=self.host,
+ user=self.user,
+ passwd=self.passwd)
+
+ # create a cursor
+ self.cursor = self.thedb.cursor()
+
+ self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
+ from runs''')
+ for result in self.cursor.fetchall():
+ run = RunData(result);
+ self.allRuns.append(run)
+ self.allRunIds[run.run] = run
+ self.allRunNames[run.name] = run
+
+ self.query('select * from bins')
+ for id,name in self.cursor.fetchall():
+ self.allBinIds[int(id)] = name
+ self.allBinNames[name] = int(id)
+
+ self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
+ for result in self.cursor.fetchall():
+ subdata = SubData(result)
+ if self.allSubData.has_key(subdata.stat):
+ self.allSubData[subdata.stat].append(subdata)
+ else:
+ self.allSubData[subdata.stat] = [ subdata ]
+
+ self.query('select * from formulas')
+ for id,formula in self.cursor.fetchall():
+ self.allFormulas[int(id)] = formula
+
+ StatData.db = self
+ self.query('select * from stats')
+ import info
+ for result in self.cursor.fetchall():
+ stat = info.NewStat(StatData(result))
+ self.append(stat)
+ self.allStats.append(stat)
+ self.allStatIds[stat.stat] = stat
+ self.allStatNames[stat.name] = stat
+
+ # Name: listbins
+ # Desc: Prints all bins matching regex argument, if no argument
+ # is given all bins are returned
+ def listBins(self, regex='.*'):
+ print '%-50s %-10s' % ('bin name', 'id')
+ print '-' * 61
+ names = self.allBinNames.keys()
+ names.sort()
+ for name in names:
+ id = self.allBinNames[name]
+ print '%-50s %-10d' % (name, id)
+
+ # Name: listruns
+ # Desc: Prints all runs matching a given user, if no argument
+ # is given all runs are returned
+ def listRuns(self, user=None):
+ print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
+ print '-' * 62
+ for run in self.allRuns:
+ if user == None or user == run.user:
+ print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
+
+ # Name: listTicks
+ # Desc: Prints all samples for a given run
+ def listTicks(self, run=None):
+ print "tick"
+ print "----------------------------------------"
+ sql = 'select distinct dt_tick from data where dt_stat=1950'
+ #if run != None:
+ # sql += ' where dt_run=%d' % run
+ self.query(sql)
+ for r in self.cursor.fetchall():
+ print r[0]
+
+ # Name: liststats
+ # Desc: Prints all statistics that appear in the database,
+ # the optional argument is a regular expression that can
+ # be used to prune the result set
+ def listStats(self, regex=None):
+ print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
+ print '-' * 80
+
+ rx = None
+ if regex != None:
+ rx = re.compile(regex)
+
+ stats = [ stat.name for stat in self.allStats ]
+ stats.sort(statcmp)
+ for stat in stats:
+ stat = self.allStatNames[stat]
+ if rx == None or rx.match(stat.name):
+ print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
+
+ # Name: liststats
+ # Desc: Prints all statistics that appear in the database,
+ # the optional argument is a regular expression that can
+ # be used to prune the result set
+ def listFormulas(self, regex=None):
+ print '%-60s %s' % ('formula name', 'formula')
+ print '-' * 80
+
+ rx = None
+ if regex != None:
+ rx = re.compile(regex)
+
+ stats = [ stat.name for stat in self.allStats ]
+ stats.sort(statcmp)
+ for stat in stats:
+ stat = self.allStatNames[stat]
+ if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
+ print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
+
+ def getStat(self, stats):
+ if type(stats) is not list:
+ stats = [ stats ]
+
+ ret = []
+ for stat in stats:
+ if type(stat) is int:
+ ret.append(self.allStatIds[stat])
+
+ if type(stat) is str:
+ rx = re.compile(stat)
+ for stat in self.allStats:
+ if rx.match(stat.name):
+ ret.append(stat)
+ return ret
+
+ def getBin(self, bins):
+ if type(bins) is not list:
+ bins = [ bins ]
+
+ ret = []
+ for bin in bins:
+ if type(bin) is int:
+ ret.append(bin)
+ elif type(bin) is str:
+ ret.append(self.allBinNames[bin])
+ else:
+ for name,id in self.allBinNames.items():
+ if bin.match(name):
+ ret.append(id)
+
+ return ret
+
+ def getNotBin(self, bin):
+ map = {}
+ for bin in getBin(bin):
+ map[bin] = 1
+
+ ret = []
+ for bin in self.allBinIds.keys():
+ if not map.has_key(bin):
+ ret.append(bin)
+
+ return ret
+
+ #########################################
+ # get the data
+ #
+ def inner(self, op, stat, bins, ticks, group=False):
+ sql = 'select '
+ sql += 'dt_stat as stat, '
+ sql += 'dt_run as run, '
+ sql += 'dt_x as x, '
+ sql += 'dt_y as y, '
+ if group:
+ sql += 'dt_tick as tick, '
+ sql += '%s(dt_data) as data ' % op
+ sql += 'from data '
+ sql += 'where '
+
+ if isinstance(stat, list):
+ val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
+ sql += ' (%s)' % val
+ else:
+ sql += ' dt_stat=%d' % stat.stat
+
+ if self.runs != None and len(self.runs):
+ val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
+ sql += ' and (%s)' % val
+
+ if bins != None and len(bins):
+ val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
+ sql += ' and (%s)' % val
+
+ if ticks != None and len(ticks):
+ val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
+ sql += ' and (%s)' % val
+
+ sql += ' group by dt_stat,dt_run,dt_x,dt_y'
+ if group:
+ sql += ',dt_tick'
+ return sql
+
+ def outer(self, op_out, op_in, stat, bins, ticks):
+ sql = self.inner(op_in, stat, bins, ticks, True)
+ sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
+ sql += 'group by stat,run,x,y'
+ return sql
+
+ # Name: sum
+ # Desc: given a run, a stat and an array of samples and bins,
+ # sum all the bins and then get the standard deviation of the
+ # samples for non-binned runs. This will just return the average
+ # of samples, however a bin array still must be passed
+ def sum(self, stat, bins, ticks):
+ return self.inner('sum', stat, bins, ticks)
+
+ # Name: avg
+ # Desc: given a run, a stat and an array of samples and bins,
+ # sum all the bins and then average the samples for non-binned
+ # runs this will just return the average of samples, however
+ # a bin array still must be passed
+ def avg(self, stat, bins, ticks):
+ return self.outer('avg', 'sum', stat, bins, ticks)
+
+ # Name: stdev
+ # Desc: given a run, a stat and an array of samples and bins,
+ # sum all the bins and then get the standard deviation of the
+ # samples for non-binned runs. This will just return the average
+ # of samples, however a bin array still must be passed
+ def stdev(self, stat, bins, ticks):
+ return self.outer('stddev', 'sum', stat, bins, ticks)
+
+ def __getattribute__(self, attr):
+ if attr != 'get':
+ return super(Database, self).__getattribute__(attr)
+
+ if self.__dict__['get'] == type(self).sum:
+ return 'sum'
+ elif self.__dict__['get'] == type(self).avg:
+ return 'avg'
+ elif self.__dict__['get'] == type(self).stdev:
+ return 'stdev'
+ else:
+ return ''
+
+ def __setattr__(self, attr, value):
+ if attr != 'get':
+ super(Database, self).__setattr__(attr, value)
+ return
+
+ if value == 'sum':
+ self.__dict__['get'] = type(self).sum
+ elif value == 'avg':
+ self.__dict__['get'] = type(self).avg
+ elif value == 'stdev':
+ self.__dict__['get'] = type(self).stdev
+ else:
+ raise AttributeError, "can only set get to: sum | avg | stdev"
+
+ def data(self, stat, bins=None, ticks=None):
+ if bins is None:
+ bins = self.bins
+ if ticks is None:
+ ticks = self.ticks
+ sql = self.__dict__['get'](self, stat, bins, ticks)
+ self.query(sql)
+
+ runs = {}
+ for x in self.cursor.fetchall():
+ data = Data(x)
+ if not runs.has_key(data.run):
+ runs[data.run] = {}
+ if not runs[data.run].has_key(data.x):
+ runs[data.run][data.x] = {}
+
+ runs[data.run][data.x][data.y] = data.data
+ return runs