1 files changed, 415 insertions, 0 deletions
diff --git a/util/stats/db.py b/util/stats/db.py
new file mode 100644
index 000000000..4cba82446
--- /dev/null
+++ b/util/stats/db.py
@@ -0,0 +1,415 @@
+import MySQLdb, re, string
+
+def statcmp(a, b):
+    v1 = a.split('.')
+    v2 = b.split('.')
+
+    last = min(len(v1), len(v2)) - 1
+    for i,j in zip(v1[0:last], v2[0:last]):
+        if i != j:
+            return cmp(i, j)
+
+    # Special compare for last element.
+    if len(v1) == len(v2):
+        return cmp(v1[last], v2[last])
+    else:
+        return cmp(len(v1), len(v2))
+
+class RunData:
+    def __init__(self, row):
+        self.run = int(row[0])
+        self.name = row[1]
+        self.user = row[2]
+        self.project = row[3]
+
+class SubData:
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.x = int(row[1])
+        self.y = int(row[2])
+        self.name = row[3]
+        self.descr = row[4]
+
+class Data:
+    def __init__(self, row):
+        if len(row) != 5:
+            raise 'stat db error'
+        self.stat = int(row[0])
+        self.run = int(row[1])
+        self.x = int(row[2])
+        self.y = int(row[3])
+        self.data = float(row[4])
+
+    def __repr__(self):
+        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
+            self.run, self.x, self.y, self.data)
+
+class StatData(object):
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.name = row[1]
+        self.desc = row[2]
+        self.type = row[3]
+        self.prereq = int(row[5])
+        self.precision = int(row[6])
+
+        import flags
+        self.flags = 0
+        if int(row[4]): self.flags |= flags.printable
+        if int(row[7]): self.flags |= flags.nozero
+        if int(row[8]): self.flags |= flags.nonan
+        if int(row[9]): self.flags |= flags.total
+        if int(row[10]): self.flags |= flags.pdf
+        if int(row[11]): self.flags |= flags.cdf
+
+        if self.type == 'DIST' or self.type == 'VECTORDIST':
+            self.min = float(row[12])
+            self.max = float(row[13])
+            self.bktsize = float(row[14])
+            self.size = int(row[15])
+
+        if self.type == 'FORMULA':
+            self.formula = self.db.allFormulas[self.stat]
+
+class Node(object):
+    def __init__(self, name):
+        self.name = name
+    def __str__(self):
+        return name
+
+class Database(object):
+    def __init__(self):
+        self.host = 'zizzer.pool'
+        self.user = ''
+        self.passwd = ''
+        self.db = 'm5stats'
+        self.cursor = None
+
+        self.allStats = []
+        self.allStatIds = {}
+        self.allStatNames = {}
+
+        self.allSubData = {}
+
+        self.allRuns = []
+        self.allRunIds = {}
+        self.allRunNames = {}
+
+        self.allBins = []
+        self.allBinIds = {}
+        self.allBinNames = {}
+
+        self.allFormulas = {}
+
+        self.stattop = {}
+        self.statdict = {}
+        self.statlist = []
+
+        self.mode = 'sum';
+        self.runs = None
+        self.bins = None
+        self.ticks = None
+        self.__dict__['get'] = type(self).sum
+
+    def query(self, sql):
+        self.cursor.execute(sql)
+
+    def update_dict(self, dict):
+        dict.update(self.stattop)
+
+    def append(self, stat):
+        statname = re.sub(':', '__', stat.name)
+        path = string.split(statname, '.')
+        pathtop = path[0]
+        fullname = ''
+
+        x = self
+        while len(path) > 1:
+            name = path.pop(0)
+            if not x.__dict__.has_key(name):
+                x.__dict__[name] = Node(fullname + name)
+            x = x.__dict__[name]
+            fullname = '%s%s.' % (fullname, name)
+
+        name = path.pop(0)
+        x.__dict__[name] = stat
+
+        self.stattop[pathtop] = self.__dict__[pathtop]
+        self.statdict[statname] = stat
+        self.statlist.append(statname)
+
+    def connect(self):
+        # connect
+        self.thedb = MySQLdb.connect(db=self.db,
+                                     host=self.host,
+                                     user=self.user,
+                                     passwd=self.passwd)
+
+        # create a cursor
+        self.cursor = self.thedb.cursor()
+
+        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
+                   from runs''')
+        for result in self.cursor.fetchall():
+            run = RunData(result);
+            self.allRuns.append(run)
+            self.allRunIds[run.run] = run
+            self.allRunNames[run.name] = run
+
+        self.query('select * from bins')
+        for id,name in self.cursor.fetchall():
+            self.allBinIds[int(id)] = name
+            self.allBinNames[name] = int(id)
+
+        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
+        for result in self.cursor.fetchall():
+            subdata = SubData(result)
+            if self.allSubData.has_key(subdata.stat):
+                self.allSubData[subdata.stat].append(subdata)
+            else:
+                self.allSubData[subdata.stat] = [ subdata ]
+
+        self.query('select * from formulas')
+        for id,formula in self.cursor.fetchall():
+            self.allFormulas[int(id)] = formula
+
+        StatData.db = self
+        self.query('select * from stats')
+        import info
+        for result in self.cursor.fetchall():
+            stat = info.NewStat(StatData(result))
+            self.append(stat)
+            self.allStats.append(stat)
+            self.allStatIds[stat.stat] = stat
+            self.allStatNames[stat.name] = stat
+
+    # Name: listbins
+    # Desc: Prints all bins matching regex argument, if no argument
+    #       is given all bins are returned
+    def listBins(self, regex='.*'):
+        print '%-50s %-10s' % ('bin name', 'id')
+        print '-' * 61
+        names = self.allBinNames.keys()
+        names.sort()
+        for name in names:
+            id = self.allBinNames[name]
+            print '%-50s %-10d' % (name, id)
+
+    # Name: listruns
+    # Desc: Prints all runs matching a given user, if no argument
+    #       is given all runs are returned
+    def listRuns(self, user=None):
+        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
+        print '-' * 62
+        for run in self.allRuns:
+            if user == None or user == run.user:
+                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
+
+    # Name: listTicks
+    # Desc: Prints all samples for a given run
+    def listTicks(self, run=None):
+        print "tick"
+        print "----------------------------------------"
+        sql = 'select distinct dt_tick from data where dt_stat=1950'
+        #if run != None:
+        #    sql += ' where dt_run=%d' % run
+        self.query(sql)
+        for r in self.cursor.fetchall():
+            print r[0]
+
+    # Name: liststats
+    # Desc: Prints all statistics that appear in the database,
+    #         the optional argument is a regular expression that can
+    #         be used to prune the result set
+    def listStats(self, regex=None):
+        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            stat = self.allStatNames[stat]
+            if rx == None or rx.match(stat.name):
+                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
+
+    # Name: liststats
+    # Desc: Prints all statistics that appear in the database,
+    #         the optional argument is a regular expression that can
+    #         be used to prune the result set
+    def listFormulas(self, regex=None):
+        print '%-60s %s' % ('formula name', 'formula')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            stat = self.allStatNames[stat]
+            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
+                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
+
+    def getStat(self, stats):
+        if type(stats) is not list:
+            stats = [ stats ]
+
+        ret = []
+        for stat in stats:
+            if type(stat) is int:
+                ret.append(self.allStatIds[stat])
+
+            if type(stat) is str:
+                rx = re.compile(stat)
+                for stat in self.allStats:
+                    if rx.match(stat.name):
+                        ret.append(stat)
+        return ret
+
+    def getBin(self, bins):
+        if type(bins) is not list:
+            bins = [ bins ]
+
+        ret = []
+        for bin in bins:
+            if type(bin) is int:
+                ret.append(bin)
+            elif type(bin) is str:
+                ret.append(self.allBinNames[bin])
+            else:
+                for name,id in self.allBinNames.items():
+                    if bin.match(name):
+                        ret.append(id)
+
+        return ret
+
+    def getNotBin(self, bin):
+        map = {}
+        for bin in getBin(bin):
+            map[bin] = 1
+
+        ret = []
+        for bin in self.allBinIds.keys():
+            if not map.has_key(bin):
+                ret.append(bin)
+
+        return ret
+
+    #########################################
+    # get the data
+    #
+    def inner(self, op, stat, bins, ticks, group=False):
+        sql = 'select '
+        sql += 'dt_stat as stat, '
+        sql += 'dt_run as run, '
+        sql += 'dt_x as x, '
+        sql += 'dt_y as y, '
+        if group:
+            sql += 'dt_tick as tick, '
+        sql += '%s(dt_data) as data ' % op
+        sql += 'from data '
+        sql += 'where '
+
+        if isinstance(stat, list):
+            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
+            sql += ' (%s)' % val
+        else:
+            sql += ' dt_stat=%d' % stat.stat
+
+        if self.runs != None and len(self.runs):
+            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
+            sql += ' and (%s)' % val
+
+        if bins != None and len(bins):
+            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
+            sql += ' and (%s)' % val
+
+        if ticks != None and len(ticks):
+            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
+            sql += ' and (%s)' % val
+
+        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
+        if group:
+            sql += ',dt_tick'
+        return sql
+
+    def outer(self, op_out, op_in, stat, bins, ticks):
+        sql = self.inner(op_in, stat, bins, ticks, True)
+        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
+        sql += 'group by stat,run,x,y'
+        return sql
+
+    # Name: sum
+    # Desc: given a run, a stat and an array of samples and bins,
+    #        sum all the bins and then get the standard deviation of the
+    #        samples for non-binned runs. This will just return the average
+    #        of samples, however a bin array still must be passed
+    def sum(self, stat, bins, ticks):
+        return self.inner('sum', stat, bins, ticks)
+
+    # Name: avg
+    # Desc: given a run, a stat and an array of samples and bins,
+    #        sum all the bins and then average the samples for non-binned
+    #        runs this will just return the average of samples, however
+    #        a bin array still must be passed
+    def avg(self, stat, bins, ticks):
+        return self.outer('avg', 'sum', stat, bins, ticks)
+
+    # Name: stdev
+    # Desc: given a run, a stat and an array of samples and bins,
+    #        sum all the bins and then get the standard deviation of the
+    #        samples for non-binned runs. This will just return the average
+    #        of samples, however a bin array still must be passed
+    def stdev(self, stat, bins, ticks):
+        return self.outer('stddev', 'sum', stat, bins, ticks)
+
+    def __getattribute__(self, attr):
+        if attr != 'get':
+            return super(Database, self).__getattribute__(attr)
+
+        if self.__dict__['get'] == type(self).sum:
+            return 'sum'
+        elif self.__dict__['get'] == type(self).avg:
+            return 'avg'
+        elif self.__dict__['get'] == type(self).stdev:
+            return 'stdev'
+        else:
+            return ''
+
+    def __setattr__(self, attr, value):
+        if attr != 'get':
+            super(Database, self).__setattr__(attr, value)
+            return
+
+        if value == 'sum':
+            self.__dict__['get'] = type(self).sum
+        elif value == 'avg':
+            self.__dict__['get'] = type(self).avg
+        elif value == 'stdev':
+            self.__dict__['get'] = type(self).stdev
+        else:
+            raise AttributeError, "can only set get to: sum | avg | stdev"
+
+    def data(self, stat, bins=None, ticks=None):
+        if bins is None:
+            bins = self.bins
+        if ticks is None:
+            ticks = self.ticks
+        sql = self.__dict__['get'](self, stat, bins, ticks)
+        self.query(sql)
+
+        runs = {}
+        for x in self.cursor.fetchall():
+            data = Data(x)
+            if not runs.has_key(data.run):
+                runs[data.run] = {}
+            if not runs[data.run].has_key(data.x):
+                runs[data.run][data.x] = {}
+
+            runs[data.run][data.x][data.y] = data.data
+        return runs