diff options
author | Ali Saidi <saidi@eecs.umich.edu> | 2004-08-10 13:32:15 -0400 |
---|---|---|
committer | Ali Saidi <saidi@eecs.umich.edu> | 2004-08-10 13:32:15 -0400 |
commit | abcd11d36f90174f1c8c5b7bff013809572e1a10 (patch) | |
tree | 4818bbe5adb505d5c4995bbdde7a741d871582c0 /util/stats/dbinit.py | |
parent | 6ee18e46163c0342c93f9a879c575e5cafe32b96 (diff) | |
parent | 00270ba3a7cd2e9e33df79b3adcc355a6ee3fb40 (diff) | |
download | gem5-abcd11d36f90174f1c8c5b7bff013809572e1a10.tar.xz |
Merge zizzer:/bk/m5 into zeep.eecs.umich.edu:/z/saidi/work/m5
--HG--
extra : convert_revision : 006842b706c7d972f6722b2767c82c6761c447b2
Diffstat (limited to 'util/stats/dbinit.py')
-rw-r--r-- | util/stats/dbinit.py | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/util/stats/dbinit.py b/util/stats/dbinit.py new file mode 100644 index 000000000..686f55c98 --- /dev/null +++ b/util/stats/dbinit.py @@ -0,0 +1,388 @@ +import MySQLdb + +class MyDB(object): + def __init__(self, options): + self.name = options.db + self.host = options.host + self.user = options.user + self.passwd = options.passwd + self.mydb = None + self.cursor = None + + def admin(self): + self.close() + self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, + passwd=self.passwd) + self.cursor = self.mydb.cursor() + + def connect(self): + self.close() + self.mydb = MySQLdb.connect(db=self.name, host=self.host, + user=self.user, passwd=self.passwd) + self.cursor = self.mydb.cursor() + + def close(self): + if self.mydb is not None: + self.mydb.close() + self.cursor = None + + def query(self, sql): + self.cursor.execute(sql) + + def drop(self): + self.query('DROP DATABASE IF EXISTS %s' % self.name) + + def create(self): + self.query('CREATE DATABASE %s' % self.name) + + def populate(self): + # + # Each run (or simulation) gets its own entry in the runs table to + # group stats by where they were generated + # + # COLUMNS: + # 'id' is a unique identifier for each run to be used in other + # tables. + # 'name' is the user designated name for the data generated. It is + # configured in the simulator. + # 'user' identifies the user that generated the data for the given + # run. + # 'project' another name to identify runs for a specific goal + # 'date' is a timestamp for when the data was generated. It can be + # used to easily expire data that was generated in the past. + # 'expire' is a timestamp for when the data should be removed from + # the database so we don't have years worth of junk. + # + # INDEXES: + # 'run' is indexed so you can find out details of a run if the run + # was retreived from the data table. + # 'name' is indexed so that two all run names are forced to be unique + # + self.query(''' + CREATE TABLE runs( + rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + rn_name VARCHAR(200) NOT NULL, + rn_sample VARCHAR(32) NOT NULL, + rn_user VARCHAR(32) NOT NULL, + rn_project VARCHAR(100) NOT NULL, + rn_date TIMESTAMP NOT NULL, + rn_expire TIMESTAMP NOT NULL, + PRIMARY KEY (rn_id), + UNIQUE (rn_name,rn_sample) + ) TYPE=InnoDB''') + + # + # We keep the bin names separate so that the data table doesn't get + # huge since bin names are frequently repeated. + # + # COLUMNS: + # 'id' is the unique bin identifer. + # 'name' is the string name for the bin. + # + # INDEXES: + # 'bin' is indexed to get the name of a bin when data is retrieved + # via the data table. + # 'name' is indexed to get the bin id for a named bin when you want + # to search the data table based on a specific bin. + # + self.query(''' + CREATE TABLE bins( + bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + bn_name VARCHAR(255) NOT NULL, + PRIMARY KEY(bn_id), + UNIQUE (bn_name) + ) TYPE=InnoDB''') + + # + # The stat table gives us all of the data for a particular stat. + # + # COLUMNS: + # 'stat' is a unique identifier for each stat to be used in other + # tables for references. + # 'name' is simply the simulator derived name for a given + # statistic. + # 'descr' is the description of the statistic and what it tells + # you. + # 'type' defines what the stat tells you. Types are: + # SCALAR: A simple scalar statistic that holds one value + # VECTOR: An array of statistic values. Such a something that + # is generated per-thread. Vectors exist to give averages, + # pdfs, cdfs, means, standard deviations, etc across the + # stat values. + # DIST: Is a distribution of data. When the statistic value is + # sampled, its value is counted in a particular bucket. + # Useful for keeping track of utilization of a resource. + # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) + # VECTORDIST: Can be used when the distribution needs to be + # factored out into a per-thread distribution of data for + # example. It can still be summed across threads to find + # the total distribution. + # VECTOR2D: Can be used when you have a stat that is not only + # per-thread, but it is per-something else. Like + # per-message type. + # FORMULA: This statistic is a formula, and its data must be + # looked up in the formula table, for indicating how to + # present its values. + # 'subdata' is potentially used by any of the vector types to + # give a specific name to all of the data elements within a + # stat. + # 'print' indicates whether this stat should be printed ever. + # (Unnamed stats don't usually get printed) + # 'prereq' only print the stat if the prereq is not zero. + # 'prec' number of decimal places to print + # 'nozero' don't print zero values + # 'nonan' don't print NaN values + # 'total' for vector type stats, print the total. + # 'pdf' for vector type stats, print the pdf. + # 'cdf' for vector type stats, print the cdf. + # + # The Following are for dist type stats: + # 'min' is the minimum bucket value. Anything less is an underflow. + # 'max' is the maximum bucket value. Anything more is an overflow. + # 'bktsize' is the approximate number of entries in each bucket. + # 'size' is the number of buckets. equal to (min/max)/bktsize. + # + # INDEXES: + # 'stat' is indexed so that you can find out details about a stat + # if the stat id was retrieved from the data table. + # 'name' is indexed so that you can simply look up data about a + # named stat. + # + self.query(''' + CREATE TABLE stats( + st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + st_name VARCHAR(255) NOT NULL, + st_descr TEXT NOT NULL, + st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", + "VECTOR2D", "FORMULA") NOT NULL, + st_print BOOL NOT NULL, + st_prereq SMALLINT UNSIGNED NOT NULL, + st_prec TINYINT NOT NULL, + st_nozero BOOL NOT NULL, + st_nonan BOOL NOT NULL, + st_total BOOL NOT NULL, + st_pdf BOOL NOT NULL, + st_cdf BOOL NOT NULL, + st_min DOUBLE NOT NULL, + st_max DOUBLE NOT NULL, + st_bktsize DOUBLE NOT NULL, + st_size SMALLINT UNSIGNED NOT NULL, + PRIMARY KEY (st_id), + UNIQUE (st_name) + ) TYPE=InnoDB''') + + # + # This is the main table of data for stats. + # + # COLUMNS: + # 'stat' refers to the stat field given in the stat table. + # + # 'x' referrs to the first dimension of a multi-dimensional stat. For + # a vector, x will start at 0 and increase for each vector + # element. + # For a distribution: + # -1: sum (for calculating standard deviation) + # -2: sum of squares (for calculating standard deviation) + # -3: total number of samples taken (for calculating + # standard deviation) + # -4: minimum value + # -5: maximum value + # -6: underflow + # -7: overflow + # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second + # dimension. + # 'run' is the run that the data was generated from. Details up in + # the run table + # 'tick' is a timestamp generated by the simulator. + # 'bin' is the name of the bin that the data was generated in, if + # any. + # 'data' is the actual stat value. + # + # INDEXES: + # 'stat' is indexed so that a user can find all of the data for a + # particular stat. It is not unique, because that specific stat + # can be found in many runs, bins, and samples, in addition to + # having entries for the mulidimensional cases. + # 'run' is indexed to allow a user to remove all of the data for a + # particular execution run. It can also be used to allow the + # user to print out all of the data for a given run. + # + self.query(''' + CREATE TABLE data( + dt_stat SMALLINT UNSIGNED NOT NULL, + dt_x SMALLINT NOT NULL, + dt_y SMALLINT NOT NULL, + dt_run SMALLINT UNSIGNED NOT NULL, + dt_tick BIGINT UNSIGNED NOT NULL, + dt_bin SMALLINT UNSIGNED NOT NULL, + dt_data DOUBLE NOT NULL, + INDEX (dt_stat), + INDEX (dt_run), + UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin) + ) TYPE=InnoDB;''') + + # + # Names and descriptions for multi-dimensional stats (vectors, etc.) + # are stored here instead of having their own entry in the statistics + # table. This allows all parts of a single stat to easily share a + # single id. + # + # COLUMNS: + # 'stat' is the unique stat identifier from the stat table. + # 'x' is the first dimension for multi-dimensional stats + # corresponding to the data table above. + # 'y' is the second dimension for multi-dimensional stats + # corresponding to the data table above. + # 'name' is the specific subname for the unique stat,x,y combination. + # 'descr' is the specific description for the uniqe stat,x,y + # combination. + # + # INDEXES: + # 'stat' is indexed so you can get the subdata for a specific stat. + # + self.query(''' + CREATE TABLE subdata( + sd_stat SMALLINT UNSIGNED NOT NULL, + sd_x SMALLINT NOT NULL, + sd_y SMALLINT NOT NULL, + sd_name VARCHAR(255) NOT NULL, + sd_descr TEXT, + UNIQUE (sd_stat,sd_x,sd_y) + ) TYPE=InnoDB''') + + + # + # The formula table is maintained separately from the data table + # because formula data, unlike other stat data cannot be represented + # there. + # + # COLUMNS: + # 'stat' refers to the stat field generated in the stat table. + # 'formula' is the actual string representation of the formula + # itself. + # + # INDEXES: + # 'stat' is indexed so that you can just look up a formula. + # + self.query(''' + CREATE TABLE formulas( + fm_stat SMALLINT UNSIGNED NOT NULL, + fm_formula BLOB NOT NULL, + PRIMARY KEY(fm_stat) + ) TYPE=InnoDB''') + + # + # Each stat used in each formula is kept in this table. This way, if + # you want to print out a particular formula, you can simply find out + # which stats you need by looking in this table. Additionally, when + # you remove a stat from the stats table and data table, you remove + # any references to the formula in this table. When a formula is no + # longer referred to, you remove its entry. + # + # COLUMNS: + # 'stat' is the stat id from the stat table above. + # 'child' is the stat id of a stat that is used for this formula. + # There may be many children for any given 'stat' (formula) + # + # INDEXES: + # 'stat' is indexed so you can look up all of the children for a + # particular stat. + # 'child' is indexed so that you can remove an entry when a stat is + # removed. + # + self.query(''' + CREATE TABLE formula_ref( + fr_stat SMALLINT UNSIGNED NOT NULL, + fr_run SMALLINT UNSIGNED NOT NULL, + UNIQUE (fr_stat,fr_run), + INDEX (fr_stat), + INDEX (fr_run) + ) TYPE=InnoDB''') + + # COLUMNS: + # 'event' is the unique event id from the event_desc table + # 'run' is simulation run id that this event took place in + # 'tick' is the tick when the event happened + # + # INDEXES: + # 'event' is indexed so you can look up all occurences of a + # specific event + # 'run' is indexed so you can find all events in a run + # 'tick' is indexed because we want the unique thing anyway + # 'event,run,tick' is unique combination + self.query(''' + CREATE TABLE events( + ev_event SMALLINT UNSIGNED NOT NULL, + ev_run SMALLINT UNSIGNED NOT NULL, + ev_tick BIGINT UNSIGNED NOT NULL, + INDEX(ev_event), + INDEX(ev_run), + INDEX(ev_tick), + UNIQUE(ev_event,ev_run,ev_tick) + ) TYPE=InnoDB''') + + # COLUMNS: + # 'id' is the unique description id + # 'name' is the name of the event that occurred + # + # INDEXES: + # 'id' is indexed because it is the primary key and is what you use + # to look up the descriptions + # 'name' is indexed so one can find the event based on name + # + self.query(''' + CREATE TABLE event_names( + en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, + en_name VARCHAR(255) NOT NULL, + PRIMARY KEY (en_id), + UNIQUE (en_name) + ) TYPE=InnoDB''') + + def clean(self): + self.query(''' + DELETE data + FROM data + LEFT JOIN runs ON dt_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE formula_ref + FROM formula_ref + LEFT JOIN runs ON fr_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE formulas + FROM formulas + LEFT JOIN formula_ref ON fm_stat=fr_stat + WHERE fr_stat IS NULL''') + + self.query(''' + DELETE stats + FROM stats + LEFT JOIN data ON st_id=dt_stat + WHERE dt_stat IS NULL''') + + self.query(''' + DELETE subdata + FROM subdata + LEFT JOIN data ON sd_stat=dt_stat + WHERE dt_stat IS NULL''') + + self.query(''' + DELETE bins + FROM bins + LEFT JOIN data ON bn_id=dt_bin + WHERE dt_bin IS NULL''') + + self.query(''' + DELETE events + FROM events + LEFT JOIN runs ON ev_run=rn_id + WHERE rn_id IS NULL''') + + self.query(''' + DELETE event_names + FROM event_names + LEFT JOIN events ON en_id=ev_event + WHERE ev_event IS NULL''') |