#!/usr/bin/perl # Copyright (c) 2001-2005 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer; # redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution; # neither the name of the copyright holders nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Steve Reinhardt # # This script diffs two SimpleScalar statistics output files. 
#

use Getopt::Std;

#
# Command-line options (getopts stores each one in $opt_<letter>):
#
# -t thresh  sets threshold for ignoring differences (in %)
# -p         sorts differences by % chg (default is alphabetic)
# -f         ignores fetch-loss statistics
# -d         ignores all distributions
# -h         diffs header info separately from stats
# -n num     prints only the top <num> differences
#
getopts('dfn:pt:h');

if (@ARGV < 2) {
    print "\nError: need two file arguments (<reference> <new>).\n";
    print " Options: -d = Ignore distributions\n";
    print " -f = Ignore fetch-loss stats\n";
    print " -p = Sort errors by percentage\n";
    print " -h = Diff header info separately from stats\n";
    print " -n <num> = Print top <num> errors (default 20)\n";
    # The advertised default now matches $err_thresh below (0, not 1).
    print " -t <num> = Error threshold in percent (default 0)\n\n";
    # Plain non-zero exit: "die -1" printed a stray "-1 at ... line N."
    # and produced an unpredictable exit status.  2 keeps usage errors
    # distinct from the 0/1 comparison result.
    exit 2;
}

# Three-argument open so that nothing in the file name can be taken as
# part of the open mode.  The REF and NEW handles are read later on by
# the header and statistics parsing code.
open(REF, '<', $ARGV[0]) or die "Error: can't open $ARGV[0]: $!\n";
open(NEW, '<', $ARGV[1]) or die "Error: can't open $ARGV[1]: $!\n";

#
# Tunables, adjustable via -t and -n
#

# Ignorable error (in percent)
$err_thresh = ($opt_t) ? $opt_t : 0;

# Number of stats to print before omitting
$omit_count = ($opt_n) ? $opt_n : 20;

#
# First copy everything up to the simulation statistics to a pair of
# temporary files, stripping out date-related items, and do a plain
# diff. Any differences in the arguments are not necessarily an issue;
# any differences in the program output should be caught by the EIO
# mechanism if an EIO file is used.
#

# strict applies from here to the end of the file.  warnings are left off
# on purpose: statistic values are free-form strings that get compared
# numerically, which would otherwise flood stderr.
use strict;
use File::Temp qw(tempfile);

# Option flags, input handles (REF, NEW) and thresholds are set up by the
# option-handling code earlier in this file.
our ($opt_d, $opt_f, $opt_h, $opt_p, $err_thresh, $omit_count);

# Names of the temporary header files (only set when -h is given).
my ($refheader, $newheader);

#
# copy_header($inhandle, $outname)
#
# Copies lines from the open filehandle $inhandle to the file $outname,
# dropping lines that legitimately vary between runs.  Reading stops once
# the "Begin Simulation Statistics" line has been consumed, so the handle
# is left positioned at the first statistics line.  Dies if $outname
# cannot be written.
#
sub copy_header {
    my ($inhandle, $outname) = @_;

    open(my $out, '>', $outname)
        or die "Error: can't open $outname: $!\n";

    while (my $line = <$inhandle>) {
        # strip out lines that can vary
        next if $line =~ /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/;
        last if $line =~ /Begin Simulation Statistics/;
        print $out $line;
    }

    close($out) or die "Error: can't write $outname: $!\n";
    return;
}

if ($opt_h) {
    # Diff header separately from stats.  tempfile() creates the files
    # with unpredictable names, replacing fixed "/tmp/...$$" names that
    # another user could pre-create; UNLINK removes them even if the
    # script dies before cleanup() runs.
    my $tmp_fh;
    ($tmp_fh, $refheader) =
        tempfile('smt-test.refheader.XXXXXX', TMPDIR => 1, UNLINK => 1);
    close($tmp_fh);
    ($tmp_fh, $newheader) =
        tempfile('smt-test.newheader.XXXXXX', TMPDIR => 1, UNLINK => 1);
    close($tmp_fh);

    copy_header(\*REF, $refheader);
    copy_header(\*NEW, $newheader);

    print "\n===== Header and program output differences =====\n\n";

    # List-form pipe open: diff is run directly, without a shell.
    if (open(my $diff, '-|', 'diff', $refheader, $newheader)) {
        while (my $line = <$diff>) {
            print $line;
        }
        # diff exits non-zero when the files differ, so the close status
        # is deliberately not treated as an error.
        close($diff);
    }
    else {
        warn "Error: can't run diff: $!\n";
    }

    print "\n===== Statistics differences =====\n\n";
}

#
# Now parse statistics
#

#
# parse_file($stathandle)
#
# Takes an open filehandle and returns a reference to a hash containing
# all the statistics variables and their values.  Reading stops at the
# "End Simulation Statistics" line; the handle is closed before returning.
#
# Distribution buckets are stored as "<dist name>::<bucket label>".
# Lines from which no statistic name can be extracted (for example lines
# that are empty once the trailing comment is stripped) are skipped
# rather than being stored under an empty key.
#
sub parse_file {
    my ($stathandle) = @_;

    my $in_dist;        # name of the distribution being read, if any
    my %value_of;       # stat name => value

    while (my $line = <$stathandle>) {
        next if $line =~ /^\s*$/;       # skip blank lines
        next if $line =~ /^\*\*Ignore/; # temporary, to make totaling scripts easy for ISCA 03
        last if $line =~ /End Simulation Statistics/;

        $line =~ s/ *#.*//;             # strip comments

        my ($stat, $value);

        if ($line =~ /^Memory usage: (\d+) KBytes/) {
            ($stat, $value) = ('memory usage', $1);
        }
        elsif ($in_dist) {
            if ($in_dist =~ /^fetch_loss_counters/) {
                if ($line =~ /^fetch_loss_counters_\d+\.end/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }
                next if $opt_f;
                ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
            }
            else {
                if ($line =~ /(.*)\.end_dist/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }

                # bail out if we are ignoring dists...
                next if $opt_d;

                if ($line =~ /(.*)\.(min|max)_value/) {
                    # treat these like normal stats
                    ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
                }
                else {
                    # this is ugly because labels in the distribution
                    # buckets don't start in column 0 and may include
                    # embedded spaces
                    my ($label, $count) =
                        $line =~ /^\s*(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/;
                    $label = '' unless defined $label;
                    $stat  = $in_dist . '::' . $label;
                    $value = $count;
                }
            }
        }
        elsif ($line =~ /(.*)\.start_dist/) {
            # start line of distribution: set $in_dist flag
            # and save distribution name for future reference
            $in_dist = $1;
            ($stat, $value) = ($in_dist, 0);
        }
        elsif ($line =~ /^(fetch_loss_counters_\d+)\.start/) {
            # treat fetch loss counters like distribution, sort of
            $in_dist = $1;
            ($stat, $value) = ($in_dist, 0);
        }
        else {
            ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
        }

        # Nothing recognisable on this line: do not invent a '' entry,
        # which would later be reported as a missing statistic.
        next unless defined $stat;

        $value_of{$stat} = $value;
    }

    close($stathandle);

    return \%value_of;
}

#
# pct_diff($old, $new) returns percent difference from $old to $new.
# A change away from zero has no finite percentage, so 9999 is returned
# as a sentinel; zero to zero is 0.
#
sub pct_diff {
    my ($old, $new) = @_;

    return 100 * ($new - $old) / $old if $old != 0;
    return ($new == 0) ? 0 : 9999;
}

#
# _value_format(@values) returns a printf format ("%10.<n>f") with as many
# decimal places as the most precise of @values, judged by the digits
# following the last '.' in each value's text.
#
sub _value_format {
    my @values = @_;

    my $digits = 0;
    foreach my $value (@values) {
        my $pointpos = rindex($value, '.');
        next if $pointpos < 0;
        my $decimals = length($value) - $pointpos - 1;
        $digits = $decimals if $decimals > $digits;
    }

    return "%10.${digits}f";
}

#
# _print_diff_row($statname, $refvalue, $newvalue, $reldiff) prints one
# table row: both values, their absolute difference and the percent change.
#
sub _print_diff_row {
    my ($statname, $refvalue, $newvalue, $reldiff) = @_;

    my $fmt = _value_format($refvalue, $newvalue);

    printf(" %-30s $fmt $fmt $fmt %+7.2f%%\n",
           $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff);
    return;
}

#
# _print_stat_list($hashref, @names) prints each name in @names together
# with its value from %$hashref.
#
sub _print_stat_list {
    my ($hashref, @names) = @_;

    foreach my $name (@names) {
        my $value = $hashref->{$name};
        printf " %-50s ", $name;
        print((defined $value ? $value : ''), "\n");
    }
    return;
}

#
# Statistics to ignore: these relate to simulator performance, not
# correctness, so don't fail on changes here.
#
my %ignore = (
    'host_seconds'   => 1,
    'host_tick_rate' => 1,
    'host_inst_rate' => 1,
    'host_mem_usage' => 1,
);

#
# List of key statistics (always displayed)
# ==> list stats here WITHOUT trailing thread ID
#
my @key_stat_list = (
    'COM:IPC',
    'ISSUE:MSIPC',
    'COM:count',
    'host_inst_rate',
    'sim_insts',
    'sim_ticks',
    'host_mem_usage',
);

# Matches any stat name containing one of the key names (so a trailing
# thread ID does not matter); the names are matched literally.
my $key_stat_pattern = join('|', map { quotemeta } @key_stat_list);
my $key_stat_re      = qr/(?:$key_stat_pattern)/;

# Read the statistics from each file
my $max_err_mag = 0;

my $refhash = parse_file(\*REF);
my $newhash = parse_file(\*NEW);

# The string sim-smt prints on a divide by zero
my $divbyzero = '<err: divide by zero>';

my @missing_stats;  # in the reference file but not in the new one
my @key_stats;      # [name, ref value, new value] for every key stat
my @errs;           # [name, ref value, new value, pct change] above threshold

foreach my $stat (sort keys %$refhash) {
    my $refvalue = $refhash->{$stat};
    my $newvalue = $newhash->{$stat};

    if (!defined($newvalue)) {
        # stat missing from new file
        push @missing_stats, $stat;
        next;
    }

    if ($stat =~ $key_stat_re) {
        # key statistics: always record & display changes in these
        push @key_stats, [$stat, $refvalue, $newvalue];
    }

    # Only stats that changed and are not on the "ignore" list count.
    if (!$ignore{$stat} && $refvalue ne $newvalue) {
        if ($refvalue eq $divbyzero || $newvalue eq $divbyzero) {
            # one or the other was a divide by zero:
            # no point in trying to quantify error
            print "$stat: $refvalue --> $newvalue\n";
        }
        else {
            my $reldiff = pct_diff($refvalue, $newvalue);
            my $diffmag = abs($reldiff);

            push @errs, [$stat, $refvalue, $newvalue, $reldiff]
                if $diffmag > $err_thresh;

            # tracked regardless of the threshold; drives the exit status
            $max_err_mag = $diffmag if $diffmag > $max_err_mag;
        }
    }

    # remove from new hash so we can detect added stats
    delete $newhash->{$stat};
}

#
# All done. Print comparison summary.
#

printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag);

printf(" %-30s %10s %10s %10s %7s\n",
       ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg');

print "Key statistics:\n\n";

foreach my $key_stat (@key_stats) {
    my ($statname, $refvalue, $newvalue) = @$key_stat;

    _print_diff_row($statname, $refvalue, $newvalue,
                    pct_diff($refvalue, $newvalue));
}

# %g rather than %d so that a fractional threshold is shown as given.
printf("\nLargest %s relative errors (> %g%%):\n\n", $omit_count, $err_thresh);

if ($opt_p) {
    # sort differences by percent change
    @errs = sort { abs($b->[3]) <=> abs($a->[3]) } @errs;
}

my $num_errs = 0;

foreach my $err (@errs) {
    _print_diff_row(@$err);

    # only print top N errors
    if (++$num_errs >= $omit_count) {
        # say so only when something really was left out
        print "[... additional errors omitted ...]\n" if $num_errs < @errs;
        last;
    }
}

#
# Report missing stats, but first filter out distribution buckets:
# these are mostly noise
#
@missing_stats = grep { !/::(\d+|overflows)?$/ } @missing_stats;

my $missing_count = scalar(@missing_stats);

if ($missing_count) {
    print "\nMissing $missing_count reference statistics:\n\n";
    _print_stat_list($refhash, @missing_stats);
}

#
# Any stats left in newhash are added since the reference file
#
# first filter out distribution buckets: mostly noise
my @added_stats = grep { !/::(\d+|overflows)?$/ } keys %$newhash;

my $added_count = scalar(@added_stats);

if ($added_count) {
    print "\nFound $added_count new statistics:\n\n";
    _print_stat_list($newhash, sort @added_stats);
}

cleanup();

# Exit code is 0 if some stats found & no stats error, 1 otherwise
my $status = (@key_stats && $max_err_mag == 0.0) ? 0 : 1;
exit $status;

#
# cleanup() removes the temporary header files, if any were created.
#
sub cleanup {
    unlink($refheader) if ($refheader);
    unlink($newheader) if ($newheader);
}