summaryrefslogtreecommitdiff
path: root/util/rundiff
diff options
context:
space:
mode:
Diffstat (limited to 'util/rundiff')
-rw-r--r--util/rundiff511
1 files changed, 511 insertions, 0 deletions
diff --git a/util/rundiff b/util/rundiff
new file mode 100644
index 000000000..064e7e136
--- /dev/null
+++ b/util/rundiff
@@ -0,0 +1,511 @@
+#!/usr/bin/perl
+
+# Copyright (c) 2001 Nathan L. Binkert
+# All rights reserved.
+#
+# Permission to redistribute, use, copy, and modify this software
+# without fee is hereby granted, provided that the following
+# conditions are met:
+#
+# 1. This entire notice is included in all source code copies of any
+# software which is or includes a copy or modification of this
+# software.
+# 2. The name of the author may not be used to endorse or promote
+# products derived from this software without specific prior
+# written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+use Algorithm::Diff qw(diff);
+use vars qw ($opt_C $opt_c $opt_u $opt_U);
+
+# Output format switches read by Hunk::output_diff: the unified format is
+# chosen whenever $opt_u is defined, so it is the fixed default here.
+$opt_u = "";
+$opt_c = undef;
+
+# Maximum number of lines buffered from each file for a single diff pass.
+$diffsize = 2000;
+# After we've read up to a certain point in each file, the number of items
+# we've read from each file will differ by $FLD (could be 0)
+my $File_Length_Difference = 0;
+my $Context_Lines = 9;
+
+$progname = $0;
+if (scalar(@ARGV) != 2) {
+    usage();
+}
+
+my ($filename1, $filename2);
+($filename1, $start1) = parse_filearg($ARGV[0]);
+($filename2, $start2) = parse_filearg($ARGV[1]);
+
+if ($filename1 eq "-" && $filename2 eq "-") {
+    die "Only one of the inputs may be standard in\n";
+}
+
+my ($file1, $file2);
+if ($filename1 eq "-") {
+    $file1 = STDIN;
+} else {
+    # Report the name that failed to open; $file1 is still unset here.
+    open(FILE1, $filename1) || die "can't open $filename1: $!\n";
+    $file1 = FILE1;
+}
+
+if ($filename2 eq "-") {
+    $file2 = STDIN;
+} else {
+    open(FILE2, $filename2) || die "can't open $filename2: $!\n";
+    $file2 = FILE2;
+}
+
+# Skip the requested number of leading lines in each input.  The offsets
+# count the lines already consumed and are added to the line numbers printed
+# in the hunk headers.
+my $file_offset1 = ffw($file1, $start1);
+my $file_offset2 = ffw($file2, $start2);
+
+$skip_first = 0;
+# @buf* hold the lines handed to diff(); @printbuf* hold the lines printed.
+my (@buf1, @buf2, @printbuf1, @printbuf2);
+
+$Compare_Ahead = 0;
+
+# Read both inputs in lock step.  While the oldest buffered lines agree they
+# are dropped again; on a mismatch both buffers are filled up to $diffsize
+# lines and diffed.
+while (!eof($file1) && !eof($file2)) {
+    my $line1 = <$file1>; chomp $line1;
+    my $line2 = <$file2>; chomp $line2;
+    my $printline1 = $line1;
+    my $printline2 = $line2;
+
+    push @buf1, $line1;
+    push @buf2, $line2;
+    push @printbuf1, $printline1;
+    push @printbuf2, $printline2;
+
+#    while ($Compare_Ahead < $Context_Lines) {
+#        $line1 = @buf1[$Compare_Ahead];
+#        $line2 = @buf2[$Compare_Ahead];
+#        $line2 =~ s/ *--.*$//;
+#        if ($line1 ne $line2) { last; }
+#        ++$Compare_Ahead;
+#    }
+
+    $line1 = $buf1[$Compare_Ahead];
+    $line2 = $buf2[$Compare_Ahead];
+    # Ignore a trailing " -- ..." part of the second file's line for this
+    # quick comparison only; the buffered copy keeps the full text.
+    $line2 =~ s/ *--.*$//;
+
+    if ($line1 ne $line2) {
+        while (!eof($file1) && scalar(@buf1) < $diffsize) {
+            $line = <$file1>; chomp $line;
+            my $printline = $line;
+
+            push @printbuf1, $printline;
+            push @buf1, $line;
+        }
+
+        while (!eof($file2) && scalar(@buf2) < $diffsize) {
+            $line = <$file2>; chomp $line;
+            my $printline = $line;
+#            $line =~ s/ *--.*$//;
+
+            push @printbuf2, $printline;
+            push @buf2, $line;
+        }
+
+        # Item numbers returned by diff() are relative to the current
+        # buffers, which start at matching positions in both files, so the
+        # running length difference must start from zero for every pass.
+        $File_Length_Difference = 0;
+        my $diffs = diff(\@buf1, \@buf2);
+
+        next unless @$diffs;
+
+        my @hunklist;
+        my ($hunk,$oldhunk);
+        # Loop over hunks. If a hunk overlaps with the last hunk, join them.
+        # Otherwise, queue the old one.  The final (possibly merged) hunk is
+        # left in $hunk and is not queued by this loop.
+        foreach my $piece (@$diffs) {
+            $hunk = Hunk->new($piece, $Context_Lines, scalar(@buf1));
+            next unless $oldhunk;
+
+            if ($hunk->does_overlap($oldhunk)) {
+                $hunk->prepend_hunk($oldhunk);
+            } else {
+                push @hunklist, $oldhunk;
+            }
+        } continue {
+            $oldhunk = $hunk;
+        }
+
+        # Walk back from the end of the queue to the most recent hunk whose
+        # "change" flag is set; queued hunks after it, and the unqueued
+        # final hunk, are dropped when the queue is not empty.
+        # NOTE(review): presumably because the tail of a truncated buffer
+        # cannot be diffed reliably -- confirm.
+        my $change = 0;
+        while (scalar(@hunklist) && !$change) {
+            $hunk = pop @hunklist;
+            $change = $hunk->{"change"};
+        }
+        push @hunklist, $hunk;
+        $last_start1 = $hunk->{"start1"};
+        $last_start2 = $hunk->{"start2"};
+        $last_end1 = $hunk->{"end1"};
+        $last_end2 = $hunk->{"end2"};
+
+        while (scalar(@hunklist)) {
+            $hunk = shift @hunklist;
+#            $hunk->output_diff(\@buf1, \@buf2);
+            $hunk->output_diff(\@printbuf1, \@printbuf2);
+        }
+
+        # Discard everything up to shortly before the end of the last
+        # printed hunk and advance the absolute offsets by the same amount.
+        $last_end1 -= $Context_Lines - 1;
+        $last_end2 -= $Context_Lines - 1;
+        $file_offset1 += $last_end1;
+        $file_offset2 += $last_end2;
+        @printbuf1 = @printbuf1[$last_end1..$#printbuf1];
+        @printbuf2 = @printbuf2[$last_end2..$#printbuf2];
+        @buf1 = @buf1[$last_end1..$#buf1];
+        @buf2 = @buf2[$last_end2..$#buf2];
+
+        # Keep dropping the oldest line of both buffers while the lines
+        # $Context_Lines ahead still agree.
+        while (scalar(@buf1) > $Context_Lines &&
+               scalar(@buf2) > $Context_Lines) {
+            last if $buf1[$Context_Lines] ne $buf2[$Context_Lines];
+            shift @printbuf1;
+            shift @printbuf2;
+            shift @buf1;
+            shift @buf2;
+            ++$file_offset1;
+            ++$file_offset2;
+        }
+    } else {
+        # Lines agree: drop the oldest buffered line from both files.
+        ++$file_offset1;
+        ++$file_offset2;
+        shift @printbuf1;
+        shift @printbuf2;
+        shift @buf1;
+        shift @buf2;
+    }
+}
+
+close $file1;
+close $file2;
+
+# Fast-forward a filehandle by reading and discarding its leading lines.
+#
+# Arguments:
+#   $_[0] - filehandle to read from
+#   $_[1] - number of lines to skip
+#
+# Returns the number of lines skipped.  Dies when not called with exactly
+# two arguments, or when the input ends before the requested number of
+# lines has been skipped.
+sub ffw {
+    if (scalar(@_) != 2) { die "improper usage of ffw\n"; }
+
+    my ($FILE, $start) = @_;
+    my $count = 0;
+
+    while ($count < $start && !eof($FILE)) {
+        my $skipped = <$FILE>;
+        $count++;
+    }
+
+    # Compare against the lines actually consumed, so a file that is short
+    # by exactly one line is rejected as well.
+    if ($count < $start) { die "File too short for ffw amount\n"; }
+    return $count;
+}
+
+# Split a command-line argument of the form "file" or "file:start".
+#
+# Arguments:
+#   $_[0] - the raw argument string
+#
+# Returns the list (file, start); start is 0 when no ":start" part is
+# given.  Calls usage() when the argument has more than two ":"-separated
+# fields.
+sub parse_filearg {
+    my ($arg) = @_;
+
+    # Split into a lexical list: a void-context split no longer stores its
+    # result in @_ on newer perls, which silently dropped the start value.
+    my @parts = split /:/, $arg;
+    if (scalar(@parts) > 2) { usage(); }
+
+    my $file  = $parts[0];
+    my $start = scalar(@parts) > 1 ? $parts[1] : 0;
+
+    return ($file, $start);
+}
+
+# Print the command-line synopsis on standard output and exit with status 1.
+sub usage() {
+    # print, not printf: the interpolated program name must not be
+    # interpreted as a format string.
+    print "usage: $progname <file1>[:start] <file2>[:start]\n";
+    exit 1;
+}
+
+
+# Package Hunk. A Hunk is a group of Blocks which overlap because of the
+# context surrounding each block. (So if we're not using context, every
+# hunk will contain one block.)
+{
+package Hunk;
+
+# Constructor: wrap one diff chunk in a Block and compute the hunk bounds.
+#
+# Arguments:
+#   $class         - class to bless the new hunk into
+#   $piece         - one chunk from the list returned by diff()
+#   $context_items - number of items (lines, e.g.) of context around the block
+#   $maxlen        - bound used to clip the trailing context (see flag_context)
+#
+# This subroutine changes $File_Length_Difference
+#
+# Fields in a Hunk:
+#   blocks - a list of Block objects
+#   start1 - index in file 1 where the hunk (including context) starts
+#   end1   - index in file 1 where the hunk (including context) ends
+#   start2 - same as start1, for file 2
+#   end2   - same as end1, for file 2
+#   maxlen - the $maxlen argument
+#   change - true when the initial block both removes and inserts items
+#
+# Variables:
+#   before_diff - how much longer file 2 is than file 1 due to all hunks
+#                 until but NOT including this one
+#   after_diff  - difference due to all hunks including this one
+sub new {
+    my ($class, $piece, $context_items, $maxlen) = @_;
+
+    my $block = Block->new($piece);
+
+    my $before_diff = $File_Length_Difference; # BEFORE this hunk
+    my $after_diff = $before_diff + $block->{"length_diff"};
+    $File_Length_Difference += $block->{"length_diff"};
+
+    # @remove_array and @insert_array hold the items to insert and remove
+    # Save the start & beginning of each array. If the array doesn't exist
+    # though (e.g., we're only adding items in this block), then figure
+    # out the line number based on the line number of the other file and
+    # the current difference in file lengths
+    my @remove_array = $block->remove;
+    my @insert_array = $block->insert;
+    my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2, $change);
+    $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
+    $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
+    $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
+    $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
+
+    $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
+    $end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
+    $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
+    $end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
+    $change = scalar(@remove_array) && scalar(@insert_array);
+
+    # At first, a hunk will have just one Block in it
+    my $hunk = {
+        "start1" => $start1,
+        "start2" => $start2,
+        "end1" => $end1,
+        "end2" => $end2,
+        "maxlen" => $maxlen,
+        "change" => $change,
+        "blocks" => [$block],
+    };
+    bless $hunk, $class;
+
+    $hunk->flag_context($context_items);
+
+    return $hunk;
+}
+
+# Change the "start" and "end" fields to note that context should be added
+# to this hunk.  The amount added is computed from the file 1 bounds and
+# applied to the bounds of both files.
+sub flag_context {
+    my ($hunk, $context_items) = @_;
+    return unless $context_items; # no context
+
+    # add context before, without moving the start below index 0
+    my $start1 = $hunk->{"start1"};
+    my $num_added = $context_items > $start1 ? $start1 : $context_items;
+    $hunk->{"start1"} -= $num_added;
+    $hunk->{"start2"} -= $num_added;
+
+    # context after, clipped so that end1 does not exceed "maxlen"
+    # NOTE(review): the caller in this file passes scalar(@buf1) as maxlen,
+    # so end1 can be clipped to one past the last valid index -- confirm
+    # whether maxlen - 1 was intended.
+    my $end1 = $hunk->{"end1"};
+    $num_added = ($end1+$context_items > $hunk->{"maxlen"}) ?
+        $hunk->{"maxlen"} - $end1 :
+        $context_items;
+    $hunk->{"end1"} += $num_added;
+    $hunk->{"end2"} += $num_added;
+}
+
+# Is there an overlap between hunk arg0 and old hunk arg1?
+# Note: if end of old hunk is one less than beginning of second, they overlap
+sub does_overlap {
+    my ($hunk, $oldhunk) = @_;
+    return "" unless $oldhunk; # first time through, $oldhunk is empty
+
+    # Do I actually need to test both?
+    return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
+            $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
+}
+
+# Prepend hunk arg1 to hunk arg0
+# Note that arg1 isn't updated! Only arg0 is.
+sub prepend_hunk {
+    my ($hunk, $oldhunk) = @_;
+
+    $hunk->{"start1"} = $oldhunk->{"start1"};
+    $hunk->{"start2"} = $oldhunk->{"start2"};
+
+    unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
+}
+
+
+# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
+
+# Print this hunk in the format selected by $main::opt_u / $main::opt_c;
+# unified output wins when both are defined.  Arguments are passed through
+# unchanged: the hunk and references to the line arrays of file 1 and 2.
+sub output_diff {
+    if (defined $main::opt_u)    { output_unified_diff(@_) }
+    elsif (defined $main::opt_c) { output_context_diff(@_) }
+    else                         { die "unknown diff" }
+}
+
+# Print the hunk in unified diff format.
+#   $fileref1, $fileref2 - references to the line arrays of file 1 and file 2
+# $file_offset1 / $file_offset2 are the file-level offsets maintained by the
+# main script; they are added to the line numbers in the "@@" header.
+sub output_unified_diff {
+    my ($hunk, $fileref1, $fileref2) = @_;
+
+    # Calculate item number range.
+    my $range1 = $hunk->unified_range(1, $file_offset1);
+    my $range2 = $hunk->unified_range(2, $file_offset2);
+    print "@@ -$range1 +$range2 @@\n";
+
+    # Outlist starts containing the hunk of file 1.
+    # Removing an item just means putting a '-' in front of it.
+    # Inserting an item requires getting it from file2 and splicing it in.
+    # We splice in $num_added items. Remove blocks use $num_added because
+    # splicing changed the length of outlist.
+    # We remove $num_removed items. Insert blocks use $num_removed because
+    # their item numbers---corresponding to positions in file *2*--- don't take
+    # removed items into account.
+    my $low = $hunk->{"start1"};
+    my $hi = $hunk->{"end1"};
+    my ($num_added, $num_removed) = (0,0);
+    my @outlist = @$fileref1[$low..$hi];
+    s/^/ / for @outlist; # assume it's just context
+
+    foreach my $block (@{$hunk->{"blocks"}}) {
+        foreach my $item ($block->remove) {
+            my $op = $item->{"sign"}; # -
+            my $offset = $item->{"item_no"} - $low + $num_added;
+            $outlist[$offset] =~ s/^ /$op/;
+            $num_removed++;
+        }
+        foreach my $item ($block->insert) {
+            my $op = $item->{"sign"}; # +
+            my $i = $item->{"item_no"};
+            my $offset = $i - $hunk->{"start2"} + $num_removed;
+            splice(@outlist,$offset,0,"$op$$fileref2[$i]");
+            $num_added++;
+        }
+    }
+
+    s/$/\n/ for @outlist; # add \n's
+    print @outlist;
+
+}
+
+# Print the hunk in context diff format.
+#   $fileref1, $fileref2 - references to the line arrays of file 1 and file 2
+# The file 1 part is printed only if some block removes items, the file 2
+# part only if some block inserts items; the range headers are always
+# printed.
+sub output_context_diff {
+    my ($hunk, $fileref1, $fileref2) = @_;
+    my @blocklist;
+
+    print "***************\n";
+    # Calculate item number range.
+    my $range1 = $hunk->context_range(1, $file_offset1);
+    my $range2 = $hunk->context_range(2, $file_offset2);
+
+    # Print out file 1 part for each block in context diff format if there are
+    # any blocks that remove items
+    print "*** $range1 ****\n";
+    my $low = $hunk->{"start1"};
+    my $hi = $hunk->{"end1"};
+    if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
+        my @outlist = @$fileref1[$low..$hi];
+        s/^/ / for @outlist; # assume it's just context
+        foreach my $block (@blocklist) {
+            my $op = $block->op; # - or !
+            foreach my $item ($block->remove) {
+                $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
+            }
+        }
+        s/$/\n/ for @outlist; # add \n's
+        print @outlist;
+    }
+
+    print "--- $range2 ----\n";
+    $low = $hunk->{"start2"};
+    $hi = $hunk->{"end2"};
+    if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
+        my @outlist = @$fileref2[$low..$hi];
+        s/^/ / for @outlist; # assume it's just context
+        foreach my $block (@blocklist) {
+            my $op = $block->op; # + or !
+            foreach my $item ($block->insert) {
+                $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
+            }
+        }
+        s/$/\n/ for @outlist; # add \n's
+        print @outlist;
+    }
+}
+
+# Generate a range of item numbers to print. Only print 1 number if the range
+# has only one item in it. Otherwise, it's 'start,end'
+#   $flag   - 1 or 2, selects the start/end fields of file 1 or file 2
+#   $offset - added to both bounds before printing
+sub context_range {
+    my ($hunk, $flag, $offset) = @_;
+    my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
+
+    # index from 1, not zero
+    $start += $offset + 1;
+    $end += $offset + 1;
+    my $range = ($start < $end) ? "$start,$end" : $end;
+    return $range;
+}
+
+# Generate a range of item numbers to print for unified diff
+# Print number where block starts, followed by number of lines in the block
+# (don't print number of lines if it's 1)
+#   $flag   - 1 or 2, selects the start/end fields of file 1 or file 2
+#   $offset - added to both bounds before printing
+sub unified_range {
+    my ($hunk, $flag, $offset) = @_;
+    my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
+
+    # index from 1, not zero
+    $start += $offset + 1;
+    $end += $offset + 1;
+    my $length = $end - $start + 1;
+    my $first = $length < 2 ? $end : $start; # strange, but correct...
+    my $range = $length== 1 ? $first : "$first,$length";
+    return $range;
+}
+} # end Package Hunk
+
+# Package Block. One Block stands for a single operation that removes,
+# adds, or changes a run of items. It is stored as a flat list of changes,
+# each of which inserts or deletes exactly one item.
+# (A separate Change class did not seem worth the trouble.)
+{
+package Block;
+
+# Build a Block from one chunk of diff output.
+#
+# Fields in a block:
+#   changes     - reference to the list of change records
+#   length_diff - how much longer file 2 is than file 1 due to this block
+# Each change record has:
+#   sign    - '+' for insert, '-' for remove
+#   item_no - number of the item in the file (e.g., line number)
+# The text of the item (third element of each chunk entry) is not stored.
+sub new {
+    my ($class, $chunk) = @_;
+
+    # Reduce every [sign, item_no, text] entry to a two-key hash.
+    my @changes =
+        map { +{ "sign" => $_->[0], "item_no" => $_->[1] } } @$chunk;
+
+    my $self = bless { "changes" => \@changes }, $class;
+
+    # Both accessors give a count when called in scalar context.
+    my $inserted = $self->insert;
+    my $removed  = $self->remove;
+    $self->{"length_diff"} = $inserted - $removed;
+
+    return $self;
+}
+
+
+# LOW LEVEL FUNCTIONS
+
+# Classify the block: '!' if it both removes and inserts, '-' if it only
+# removes, '+' if it only inserts. A block without changes triggers a
+# warning and yields '^'.
+sub op {
+    my ($self) = @_;
+    my $has_insert = $self->insert;
+    my $has_remove = $self->remove;
+
+    if ($has_remove) {
+        return $has_insert ? '!' : '-';
+    }
+    return '+' if $has_insert;
+
+    warn "unknown block type";
+    return '^'; # context block
+}
+
+# Changes of this block that remove items
+# (the number of such changes when called in scalar context)
+sub remove {
+    my ($self) = @_;
+    return grep { $_->{"sign"} eq '-' } @{ $self->{"changes"} };
+}
+
+# Changes of this block that insert items
+# (the number of such changes when called in scalar context)
+sub insert {
+    my ($self) = @_;
+    return grep { $_->{"sign"} eq '+' } @{ $self->{"changes"} };
+}
+
+} # end of package Block