util/rundiff


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

#! /usr/bin/env perl

# Copyright (c) 2003 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Diff two streams.
#
# Unlike regular diff, this script does not read in the entire input
# before doing a diff, so it can be used on lengthy outputs piped from
# other programs (e.g., M5 traces).  The best way to do this is to
# take advantage of the power of Perl's open function, which will
# automatically fork a subprocess if the last character in the
# "filename" is a pipe (|).  Thus to compare the instruction traces
# from two versions of m5 (m5a and m5b), you can do this:
#
# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
#

use strict;

use Getopt::Std;

#
# Options:
#  -c <n> : print n lines of context before & after changes
#  -l <n> : use n lines of lookahead
#  -x     : use "complex" diff from Algorithm::Diff (see below)
#
our ($opt_c, $opt_l, $opt_x);
getopts('c:l:x');

#
# For the highest-quality (minimal) diffs, we can use the
# Algorithm::Diff package.  By default, a built-in, simple, and
# generally quite adequate algorithm will be used.  If you have
# Algorithm::Diff installed on your system, and don't mind having the
# script go slower (like 3-4x slower, based on informal observation),
# then specify '-x' on the command line to use it.
my $use_complexdiff = defined($opt_x);

if ($use_complexdiff) {
    # Don't use 'use', as that's a compile-time option and will fail
    # on systems that don't have Algorithm::Diff installed even if
    # $use_complexdiff is false.  'require' is evaluated at runtime,
    # so it's OK.
    require Algorithm::Diff;
    import Algorithm::Diff qw(traverse_sequences);
};

my $lookahead_lines = $opt_l || 200;

# in theory you could have different amounts of context before and
# after a diff, but until someone needs that there's only one arg to
# set both.
my $precontext_lines = $opt_c || 3;
my $postcontext_lines = $precontext_lines;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1";
open($fh2, $file2) or die "Can't open $file2";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.  Generally this
# corresponds to the oldest line in @precontext, or the oldest line in
# @lines1 and @lines2 if @precontext is empty.
my $lineno1 = 1;
my $lineno2 = 1;

# Fill a lookahead buffer to $lookahead_lines lines (or until EOF).
sub fill
{
    my ($fh, $array) = @_;

    while (@$array < $lookahead_lines) {
	my $line = <$fh>;
	last if (!defined($line));
	push @$array, $line;
    }
}

# Print and delete n lines from front of given array with given prefix.
sub printlines
{
    my ($array, $n, $prefix) = @_;

    while ($n--) {
	my $line = shift @$array;
	last if (!defined($line));
	print $prefix, $line;
    }
}

# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
sub printdiff
{
    my ($n1, $n2)= @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    if (@precontext == $precontext_lines || ($lineno1 == 0 && $lineno2 == 0)) {
	print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.
    if (@precontext) {
	print ' ', join(' ', @precontext);
	$lineno1 += scalar(@precontext);
	$lineno2 += scalar(@precontext);
	@precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;
}


########################
#
# Complex diff algorithm
#
########################

{
    my $match_found;
    my $discard_lines1;
    my $discard_lines2;

    sub match { $match_found = 1; }
    sub discard1 { $discard_lines1++ unless $match_found; }
    sub discard2 { $discard_lines2++ unless $match_found; }

    sub complex_diff
    {
	$match_found = 0;
	$discard_lines1 = 0;
	$discard_lines2 = 0;

	# See Diff.pm.  Note that even though this call generates a
	# complete diff of both lookahead buffers, all we use it for
	# is to figure out how many lines to discard off the front of
	# each buffer to resync the streams.
	traverse_sequences( \@lines1, \@lines2,
			    { MATCH => \&match,
			      DISCARD_A => \&discard1,
			      DISCARD_B => \&discard2 });

	die "Lost sync!" if (!$match_found);

	# Since we shouldn't get here unless the first lines of the
	# buffers are different, then we must discard some lines off
	# at least one of the buffers.
	die if ($discard_lines1 == 0 && $discard_lines2 == 0);

	printdiff($discard_lines1, $discard_lines2);
    }
}

#######################
#
# Simple diff algorithm
#
#######################

# Check for a pair of matching lines; if found, generate appropriate
# diff output.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Check if two adjacent lines match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
	printdiff($n1, $n2);
    }
}

sub simple_diff
{
    # Look for differences of $cnt lines to resync,
    # increasing $cnt from 1 to $lookahead_lines until we find
    # something.
    for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) {
	# Check for n lines in one file being replaced by
	# n lines in the other.
	return if checkmatch($cnt, $cnt);
	# Find differences where n lines in one file were
	# replaced by m lines in the other.  We let m = $cnt
	# and iterate for n = 0 to $cnt-1.
	for (my $n = 0; $n < $cnt; ++$n) {
	    return if checkmatch($n, $cnt);
	    return if checkmatch($cnt, $n);
	}
    }
    die "Lost sync!";
}

# Set the pointer to the appropriate diff function.
#
# Note that in either case the function determines how many lines to
# discard from the front of each lookahead buffer to resync the
# streams, then prints the appropriate diff output and discards them.
# After the function returns, it should always be the case that
# $lines1[0] eq $lines2[0].
my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;

# The main loop.
while (1) {
    # keep lookahead buffers topped up
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # peek at first line in each buffer
    my $l1 = $lines1[0];
    my $l2 = $lines2[0];

    if (!defined($l1) && !defined($l2)) {
	# reached EOF on both streams: exit
	exit(1);
    }

    if ($l1 eq $l2) {
	# matching lines: delete from lookahead buffer
	shift @lines1;
	shift @lines2;
	# figure out what to do with this line
	if ($postcontext > 0) {
	    # we're in the post-context of a diff: print it
	    $postcontext--;
	    print ' ', $l1;
	    $lineno1++;
	    $lineno2++;
	}
	else {
	    # we're in the middle of a matching region... save this
	    # line for precontext in case we run into a difference.
	    push @precontext, $l1;
	    # don't let precontext buffer get bigger than needed
	    while (@precontext > $precontext_lines) {
		shift @precontext;
		$lineno1++;
		$lineno2++;
	    }
	}
    }
    else {
	# Mismatch.  Deal with it.
	&$find_diff();
    }
}