0: #!/usr/bin/perl -w
1: use strict;
2:
3: ## recursively comparing arbitrary heterogenous data structures
4: ## an experiment in functional programming idioms implemented in perl
5: ##
6: ## by anders pearson <anders@columbia.edu>
7: ## 2001-07-06
8: ##
9: ## functions to traverse two arbitrary complex data structures
10: ## (lists of lists, lists of hashes, lists of hashes of lists and scalars,
11: ## etc, etc) comparing them by value.
12: ##
13: ## known issues:
14: ## - does not deal with GLOB,CODE,LVALUE or other more exotic types
15: ## - makes no provision for avoiding circular references.
16: ## ie, it WILL get stuck on them
17: ##
18: ## enjoy and let me know if you see any problems or can think of
19: ## better ways to do anything.
20:
21: ########### driver functions
22:
23: # takes references to two data structures and returns
24: # 1 if they are different, 0 if they're the same
25: # order agnostic (ie ['foo','bar'] == ['bar','foo'])
26:
27: sub diff {
28: my $r1 = shift;
29: my $r2 = shift;
30: # ld expects references to lists
31: if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
32: return &ld("","",$r1,$r2,0,1);
33: } else {
34: # if they're not references to lists, we just make them
35: return &ld("","",[$r1],[$r2],0,1);
36: }
37: }
38:
39: # same as diff but not order agnostic
40: # ['foo','bar'] != ['bar','foo']
41: sub diff_order {
42: my $r1 = shift;
43: my $r2 = shift;
44: # ld expects references to lists
45: if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
46: return &ld("","",$r1,$r2,0,0);
47: } else {
48: # if they're not references to arrays, we just make them
49: return &ld("","",[$r1],[$r2],0,0);
50: }
51: }
52:
53: # recursively compares two lists by value
54: # works for damn near any reasonably complex structure
55: # including: lists of scalars, lists of lists, lists of hashes,
56: # lists of hashes of lists of arrays of scalars, etc, etc.
57: # arguably should be called data_structures_diff
58: # argument $order == 1 means that we don't care about the order
59: # ie ['foo','bar'] == ['bar','foo']
60:
61: sub ld {
62: my $x = shift; # first element of first list
63: my $y = shift; # first element of second list
64: my $r1 = shift; # reference to rest of first list
65: my $r2 = shift; # reference to rest of second list
66: my $sorted = shift; # whether or not the lists have been sorted
67: my $order = shift; # whether we're order agnostic with lists
68:
69: my $DIFFERENT = 1;
70: my $SAME = 0;
71:
72: my @xs = @$r1;
73: my @ys = @$r2;
74:
75: if(!$sorted && $order) {
76: @xs = sort @xs;
77: @ys = sort @ys;
78: $sorted = 1;
79: }
80:
81: if ($#xs != $#ys) {
82: # lists are different lengths, so we know right off that
83: # they must not be the same.
84: return $DIFFERENT;
85: } else {
86:
87: # lists are the same length, so we compare $x and $y
88: # based on what they are
89: if (!ref $x) {
90:
91: # make sure $y isn't a reference either
92: return $DIFFERENT if ref $y;
93:
94: # both scalars, compare them
95: return $DIFFERENT if $x ne $y;
96: } else {
97:
98: # we're dealing with references now
99: if (ref $x ne ref $y) {
100:
101: # they're entirely different data types
102: return $DIFFERENT;
103: } elsif ("SCALAR" eq ref $x) {
104:
105: # some values that we can actually compare
106: return $DIFFERENT if $$x ne $$y;
107: } elsif ("REF" eq ref $x) {
108:
109: # yes, we even handle references to references to references...
110: return $DIFFERENT if &ld($$x,$$y,[],[],0,$order);
111: } elsif ("HASH" eq ref $x) {
112:
113: # references to hashes are a little tricky
114: # we make arrays of keys and values (keeping
115: # the values in order relative to the keys)
116: # and compare those.
117: my @kx = sort keys %$x;
118: my @ky = sort keys %$y;
119: my @vx = map {$$x{$_}} @kx;
120: my @vy = map {$$y{$_}} @ky;
121: return $DIFFERENT
122: if &ld("", "", \@kx,\@ky,1,$order) ||
123: &ld("", "", \@vx,\@vy,1,$order);
124: } elsif ("ARRAY" eq ref $x) {
125: return $DIFFERENT if &ld("","",$x,$y,0,$order);
126: } else {
127: # don't know how to compare anything else
128: die "sorry, can't compare type ", ref $x;
129: }
130: }
131: if (-1 == $#xs) {
132:
133: # no elements left in list, this is the base case.
134: return $SAME;
135: } else {
136: return &ld(shift @xs,shift @ys,\@xs,\@ys,$sorted,$order);
137: }
138:
139: }
140: }
141:
142: # some simple examples
143: my @l1 = qw/foo bar baz/;
144: my @l2 = qw/bar foo baz/;
145:
146: print "d: ", &diff(\@l1,\@l2), "\n";
147: print "do: ", &diff_order(\@l1,@l2), "\n";
148: push @l1, {x => 'y'};
149: print "d: ", &diff(\@l1,\@l2), "\n";
150: print "do: ", &diff_order(\@l1,@l2), "\n";
151: push @l2, {x => 'y'};
152: print "d: ", &diff(\@l1,\@l2), "\n";
153: print "do: ", &diff_order(\@l1,@l2), "\n";
154: push @l1, [1,2,3];
155: push @l2, [3,2,1];
156: print "d: ", &diff(\@l1,\@l2), "\n";
157: print "do: ", &diff_order(\@l1,@l2), "\n";
158:
159: __END__
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.