Beefy Boxes and Bandwidth Generously Provided by pair Networks
Just another Perl shrine
 
PerlMonks  

recursively comparing heterogenous data structures

by thraxil (Prior)
on Jul 06, 2001 at 20:14 UTC ( #94507=perlcraft: print w/ replies, xml ) Need Help??

   1: #!/usr/bin/perl -w
   2: use strict;
   3: 
   4: ## recursively comparing arbitrary heterogenous data structures
   5: ## an experiment in functional programming idioms implemented in perl
   6: ##
   7: ## by anders pearson <anders@columbia.edu>
   8: ## 2001-07-06
   9: ##
  10: ## functions to traverse two arbitrary complex data structures
  11: ## (lists of lists, lists of hashes, lists of hashes of lists and scalars,
  12: ## etc, etc) comparing them by value. 
  13: ## 
  14: ## known issues: 
  15: ##   - does not deal with GLOB,CODE,LVALUE or other more exotic types
  16: ##   - makes no provision for avoiding circular references.
  17: ##     ie, it WILL get stuck on them
  18: ##
  19: ## enjoy and let me know if you see any problems or can think of
  20: ## better ways to do anything.
  21: 
  22: ########### driver functions
  23: 
  24: # takes references to two data structures and returns
  25: # 1 if they are different, 0 if they're the same
  26: # order agnostic (ie ['foo','bar'] == ['bar','foo'])
  27: 
  28: sub diff {
  29:     my $r1 = shift;
  30:     my $r2 = shift;
  31:     # ld expects references to lists
  32:     if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
  33:         return &ld("","",$r1,$r2,0,1);
  34:     } else {
  35: 	# if they're not references to lists, we just make them
  36:         return &ld("","",[$r1],[$r2],0,1);
  37:     }
  38: }
  39: 
  40: # same as diff but not order agnostic
  41: # ['foo','bar'] != ['bar','foo']
  42: sub diff_order {
  43:     my $r1 = shift;
  44:     my $r2 = shift;
  45:     # ld expects references to lists
  46:     if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
  47:         return &ld("","",$r1,$r2,0,0);
  48:     } else {
  49: 	# if they're not references to arrays, we just make them
  50:         return &ld("","",[$r1],[$r2],0,0);
  51:     }
  52: }   
  53: 
  54: # recursively compares two lists by value
  55: # works for damn near any reasonably complex structure
  56: # including: lists of scalars, lists of lists, lists of hashes, 
  57: # lists of hashes of lists of arrays of scalars, etc, etc.
  58: # arguably should be called data_structures_diff
  59: # argument $order == 1 means that we don't care about the order
  60: # ie ['foo','bar'] == ['bar','foo']
  61: 
  62: sub ld {
  63:     my $x      = shift;       # first element of first list
  64:     my $y      = shift;       # first element of second list
  65:     my $r1     = shift;       # reference to rest of first list
  66:     my $r2     = shift;       # reference to rest of second list
  67:     my $sorted = shift;       # whether or not the lists have been sorted
  68:     my $order  = shift;       # whether we're order agnostic with lists
  69: 
  70:     my $DIFFERENT = 1;
  71:     my $SAME      = 0;
  72: 
  73:     my @xs = @$r1;
  74:     my @ys = @$r2;
  75: 
  76:     if(!$sorted && $order) {
  77: 	@xs = sort @xs;
  78: 	@ys = sort @ys;
  79: 	$sorted = 1;
  80:     }
  81: 
  82:     if ($#xs != $#ys) {
  83: 	# lists are different lengths, so we know right off that
  84: 	# they must not be the same.
  85: 	return $DIFFERENT;
  86:     } else {
  87: 
  88: 	# lists are the same length, so we compare $x and $y
  89: 	# based on what they are
  90: 	if (!ref $x) {
  91: 
  92: 	    # make sure $y isn't a reference either
  93: 	    return $DIFFERENT if ref $y;
  94: 
  95: 	    # both scalars, compare them
  96: 	    return $DIFFERENT if $x ne $y;
  97: 	} else {
  98: 
  99: 	    # we're dealing with references now
 100: 	    if (ref $x ne ref $y) {
 101: 
 102: 		# they're entirely different data types
 103: 		return $DIFFERENT;
 104: 	    } elsif ("SCALAR" eq ref $x) {
 105: 
 106: 		# some values that we can actually compare
 107: 		return $DIFFERENT if $$x ne $$y;
 108: 	    } elsif ("REF" eq ref $x) {
 109: 
 110: 		# yes, we even handle references to references to references...
 111: 		return $DIFFERENT if &ld($$x,$$y,[],[],0,$order);
 112: 	    } elsif ("HASH" eq ref $x) {
 113: 
 114: 		# references to hashes are a little tricky
 115: 		# we make arrays of keys and values (keeping
 116: 		# the values in order relative to the keys)
 117: 		# and compare those.
 118: 		my @kx = sort keys %$x;
 119: 		my @ky = sort keys %$y;
 120: 		my @vx = map {$$x{$_}} @kx;
 121: 		my @vy = map {$$y{$_}} @ky;
 122: 		return $DIFFERENT
 123: 		    if &ld("", "", \@kx,\@ky,1,$order) || 
 124: 			&ld("", "", \@vx,\@vy,1,$order);
 125: 	    } elsif ("ARRAY" eq ref $x) {
 126: 		return $DIFFERENT if &ld("","",$x,$y,0,$order);
 127: 	    } else {
 128: 		# don't know how to compare anything else
 129: 		die "sorry, can't compare type ", ref $x;
 130: 	    }
 131: 	}
 132: 	if (-1 == $#xs) {
 133: 
 134: 	    # no elements left in list, this is the base case.
 135: 	    return $SAME;
 136: 	} else {
 137: 	    return &ld(shift @xs,shift @ys,\@xs,\@ys,$sorted,$order);
 138: 	}
 139: 
 140:     }
 141: }
 142: 
 143: # some simple examples
 144: my @l1 = qw/foo bar baz/;
 145: my @l2 = qw/bar foo baz/;
 146: 
 147: print "d:  ", &diff(\@l1,\@l2), "\n";
 148: print "do: ", &diff_order(\@l1,@l2), "\n";
 149: push @l1, {x => 'y'};
 150: print "d:  ", &diff(\@l1,\@l2), "\n";
 151: print "do: ", &diff_order(\@l1,@l2), "\n";
 152: push @l2, {x => 'y'};
 153: print "d:  ", &diff(\@l1,\@l2), "\n";
 154: print "do: ", &diff_order(\@l1,@l2), "\n";
 155: push @l1, [1,2,3];
 156: push @l2, [3,2,1];
 157: print "d:  ", &diff(\@l1,\@l2), "\n";
 158: print "do: ", &diff_order(\@l1,@l2), "\n";
 159: 
 160: __END__

Comment on recursively comparing heterogenous data structures
Download Code
Re: recursively comparing heterogenous data structures
by bikeNomad (Priest) on Jul 06, 2001 at 21:00 UTC
    You can generalize this into a more useful routine by allowing the user to provide optional subroutine refs that will be called when:<bl>
  • A has something that B doesn't
  • B has something that A doesn't
  • A and B have different values
  • A and B have same values </bl>

    See Algorithm::Diff for an example of passing in such subroutines. And you should also provide for an extra scalar that will be passed to the user subroutines, so the user can provide context to them without using globals.

Re: recursively comparing heterogenous data structures
by princepawn (Parson) on Jul 06, 2001 at 23:22 UTC
    lar to my Array::PatternMatcher and could be the basis of something which should be named Data::RecDescent because it is similar to cpan:://Parse::RecDescent but you are trying to traverse something other than structured text.

    And one other thing is that comparison varies based on what you are comparing --- eq will not work for floating point numbers always.

Re: recursively comparing heterogenous data structures
by premchai21 (Curate) on Jul 07, 2001 at 07:11 UTC
Re: recursively comparing heterogenous data structures
by bikeNomad (Priest) on Jul 07, 2001 at 22:01 UTC
    Another way to compare arbitrary data structures (if you just need to see if they're different, and don't care about CODE or GLOB reference differences internally):

    use Storable; $Storable::canonical = 1; my compare { Storable::freeze($_[0]) eq Storable::freeze($_[1]) }

    update:The above gets hashes right (by sorting their keys); however, it doesn't allow ignoring ordering of lists as the original code does.

      that's pretty useful. unfortunately, it is order dependent. i wrote the code above because i had data structures to compare and i didn't care about the order of any lists within; just whether or not they had the same elements.

      anders pearson

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlcraft [id://94507]
Approved by root
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others studying the Monastery: (9)
As of 2014-07-12 06:16 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    When choosing user names for websites, I prefer to use:








    Results (238 votes), past polls