http://www.perlmonks.org?node_id=1139276

This program creates a recursive diff between two directory trees. The directory trees can be local or reachable by ssh. The diff lists added and removed files and for changed files, it lists the differences in diff style. This makes it convenient to review the differences between two machines that should be identical, or to find the steps that are needed to bring one directory tree to another.

After a hint by salva that Net::SSH2 comes included with Strawberry Perl, I was motivated to rip out some system-specific ties to plink.exe and post this. Unfortunately, Net::SSH::Any doesn't seem to have a way to talk to the ssh agent for quick authentification, so this relies on Net::SSH2 instead of Net::SSH::Any.

#!perl -w use strict; use Algorithm::Diff; use Getopt::Long; use File::Find; use File::stat; use Net::SSH2; =head1 NAME diff-servers.pl =head1 ABSTRACT Generate a diff between two directory trees. The directory trees must +be local or reachable via ssh. Only the ssh2 protocol is supported. =head1 SYNOPSIS perl -w diff-servers.pl corion@production:/opt/mychat corion@staging +:/tmp/deploy-0.025 --ignore .bak --ignore .gpg --ignore .pgp --ignore + random_seed Quick diff, showing only added/missing files without comparing their c +ontent perl -w diff-servers.pl c:\mychat\old-versions\0.025 corion@staging: +/tmp/deploy-0.025 -q =head1 OPTIONS =over 4 =item B<--ignore> Regexp of directory entries to ignore =item B<--no-mode> Don't compare the file mode. =item B<--no-owner> Don't compare the file owner. =item B<--quick> Don't compare file contents =item B<--verbose> Be somewhat more verbose =back =head1 PREREQUISITES Currently, the script expects C<find> and C<perl> to be available on t +he remote side. The dependency on C<find> could be eliminated by implementing th +e functionality in Perl. The dependency on C<perl> on the remote side could be elimina +ted by using the SFTP protocol for retrieving the directory tree, at an added compl +exity. =cut GetOptions( 'verbose|v' => \my $verbose, 'ignore|i:s' => \my @ignore, 'no-owner|o' => \my $ignore_owner, 'no-mode|m' => \my $ignore_mode, 'quick|q' => \my $skip_contents, ); use vars qw(%connections); sub run_remote { my( $server, $command )= @_; my $user; if( $server =~ /(.*)\@(.*)/ ) { $user = $1; $server = $2; }; if( ! $connections{ $server }) { my $ssh2 = Net::SSH2->new(); $ssh2->connect($server) or die "Couldn't connect to '$server': + $!"; if ($ssh2->auth( username => $user, interact => 1 )) { $connections{ $server } = $ssh2; } else { die "No auth to $server."; }; }; my $fh = $connections{ $server }->channel; warn "[$command]" if $verbose; $fh->exec($command) or die; my @lines = map {s/\s+$//; $_ } <$fh>; #warn "$server:[$_]" for @lines; return @lines } sub get_local { my( $file )= @_; open my $fh, '<', $file or warn "Couldn't read '$file': $!"; binmode $fh; my @lines = map {s/\s+$//; $_ } <$fh>; return @lines } sub split_serverpath { my( $serverpath ) = @_; if( $serverpath =~ /((?:\w+\@)[\w.]+):(.*)/ ) { return ("$1","$2"); } else { # Must be local return (undef, $serverpath); } }; use Data::Dumper; sub filelist { my( $serverpath ) = @_; my( $host, $dir ) = split_serverpath( $serverpath ); if( $host ) { # Outputs a line per file # mode user group type filename my $uid_gid_file = q!perl -Mstrict -MFile::stat -nle 'next if +/^\s*$/;my $s=stat($_);my($p,$u,$g,$t)=(0,q(-),q(-),q(f)); if($s and +not -l) {$p=$s->mode;$u=(getpwuid($s->uid))[0];$g=(getgrgid($s->gid)) +[0] } else { $t=q(l)}; print sprintf qq(%08o %s %s %s %s), $p, $u,$g, +$t,$_'!; # Read all directory entries my @remote_entries = map { my( $mode,$u,$g,$t,$name ) = split +/ +/, $_, 6 ; $name =~ s!^\Q$dir!!; { user => $u, group => $g, type => +$t, name => $name, mode => $mode }; } run_remote( $host, qq{find '$dir' - +type f -o -type l| $uid_gid_file } ); return @remote_entries; } else{ my @files; find({ wanted => sub { return if -d $_; my $s = stat($_) or warn "Couldn't stat [$_]: $!", return; my $name = $_; my $u='-'; my $g='-'; my $t='f'; my $mode = $s->mode; $name =~ s!^\Q$dir!!; push @files, { user => $u, group => $g, type => $t, name = +> $name, mode => $mode }; }, no_chdir => 1 }, $dir ); #warn "local: $_" for @files; return @files; }; } sub wanted_file { my( $fileinfo )= @_; my $file = $fileinfo->{name}; if( my @why = grep { $file =~ /\Q$_/ } @ignore ) { #warn "Ignoring $file (@why)"; } else { #warn "Allowing [$file] ..."; } ! grep { $file =~ /\Q$_/ } @ignore; } sub diff { my( $name, $server1, $server2 )= @_; my($host1, $path1) = split_serverpath( $server1 ); my($host2, $path2) = split_serverpath( $server2 ); my @left = $host1 ? run_remote( $host1, qq{cat '$path1$name'} ) : + get_local( "$server1$name" ); my @right = $host2 ? run_remote( $host2, qq{cat '$path2$name'} ) : + get_local( "$server2$name" ); my $diff = Algorithm::Diff->new( \@left, \@right ); $diff->Base( 1 ); # Return line numbers, not indices my $has_diff; while( $diff->Next() ) { next if $diff->Same(); if( ! $has_diff ) { $has_diff = 1; print "$name\n"; }; my $sep = ''; if( ! $diff->Items(2) ) { printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); } elsif( ! $diff->Items(1) ) { printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); } else { $sep = "---\n"; printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); } print "< $_\n" for $diff->Items(1); print $sep; print "> $_\n" for $diff->Items(2); } $has_diff }; my( $server1, $server2 )= @ARGV; #warn "Old: $server1"; #warn "New: $server2"; my %left_info = map { $_->{name} => $_ } grep { wanted_file($_) } file +list( $server1 ); my %right_info = map { $_->{name} => $_ } grep { wanted_file($_) } fil +elist( $server2 ); my @left_names = sort keys %left_info; my @right_names = sort keys %right_info; my $filediff = Algorithm::Diff->new( \@left_names, \@right_names ); my @samelist; $filediff->Base( 1 ); # Return line numbers, not indices while( $filediff->Next() ) { if( $filediff->Same() ) { # entry exists in both trees push @samelist, $filediff->Items(1); } else { # Entries only on tree 2, but no symlink my @new_items = grep { ! $right_info{ $_ }->{type} ne 'l' } $f +ilediff->Items(2); print "new: $_\n" for @new_items; # Entries only on tree 1, but no symlink my @old_items = grep { ! $left_info{ $_ }->{type} ne 'l' } $fi +lediff->Items(1); print "del: $_\n" for @old_items; }; } for my $same (@samelist) { my $linfo = $left_info{ $same }; my $rinfo = $right_info{ $same }; #warn "File: $same"; #warn Dumper $linfo; #warn Dumper $rinfo; if( $linfo->{type} ne $rinfo->{type} ) { print "$same: Link vs. file: $linfo->{type} => $rinfo->{type}\ +n"; }; next if $linfo->{type} eq 'l' or $rinfo->{type} eq 'l'; if( ! $ignore_owner ) { if( $left_info{ $same }->{user} ne $right_info{ $same }->{ +user} or $left_info{ $same }->{group} ne $right_info{ $same }->{ +group} ) { print "$same: Ownership different: $left_info{ $same }->{use +r}:$left_info{$same}->{group} ne $right_info{ $same }->{user}:$right_ +info{$same}->{group}\n"; }; }; if( ! $ignore_mode ) { if( $left_info{ $same }->{mode} ne $right_info{ $same }->{m +ode} ) { print "$same: Mode different: $left_info{ $same }->{mode} $r +ight_info{$same}->{mode}\n"; }; }; if( ! $skip_contents ) { diff( $same, $server1, $server2 ); }; };