use lib "D:/Dev/Fuzz"; use strict; use warnings; use Fuzzy::Matcher; use Fuzzy::Matcher::DFA; use Fuzzy::Matcher::Xor; use Fuzzy::Matcher::Xor2; use Fuzzy::Matcher::Ysth; use Data::Dump::Streamer; use Time::HiRes qw( time ); use Devel::Size qw( total_size ); use Getopt::Long; $Fuzzy::Matcher::DFA::NumFullChecks=-1; exit(main()); # takes $str and produces a hash of all words # that have up to $fuzz characters differet using the # current alpahbet. For testing mostly. sub make_fuzz { my ($str,$fuzz,$ltrs)=@_; $ltrs="ACGT" unless defined $ltrs; $ltrs=[split//,$ltrs||'ACGT'] unless ref $ltrs; my $hash={$str=>0}; my @q=($str,0); while (@q) { my ($s,$d)=splice @q,0,2; my @c=split //,$s; foreach my $p (0..$#c) { for my $n (@$ltrs) { next if $c[$p] eq $n; local $c[$p]=$n; my $str=join "",@c; unless (exists $hash->{$str}) { $hash->{$str}=$d+1; push @q,$str,$d+1 if $d+1<$fuzz; #print $str,"\n"; } } } } return $hash } #die Dump(make_fuzz('AAAAAAAAAA',2)); sub run_search { my ($object,$str)=@_; my $time=time; my $res=$object->fuzz_search($str); $time=time-$time; #print "# ",ref($object),"\t",0+@res,"\n"; my $idx=0; my @ret; my %dupe; my $dupes=0; while ($idx<@$res) { my ($pos,$diff,$str)=@$res[$idx..$idx+2]; $idx+=3; my $packed=pack "NNA*",$pos,$diff,$str; if ($dupe{$packed}++) { $dupes++; next; } push @ret,$packed; } @$res=(); my @res=sort @ret; ($time,$dupes,\@res); } sub run_and_time { my ($obj,$str,$name)=@_; print "# ",ref($obj); my ($time,$dupes,$res)=run_search($obj,$str); printf "%s > %10.4f secs %s Matches:%d%s\n", $name ? sprintf(" %04d ",$name) : "", $time, $dupes ? "($dupes dupes)" : "", 0+@$res, ref($obj)=~/DFA/ ? " (Fullchecks:".$Fuzzy::Matcher::DFA::NumFullChecks.")" : ""; return ($time,$res); } sub show_res { my $res=shift; foreach my $rec (@$res) { print join("\t",unpack "NNA*",$rec),"\n"; } } sub basic_test { my $class=shift; $class="Fuzzy::Matcher::$class" unless $class=~/:/; my $o=eval qq{use $class; $class->new(2,10,"ACGT") }; die $@ if $@; my $test='A' x 10; my $search="A" x 11; print "# -- Basic Test For ".ref($o)." --\n"; print "# Fuzzing '$test'\n"; my $hash=make_fuzz($test,2,"ACGT"); print "# Got ".scalar(keys %$hash)." words\n"; print "# Searching: '$search' for matches\n"; my $expecting=scalar(keys %$hash)*(length($search)-length($test)+1); print "# Expecting ",$expecting," matches\n"; $o->fuzz_store($_) foreach sort keys %$hash; $o->prepare(); my ($time,$dupes,$ret)=run_search($o,$search); printf "# Got %d records (%d dupes) in %10.4f seconds\n", 0+@$ret,$dupes,$time; my $has_errors=0; if (@$ret!=$expecting) { print "Returned incorrect number of records!\n"; print "Was expecting $expecting and got ",scalar(@$ret)," instead.\n"; $has_errors++; show_res($ret); } eval { my %count; my $errors; foreach my $r (@$ret) { my ($ofs,$diff,$str)=unpack "NNA*",$r; if ($hash->{$str}!=$diff) { $errors.="Error: '$str' should have a diff of $hash->{$str} but reports $diff\n"; $has_errors++; } else { $count{$str}+=($ofs+1); } } die $errors if $errors; foreach my $counted (keys %count) { unless (defined delete $hash->{$counted}) { $has_errors++; die "No '$counted' in hash!?"; } unless ($count{$counted}==3) { $has_errors++; die "Bad count $count{$counted} for '$counted'" } } }; if ($@ || $has_errors) { warn "****",ref($o)," Failed basic test (at least $has_errors errors) ****\n",$@ ? $@ : "","\n\n" if ! -t STDOUT; print "****",ref($o)," Failed basic test (at least $has_errors errors) ****\n",$@ ? $@ : "","\n\n" } else{ print "# Returned as expected\n\n"; } return !keys %$hash; } sub open_test_file { my ($test)=@_; open my $fh,"<",$test or die "Failed to open '$test':$!"; my $junk=<$fh>; chomp(my $data=scalar <$fh>); my ($Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars)=split /,/,$data; return ($fh,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars); } sub time_test_file { my ($class,$test,$limit)=@_; print "# Running tests from '$test'\n"; my ($fh,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars)= open_test_file($test); $class="Fuzzy::Matcher::$class" unless $class=~/:/; my $o=eval qq{use $class; $class->new(2,\$Word_Size,\$Chars) }; die $@ if $@; print "# Testing ",ref($o)."\n# Loading strings..."; my $construct_time=time; while (<$fh>){ chomp; last if /^-/; $o->fuzz_store($_); } my $prepare=time; printf "(%10.4f secs) Running Prepare...",$prepare-$construct_time; $o->prepare(); $construct_time=time-$construct_time; $prepare=time-$prepare; printf "(%10.4f)\n". "# Total Construction took:%10.4f secs Total Size: %s\n". "# Starting search...\n",$prepare,$construct_time,shorten_num_bytes(total_size($o)); my $search_time=0; my $counter=0; chomp(my $str=<$fh>); my ($time,$first)=run_and_time($o,$str,++$counter); my $projected=int($time*$Strings+$construct_time); if ($limit and $limit<$projected) { print "# !!!! Skipping test as projected total time of $projected exceeds limit of $limit!\n\n"; return; } print "# Bulk test"; my $bulk_time=time; while(<$fh>) { print "."; chomp(my $str=$_); my $res=$o->fuzz_search($str); } print "\n"; $bulk_time=time-$bulk_time; $search_time+=$time+$bulk_time; printf "# Words: %d Word Size: %d Strings: %d String Size: %d Fuzz: %d Chars:%s\n", $Words,$Word_Size,$Strings,$String_Size,$Fuzz,$Chars; printf "# Total Time: %10.4f secs (Search %10.4f Construct %10.4f)\n", $construct_time+$search_time,$search_time,$construct_time; printf "# Average Search Time: %10.4f (With Construct: %10.4f)\n", $search_time/$Strings,($construct_time+$search_time)/$Strings; print "##### EOF #####\n\n"; return ($search_time,$construct_time,$first, $Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars); } sub compare_results { my ($name,$length,$res1,$res2)=@_; my ($idx1,$idx2)=(0,0); my @res=("# Compare $name\n"); my (@badres1,@badres2); while ($idx1<@$res1 or $idx2<@$res2) { while ($idx1<@$res1 and $idx2<@$res2 and $res1->[$idx1] eq $res2->[$idx2]) { $idx1++; $idx2++; } while (($idx1<@$res1 and $idx2>=@$res2) or ($idx1<@$res1 and $idx2<@$res2 and $res1->[$idx1] lt $res2->[$idx2])) { push @res,sprintf "# %0d6:%1d:%".$length."s\n", unpack "NNA*",$res1->[$idx1]; push @badres1,$res1->[$idx1]; $idx1++; } while (($idx1>=@$res1 and $idx2<@$res2) or ($idx1<@$res1 and $idx2<@$res2 and $res1->[$idx1] gt $res2->[$idx2])) { push @res,sprintf "# %6s %1s %".$length. "s\t-\t%0d6:%1d:%". $length."s\n", ("")x3,unpack "NNA*",$res2->[$idx2]; push @badres2,$res2->[$idx2]; $idx2++; } } if (@badres1 or @badres2) { push @res,join "","# There were:",@badres1+@badres2," errors\n"; } else { push @res,join "","# The results were identical.\n"; } return (@badres1+@badres2 ? \@res: 0) } sub shorten_num_bytes { my ($n) = @_; my $divisor=1; while (1) { last if ($n/$divisor) < 1024*1.5; $divisor *= 1024; } my %suffixes = ( 1024**0 => '', 1024**1 => 'k', 1024**2 => 'M', 1024**3 => 'G' ); my $suffix=$suffixes{$divisor} ? $suffixes{$divisor}.'B' : 'Bytes'; sprintf "%.0f %s", $n/$divisor, $suffix; } sub center { my ($s,$l)=@_; while (length($s)<$l) { if(length($s)%2) { $s=" $s" } else { $s.=" " }} $s }; sub docs { my ($optstr)=$0=~/([^\\\/]+)$/; return $optstr." [opts] [FILES]".<<'END_OPTS'; --class=A,B --class=C Classes to test (Def: 'DFA','Ysth','Xor','Xor2') --list=filename.txt File containing list of filenames to run. --basic_only Only run basic tests --limit=secs Try not to run tests longer than 'secs' --noearly Be less aggressive about timing out a test. When run with no file arguments will process all .fuzztest files in cwd. Files are run in lexicographical order, and limit effects are cumulative. END_OPTS } sub main { my $list; # filename which holds list of filenames to process my (@Class,@Classes); # classes to process; my @file; # files to process my $basic; # only do basic tests my $limit; # time limit for tests my $early=1; # bail early if we think we will exceed timelimit # when running a filetest my $auto; my $help=0; my $shellout=""; GetOptions ("man|help|doc"=>\$help, "class=s" => \@Class, "list=s"=>$list, "basic_only"=>\$basic, 'limit=i'=>\$limit, 'auto=s'=>\$auto, 'shellout=s'=>\$shellout, 'early!'=>\$early) or die "Bad option!\n".docs(); die(docs()) if $help; @Class = split(/,/,join(',',@Class)); @Class=('DFA','Ysth','Xor2','Xor') unless @Class; @Classes=@Class; if ($auto) { my ($stime,$ctime,$first,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars)= time_test_file($Class[0],$auto,$early && $limit,1 ); if (defined $stime) { print "\n",Dump($first)->Names('first')->Indent(0)->Out(), join("\t",'Ok!',$Class[0],$stime,$ctime,$Fuzz,$Words,$Word_Size, $Strings,$String_Size,$Chars,'Ok!'),"\n"; } exit; } for my $class (@Class) { basic_test($class); } exit if $basic; if ($list) { local @ARGV=($list); @file=map { chomp; -e $_ ? $_ : () } <>; } push @file,@ARGV; push @file,glob "*.fuzztest" unless @file; @file=sort @file unless $list; my %filerep; my $filelen=0; my $classlen=0; my %filetests; my %totals; my $last_word_count=0; for my $file (@file) { my (undef,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars)= open_test_file($file); if ($Words<$last_word_count) { @Class=@Classes; } $last_word_count=$Words; next if !@Class; my @class=@Class; $filelen=length $file if $filelen&1`); my ($status,$retclass,$emark,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars); ($status,$retclass,$stime,$ctime,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars,$emark) =split /\t/,$resp[-1]; if ($status and $status eq 'Ok!' and $retclass eq $class and $emark eq $status and $resp[-2]=~/^\$first/) { eval $resp[-2]; if ($@) { die $@; } else { pop @resp; pop @resp; } } else { undef $stime; } if ($shellout=~/err/i) { print STDERR join "\n",@resp,""; } elsif ($shellout!~/none/i) { print join "\n",@resp,""; } } else { ($stime,$ctime,$first,$Fuzz,$Words,$Word_Size,$Strings,$String_Size,$Chars)= time_test_file($class,$file,$early && $limit ); } next unless defined $stime; $first{$class}=$first; $filetests{$class}++; $res{$class}=[ $stime+$ctime, $stime, $ctime]; $totals{$class}+=$stime+$ctime;; $filerep{$file}{$class}=$res{$class}; } { printf "\n----- Summary -----\n"; printf " Words : %7d Word Size : %7d\n Strings: %7d String Size: %7d\n Fuzz : %7d Chars :%s\n\n", $Words,$Word_Size,$Strings,$String_Size,$Fuzz,$Chars; @class=sort { $res{$a}[0] <=> $res{$b}[0] } grep { defined $res{$_} } @class; print " " x ($classlen+2); printf " %s |",center($_,10) for ("Total","Search","Construct"); print "\n"; foreach my $row (@class) { printf "%*s : %10.4f | %10.4f | %10.4f |\n",-$classlen,$row,@{$res{$row}}; } print "\n"; print "Total Speed (R-C) \\ Search Speed (C-R)\n"; print " " x ($classlen+2); printf " %s |",center($_,10) for @class; print "\n"; my %comp; my @report; foreach my $row (@class) { printf "%*s :",-$classlen,$row; my $past_middle=0; foreach my $col (@class) { if ( $row lt $col and !exists($comp{$row}{$col})) { my $res=compare_results("$row/$col",$Word_Size,$first{$row},$first{$col}); $comp{$row}{$col}=$res; push @report,$res if $res; } if ($row lt $col) { print $comp{$row}{$col} ? "*" : " "; } else { print $comp{$col}{$row} ? "*" : " "; } if ($row eq $col) { printf "%-10s |",center("--",10); $past_middle=1; } else { printf "%10.4f |",$past_middle ? $res{$col}[1]-$res{$row}[1] : $res{$row}[0]-$res{$col}[0]; } } print "\n"; } print join "",(map { @$_ } @report),"\n","\n"; } if ($limit) { @Class=grep defined($res{$_}) && $res{$_}[0]<$limit,@Class; } } @Classes=sort { $filetests{$b} <=> $filetests{$a} || $totals{$a} <=> $totals{$b} } @Classes; print " " x ($filelen+2); printf "%s |",center($_,10) for @Classes; print "\n"; foreach my $file (@file) { printf "%*s :",-$filelen,$file; foreach my $class (@Classes) { if ($filerep{$file}{$class}) { printf "%10.4f |",$filerep{$file}{$class}[0]; } else { printf "%10s |",center('N/A',10); } } print "\n"; } print "** End **\n"; 0; }