use strict;
use warnings;
use File::Basename qw( fileparse );
my %seen;
while (<DATA>) {
(undef, my $path, undef) = fileparse( $_ );
print "$path\n" unless $seen{$path}++;
}
__DATA__
testing123/
foobar/
helloworld/
helloworld/r1/
helloworld/r1/helloworld-5-0.noarch.rpm
helloworld/r1/testfile23.txt
helloworld/r1/tomcat-7.0.27.rpm
helloworld/r2/
helloworld/r2/helloworld-2-0.noarch.rpm
helloworld/r2/testfile12.txt
helloworld/r2/tomcat-5.0.52.rpm
hellotest/
jeffa
L-LL-L--L-LL-L--L-LL-L--
-R--R-RR-R--R-RR-R--R-RR
B--B--B--B--B--B--B--B--
H---H---H---H---H---H---
(the triplet paradiddle with high-hat)
| [reply] [d/l] |
#!/usr/bin/perl
use strict;
use warnings;
my %seen;
while (<DATA>) {
m/^(.+\/)(.*)$/;
my $path=$1;
print "$path\n" unless $seen{$path}++;
}
__DATA__
testing123/
foobar/
helloworld/
helloworld/r1/
helloworld/r1/helloworld-5-0.noarch.rpm
helloworld/r1/testfile23.txt
helloworld/r1/tomcat-7.0.27.rpm
helloworld/r2/
helloworld/r2/helloworld-2-0.noarch.rpm
helloworld/r2/testfile12.txt
helloworld/r2/tomcat-5.0.52.rpm
hellotest/
| [reply] [d/l] |
May fail on other data
A better regex would be m{^(.+[\\/])[^\\/]*$} which will also work on MS Windows file paths.
But, as you said, would be better to use File::Basename as it will work with the file path syntax of several other OSes.
| [reply] [d/l] |
use strict;
use warnings;
use File::Basename qw( fileparse );
my %seen;
while (<DATA>) {
(undef, my $path) = fileparse( $_ );
print "$path\n" unless $seen{$path}++;
}
print "="x70,"\n";
foreach my $key (keys %seen){
print "$key\n" if 1 == scalar grep { /\Q$key/ } keys %seen;
}
__DATA__
testing123/
foobar/
helloworld/
helloworld/r1/
helloworld/r1/helloworld-5-0.noarch.rpm
helloworld/r1/testfile23.txt
helloworld/r1/tomcat-7.0.27.rpm
helloworld/r2/
helloworld/r2/helloworld-2-0.noarch.rpm
helloworld/r2/testfile12.txt
helloworld/r2/tomcat-5.0.52.rpm
hellotest/
For large files sorting and a pass to remove the current element if the next element matches is needed. Or, assuming that the original file is sorted just check each element against the next one.
use strict;
use warnings;
use File::Basename qw( fileparse );
my %seen;
my @directories;
while (<DATA>) {
(undef, my $path) = fileparse( $_ );
#push @directories, $path;
## update, I indended to put the push inside the unless block.
## The original works but not exactly as I expected since it puts
## duplicates in the array.
#print "$path\n" unless $seen{$path}++;
unless ($seen{$path}++) {
print "$path\n";
push @directories, $path;
}
}
print "="x70,"\n";
foreach my $index (0..$#directories-1){
my $current = $directories[$index];
print "$current\n" if not $directories[$index+1] =~ /\Q$current/ ;
}
### assuming the last one is always terminal
print $directories[$#directories], "\n";
__DATA__
testing123/
foobar/
helloworld/
helloworld/r1/
helloworld/r1/helloworld-5-0.noarch.rpm
helloworld/r1/testfile23.txt
helloworld/r1/tomcat-7.0.27.rpm
helloworld/r2/
helloworld/r2/helloworld-2-0.noarch.rpm
helloworld/r2/testfile12.txt
helloworld/r2/tomcat-5.0.52.rpm
hellotest/
Update: This seems to work on a single pass through the file.
use strict;
use warnings;
use File::Basename qw( fileparse );
my $first='';
my $second='';
while (<DATA>) {
$first = $second;
(undef, $second) = fileparse( $_ );
print "$second\n" if eof DATA;
next unless $first;
unless ( $second =~ /\Q$first/) {
print "$first\n";
}
}
__DATA__
testing123/
foobar/
helloworld/
helloworld/r1/
helloworld/r1/helloworld-5-0.noarch.rpm
helloworld/r1/testfile23.txt
helloworld/r1/tomcat-7.0.27.rpm
helloworld/r2/
helloworld/r2/helloworld-2-0.noarch.rpm
helloworld/r2/testfile12.txt
helloworld/r2/tomcat-5.0.52.rpm
hellotest/test.txt
hellotest/
| [reply] [d/l] [select] |
Thanks for all the examples -- this one works well for me. I don't think the result set will every be large enough to introduce efficiency issues.
| [reply] |
so terminal directories end with /? thats easy push @terminals, $_ if m{/\s*$}sm;
| [reply] [d/l] |
Unless the OP made a mistake, he did not include helloworld/ in his definition of a terminal directory, thus your solution doesn't quite work, but I'll bet you can tweak it so it will work!
| [reply] [d/l] |
Unless the OP made a mistake, he did not include helloworld/ in his definition of a terminal directory, thus your solution doesn't quite work, but I'll bet you can tweak it so it will work! i don't like your phrasing , to which i say, can't you do it?
yeah, none of the solutions filter out the parents of terminals, so with help from Re^4: Want for a name? (between)
#!/usr/bin/perl --
use strict;
use warnings;
sub mapAdj(&$@) {
use vars qw/ $a $b $c $d $e $f $g $h $i $j /;
local( $a, $b, $c, $d, $e, $f, $g, $h, $i, $j );
my( $code, $n ) = ( shift, shift );
map $code->(
($a, $b, $c, $d, $e, $f, $g, $h, $i, $j ) = @_[ $_-$n .. $_ ]
), --$n .. $#_;
}
sub subsumes { return 0 == index $_[1], $_[0]; }
my $termies = "
a/
a/b/
a/b/t/
a/b/t/f
a/c/
a/c/d/
a/c/d/t/
a/c/d/t/f
t/
";
open my($fh),'<',\$termies;
my @terminals;
while(<$fh>){
chomp;
push @terminals, $_ if m{/\s*$}sm;
}
@terminals = mapAdj { subsumes($a,$b) ? ( ) : ( $a ) } 2, (sort @termi
+nals), '';
print "$_\n" for @terminals;
__END__
a/b/t/
a/c/d/t/
t/
| [reply] [d/l] |