So I started working on a tree-like DS for my data. I would really appreciate to get a small code review! (more about the approach then the code itself)
I'll explain what I did first: Basically I have four classes. The
FileNode represents a file ,
DirNode represents a directory,
LinkNode represents a link and
PathsTree represents the tree of nodes. Dir node has a hash of nodes that it points to. The Link node has one node that it points to. Now, I also added
display_tree method for debugging purposes (you can't ignore it). The code of
LinkNode:
package LinkNode;
sub new {
my ($class,$node_name,$node_target,$link_type) = @_;
my $self = {
"nodes" => {},
"type" => $link_type,
"name" => $node_name,
"target" => $node_target
};
bless $self, $class;
return $self;
}
sub get_child_node_by_name {
my ($self,$node_name) = @_;
if (defined($self->{"nodes"}{$node_name})) {
return $self->{"nodes"}{$node_name};
}
return undef;
}
sub display_tree {
my ($self,$prefix) = @_;
print($prefix." ".$self->{"name"}." -> ".$self->{"target"}."\n");
}
1;
The code of
FileNode:
package FileNode;
sub new {
my ($class,$node_name) = @_;
my $self = {
"type" => "file",
"name" => $node_name
};
bless $self, $class;
return $self;
}
sub display_tree {
my ($self,$prefix) = @_;
print($prefix." ".$self->{"name"}."\n");
}
1;
The code of
DirNode:
package DirNode;
use file_node qw(:all);
use link_node qw(:all);
sub new {
my ($class,$node_name,$node_type) = @_;
my $self = {
"nodes" => {},
"type" => $node_type,
"name" => $node_name
};
bless $self, $class;
return $self;
}
sub get_child_node_by_name {
my ($self,$node_name) = @_;
if (defined($self->{"nodes"}{$node_name})) {
return $self->{"nodes"}{$node_name};
}
return undef;
}
sub add_child_node {
my ($self,$node_name,$node_type) = @_;
if ($node_type eq "dir") {
my $new_node = new DirNode($node_name);
$self->{"nodes"}{$node_name} = $new_node;
return $new_node;
} elsif ($node_type eq "file") {
my $new_node = new FileNode($node_name);
$self->{"nodes"}{$node_name} = $new_node;
return $new_node;
}
return undef;
}
sub add_child_link_node {
my ($self,$node_name,$node_target,$node_type) = @_;
my $new_node = new LinkNode($node_name,$node_target,$node_type);
$self->{"nodes"}{$node_name} = $new_node;
return $new_node;
}
sub get_dirs {
my ($self) = @_;
my $path = $self->{"name"};
my %paths;
foreach my $current_node (values(%{$self->{"nodes"}})) {
if ($current_node->isa('DirNode')) {
my %subpaths = $current_node->get_dirs();
foreach my $subpath (keys(%subpaths)) {
my $updated_path = catdir($path,$subpath);
$paths{$updated_path} += 1;
}
}
}
if (scalar(keys(%paths)) == 0) {
$paths{$path} += 1;
}
return %paths;
}
sub get_files {
my ($self) = @_;
my $path = $self->{"name"};
my %paths;
foreach my $current_node (values(%{$self->{"nodes"}})) {
if ($current_node->isa('DirNode')) {
my %subpaths = $current_node->get_files();
foreach my $subpath (keys(%subpaths)) {
my $updated_path = catdir($path,$subpath);
$paths{$updated_path} += 1;
}
} elsif ($current_node->isa('FileNode')) {
my $updated_path = catdir($path,$current_node->{"name"});
$paths{$updated_path} += 1;
}
}
return %paths;
}
sub get_links {
my ($self) = @_;
my $path = $self->{"name"};
my %paths;
foreach my $current_node (values(%{$self->{"nodes"}})) {
if ($current_node->isa('DirNode')) {
my %subpaths = $current_node->get_links();
foreach my $subpath (keys(%subpaths)) {
my $updated_path = catdir($path,$subpath);
$paths{$updated_path} = $subpaths{$subpath};
}
} elsif ($current_node->isa('LinkNode')) {
my $target_path = $current_node->{"target"};
my $updated_path = catdir($path,$current_node->{"name"});
$paths{$updated_path} = $target_path;
}
}
return %paths;
}
sub display_tree {
my ($self,$prefix) = @_;
print($prefix." ".$self->{"name"}."\n");
foreach my $current_node (values(%{$self->{"nodes"}})) {
$current_node->display_tree($prefix."-");
}
}
1;
And the most important part - the code of
PathsTree:
package PathsTree;
use strict;
use dir_node qw(:all);
use Cwd qw(cwd abs_path getcwd);
use File::Basename qw(fileparse);
use File::Spec::Functions qw(file_name_is_absolute splitdir catfile ca
+tdir);
sub new {
my ($class) = @_;
my $self = {
_root_node => new DirNode('/',"dir")
};
bless $self, $class;
return $self;
}
sub display_tree {
my ($self) = @_;
return $self->{"_root_node"}->display_tree("-");
}
sub get_links {
my ($self) = @_;
return $self->{"_root_node"}->get_links();
}
sub get_files {
my ($self) = @_;
return $self->{"_root_node"}->get_files();
}
sub get_dirs {
my ($self) = @_;
return $self->{"_root_node"}->get_dirs();
}
sub get_node_by_path {
my ($self,$path) = @_;
my @subpaths = splitdir($path);
my $prev_node = $self->{_root_node};
for my $index (0..$#subpaths) {
next if (@subpaths[$index] eq "");
my $current_node_name = @subpaths[$index];
my $current_child = $prev_node->get_child_node_by_name(@subpat
+hs[$index]);
unless ($current_child) {
return undef;
}
$prev_node = $current_child;
}
return $prev_node;
}
sub add_path {
my ($self,$path) = @_;
my @subpaths = splitdir($path);
my $prev_node = $self->{_root_node};
for my $index (0..$#subpaths) {
next if (@subpaths[$index] eq ""); # Ignore empty strings
my $current_path = catdir(@subpaths[0..$index]);
my $current_node_name = @subpaths[$index];
if (-l $current_path) {
my @resloved_links = resolve_symlink($current_path);
foreach my $link (@resloved_links) {
if ($link eq $current_path) {
next;
}
$self->add_path($link);
}
my $current_link_target = readlink($current_path);
my $child_node = $self->get_node_by_path($current_path);
if ($child_node) {
$prev_node = $child_node;
} else {
$child_node = $prev_node->add_child_link_node($current
+_node_name,$current_link_target,"regular_link");
$prev_node = $child_node;
}
$prev_node = $self->get_node_by_path($current_link_target)
+;
} elsif (-f $current_path) {
my $child_node = $self->get_node_by_path($current_path);
if ($child_node) {
$prev_node = $child_node;
} else {
$child_node = $prev_node->add_child_node($current_node
+_name,"file");
$prev_node = $child_node;
}
} elsif (-d $current_path) {
my $child_node = $self->get_node_by_path($current_path);
if ($child_node) {
$prev_node = $child_node;
} else {
$child_node = $prev_node->add_child_node($current_node
+_name,"dir");
$prev_node = $child_node;
}
} else {
log("Ignoring path with unknown type: $current_path");
}
}
}
# TODO: Move to a separated file
sub resolve_symlink {
my ($file) = @_;
unless (file_name_is_absolute($file)) {
log("Failed to resolve link $file");
return;
}
my @files;
my $origwd = getcwd;
my $rv = eval {
my $f = $file;
while (1) {
my $dir;
($f,$dir) = fileparse($f);
last unless (-d $dir);
chdir $dir or die "chdir $dir: $!";
push @files, catfile(getcwd,$f);
last unless (-l $f);
defined( $f = readlink $f ) or die "readlink $f (cwd=".get
+cwd."): $!";
}
1 }; my $err = $@ || 'unknown error';
chdir $origwd or (log("Failed to chdir $origwd: $!") && return);
die $err unless ($rv);
return @files ? @files : ($file);
}
1;
Test case:
my $paths_tree = new PathsTree();
foreach my $path (keys(%paths)) {
$paths_tree->add_path($path);
}
$paths_tree->display_tree();
my %dirs = $paths_tree->get_dirs();
my %files = $paths_tree->get_files();
my %links = $paths_tree->get_links();
My thoughts about my code: I got very entangled with the links part. The whole idea was to make it scalable and it's got difficult to follow the code in the "link-handler" section of
add_path method. Also, I probably should use here inheritance - some Node class with basic methods which DirNode, FileNode and LinkNode inherit from.
My code probably contains bugs but I ask you this - is it the right approach? If you remember from the intro, the main feature that I want to add is to use virtual paths instead of logical. So once I done building the graph, I want to provide a list of virtual paths. For example, in the first example that I provided above, if user gave me
/p then I should replace every occurrence of
/a/b/c with
/p in the graph. Do you think it will not be hard to do with my current approach?
P.S. originally this post was part of the main post but as suggested, I moved it to a separated reply.