Magnolia25 has asked for the wisdom of the Perl Monks concerning the following question:
I am able to manage to create the data structure as below.
$VAR1 = {
'99155' => {
'PR' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ketchikan Gateway|Prince of
+ Wales-Hyder',
],
'WA' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokane|Lincol
+n|Adams',
'comments=America/Los_Angeles'
]
},
'26134' => {
'WV' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|Ritchie|
+Calhoun',
'comments=America/New_York'
]
}
};
But Now I Need to get all the "state_name" in above structure into an array.
@array = ('Puerto Rico', 'Alaska', 'Washington', 'West Virginia');
and another array as (Filter "state_name" for which "comments" is present)
@array = ('Puerto Rico', 'Alaska');
So far I tried:
# Walk both hash levels and print the strings stored under each inner key.
for my $outerkey (keys %hash) {
# NOTE(review): keys() applied directly to a hash reference (autoderef) was
# removed in Perl 5.24; keys %{ $hash{$outerkey} } works on every version.
for my $innerkey (keys $hash{$outerkey}) {
print join(', ', @{ $hash{$outerkey}{$innerkey} } );
# NOTE(review): this appends @array to the stored list; to collect values
# into @array the two arguments would have to be the other way round.
push( @{ $hash{$outerkey}{$innerkey} }, @array);
}
}
Please help.
Re: Get all hash value into array
by hippo (Bishop) on Feb 01, 2020 at 15:06 UTC
|
But Now I Need to get all the "state_name" in above structure into an array.
Well, if you must start from there perhaps Data::DPath is one option.
#!/usr/bin/env perl
use strict;
use warnings;
use Data::DPath 'dpath';
my $foo = {
'99155' => {
'PR' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ketchikan Gateway|Prince of
+ Wales-Hyder',
],
'WA' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokane|Lincol
+n|Adams',
'comments=America/Los_Angeles'
]
},
'26134' => {
'WV' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|Ritchie|
+Calhoun',
'comments=America/New_York'
]
}
};
# For every item the path matches, keep the text after the first '='; items
# without '=' contribute nothing because the match returns an empty list.
# NOTE(review): the path presumably selects element 0 of each third-level
# array (the 'state_name=...' string) - confirm against Data::DPath docs.
my @names = map { /=(.*)/ } dpath ('/*/*/*[0]')->match ($foo);
print "@names\n";
The subsequent filter is left as an exercise.
Update: removed the unnecessary /g modifier. (thanks, LanX) | [reply] [d/l] [select] |
Re: Get all hash value into array
by tybalt89 (Monsignor) on Feb 01, 2020 at 18:40 UTC
|
#!/usr/bin/perl
use strict; # https://perlmonks.org/?node_id=11112221
use warnings;
my $VAR1 = {
'99155' => {
'PR' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ketchikan Gateway|Prince of
+ Wales-Hyder',
],
'WA' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokane|Lincol
+n|Adams',
'comments=America/Los_Angeles'
]
},
'26134' => {
'WV' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|Ritchie|
+Calhoun',
'comments=America/New_York'
]
}
};
# Collect every state name in @states, and in @commentedstates the names of
# those states whose record also contains a "comments=..." entry.
my @states;
my @commentedstates;
for my $group ( values %$VAR1 )
  {
  for my $fields ( values %$group )
    {
    # Take the name from this record only. A record without a state_name is
    # skipped, so it can never pick up the previous record's name.
    my ($name) = map { /\bstate_name=(.*)/ } @$fields;
    next unless defined $name;
    push @states, $name;
    push @commentedstates, $name if grep { /\bcomments=/ } @$fields;
    }
  }
print "states :\n@states\n\n";
print "states with comments :\n@commentedstates\n\n";
Outputs:
states :
West Virginia Washington Puerto Rico Alaska
states with comments :
West Virginia Washington
| [reply] [d/l] [select] |
Re: Get all hash value into array
by LanX (Saint) on Feb 01, 2020 at 13:04 UTC
|
> I am able to manage to create the data structure as below
An unfortunate format. :(
Your task would be much easier, if you created a hash of hashes of hashes ...
$VAR1 = {
'99155' => {
'PR' => {
state_name => 'Puerto Rico',
county_names_all => 'Adjuntas|Utuado',
},
...
If your primary source was an array database, consider using proper queries to get what you desire.
UPDATE
moved lengthy update to separate reply below
| [reply] [d/l] |
|
The data structure above can easily be traversed with three nested while loops applying each.
my $VAR1 = {
'99155' => {
'PR' => {
state_name => 'Puerto Rico',
county_names_all => 'Adjuntas|Utuado',
},
},
};
# Visit every leaf of the three-level hash and print its full access path.
# Each while loop assigns the next (key, value) pair from each(); the list
# assignment counts zero elements once the hash is exhausted, ending the loop.
my $v0 = $VAR1;
while ( my ($k1,$v1) = each %$v0 ) {
while ( my ($k2,$v2) = each %$v1 ) {
while ( my ($k3,$v3) = each %$v2 ) {
print "\$VAR1->{$k1}{$k2}{$k3} => '$v3'\n"
}
}
}
output
$VAR1->{99155}{PR}{state_name} => 'Puerto Rico'
$VAR1->{99155}{PR}{county_names_all} => 'Adjuntas|Utuado'
Since your third level is an array you'll have to split it up into a hash.
# Turn a list of "key=value" strings into a hash and dump it.
my $v2 = [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
];
# The limit of 2 splits on the first '=' only, so a value that itself
# contains '=' stays in one piece.
my %h3 = map { split /=/,$_,2 } @$v2;
print Dumper \%h3;
$VAR1 = {
'state_name' => 'Puerto Rico',
'county_names_all' => 'Adjuntas|Utuado'
};
This should give you enough hints. :)
HTH!
update
on a side note, you can also use each on arrays
# Iterate an array with each(): every call yields an (index, element) pair.
# each() on arrays needs Perl 5.12 or later.
my $v2 = [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
];
while ( my ($k3,$v3) = each @$v2 ) {
print "\$v2->[$k3] => '$v3'\n"
}
out
$v2->[0] => 'state_name=Puerto Rico'
$v2->[1] => 'county_names_all=Adjuntas|Utuado'
| [reply] [d/l] [select] |
Re: Get all hash value into array
by 1nickt (Canon) on Feb 01, 2020 at 19:01 UTC
|
Hi, as others have pointed out, your data structure is terrible. You should change the code that generates it if possible. If you are stuck with it ...
First transform the data into something easy to work with. Then get your lists.
use warnings;
use strict;
use feature 'say';
use Data::Dumper;
# Dumper settings: terse output, indent mode 1, sorted and unquoted keys.
$Data::Dumper::Terse = $Data::Dumper::Indent = $Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;

my $in = get_input();

# Re-key the data by state abbreviation and turn each list of "key=value"
# strings into a hash.
# NOTE: a state that appears under more than one id overwrites its earlier
# entry, because %out is keyed by the abbreviation alone.
my %out;
for my $id (keys %{ $in }) {
    for my $state (keys %{ $in->{$id} }) {
        # Limit 2: split on the first '=' only, so a value containing '='
        # still yields exactly one key and one value.
        $out{$state} = { map { split /=/, $_, 2 } @{ $in->{$id}{$state} } };
    }
}

say 'Transformed input: ', Dumper \%out;
say 'All states: ', Dumper [ map { $_->{state_name} } values %out ];

# "Commented" means the comments key is present, whatever its value.
say 'Commented: ', Dumper [
    map  { $_->{state_name} }
    grep { exists $_->{comments} }
    values %out
];
#---------------------------------------------------------#
# Return the sample data set as a hash reference:
#   id => { state abbreviation => [ "key=value" strings ] }
# Takes no arguments. Every string literal is on a single line; a line break
# inside one would become part of the data.
sub get_input {
    return {
        99155 => {
            PR => [
                'state_name=Puerto Rico',
                'county_names_all=Adjuntas|Utuado',
            ],
            AK => [
                'state_name=Alaska',
                'county_names_all=Ketchikan Gateway|Prince of Wales-Hyder',
            ],
            WA => [
                'state_name=Washington',
                'county_names_all=Pend Oreille|Spokane|Lincoln|Adams',
                'comments=America/Los_Angeles',
            ],
        },
        26134 => {
            WV => [
                'state_name=West Virginia',
                'county_names_all=Wirt|Wood|Jackson|Ritchie|Calhoun',
                'comments=America/New_York',
            ],
        },
    };
}
Output:
Transformed input: {
AK => {
county_names_all => 'Ketchikan Gateway|Prince of Wales-Hyder',
state_name => 'Alaska'
},
PR => {
county_names_all => 'Adjuntas|Utuado',
state_name => 'Puerto Rico'
},
WA => {
comments => 'America/Los_Angeles',
county_names_all => 'Pend Oreille|Spokane|Lincoln|Adams',
state_name => 'Washington'
},
WV => {
comments => 'America/New_York',
county_names_all => 'Wirt|Wood|Jackson|Ritchie|Calhoun',
state_name => 'West Virginia'
}
}
All states: [
'Alaska',
'Washington',
'Puerto Rico',
'West Virginia'
]
Commented: [
'Washington',
'West Virginia'
]
See map, grep, split.
Hope this helps!
The way forward always starts with a minimal test.
| [reply] [d/l] [select] |
Re: Get all hash value into array
by Marshall (Canon) on Feb 02, 2020 at 02:22 UTC
|
I suggest a different approach than replies so far. Dealing with complex multi-level hash structures is complex and is often not necessary. I suggest "flattening" your hierarchical structure to a flat table. Searching a table like that is simple, but does cost in performance because you have to examine each row for every search.
Below I show the code to transform what you have now into an Array of Hash. This is similar to the C concept of an Array of Struct. An Array of Array representation is also possible but there are a few complications with that, like getting null or default values into the "unused columns".
In general, do not make a hierarchical data structure unless there is clear reason to organize the data that way. The main hash keys should be extremely important and used in almost all queries. I could not figure out what '99155' or '26134' meant although they did look like American Zip Codes to me. These numbers did not figure prominently in your example queries, which is a clue that the data structure is not quite right. It could be that an organization by state abbreviation as a key might make sense?
However, absent any new information, I would go with a flat table. This is very fast for 10K or even 100K entries. Whether that performance is acceptable or not depends upon how often you do it! At row size of 1 million, I would put this into a real DB and use SQL to access it.
Example conversion and access code follows:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my $VAR1 = {
'99155' => {
'PR' => [
'state_name=Puerto Ri
+co',
'county_names_all=Adj
+untas|Utuado',
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ket
+chikan Gateway|Prince of Wales-Hyder',
],
'WA' => [
'state_name=Washingto
+n',
'county_names_all=Pen
+d Oreille|Spokane|Lincoln|Adams',
'comments=America/Los
+_Angeles'
]
},
'26134' => {
'WV' => [
'state_name=West Vir
+ginia',
'county_names_all=Wi
+rt|Wood|Jackson|Ritchie|Calhoun',
'comments=America/Ne
+w_York'
]
}
};
# Flatten this out to a row structure. One line per unique combination of stuff.
# Each row is represented by an anonymous hash.
# An Array of Hash is similar to the C concept of an Array of Struct.
# An Array of Array representation is also possible.
# This db structure is easily adaptable to an SQL DB.
my @rows;
foreach my $zip (keys %$VAR1)
{
    foreach my $twoLetters (keys %{$VAR1->{$zip}})
    {
        my %fieldHash;
        foreach my $field ( @{$VAR1->{$zip}->{$twoLetters}} )
        {
            # Limit 2: split on the first '=' only, so a value that itself
            # contains '=' is kept whole instead of being cut short.
            my ($detail_name, $detail_value) = split (/=/, $field, 2);
            $fieldHash{$detail_name} = $detail_value;
        }
        # %fieldHash is listed last, so a field named "zip" or "state"
        # would override the values set here.
        push @rows, {
                      zip   => $zip,
                      state => $twoLetters,
                      %fieldHash
                    };
    }
}
print Dumper \@rows;
=header Prints
$VAR1 = [
{
'state_name' => 'Alaska',
'county_names_all' => 'Ketchikan Gateway|Prince of Wales-H
+yder',
'zip' => '99155',
'state' => 'AK'
},
{
'comments' => 'America/Los_Angeles',
'county_names_all' => 'Pend Oreille|Spokane|Lincoln|Adams'
+,
'zip' => '99155',
'state' => 'WA',
'state_name' => 'Washington'
},
{
'county_names_all' => 'Adjuntas|Utuado',
'state' => 'PR',
'zip' => '99155',
'state_name' => 'Puerto Rico'
},
{
'comments' => 'America/New_York',
'county_names_all' => 'Wirt|Wood|Jackson|Ritchie|Calhoun',
'state' => 'WV',
'zip' => '26134',
'state_name' => 'West Virginia'
}
];
=cut
# Print all state names as one comma-separated line.
my @state_names = map { $_->{state_name} } @rows;
# Only the names go through join(); the newline is a separate print argument,
# otherwise it is joined too and the line ends in a stray comma.
print join(",", @state_names), "\n";    # e.g. Alaska,Puerto Rico,Washington,West Virginia

# Print only the state names of rows that have a (true) comment.
my @comment_state_names = map { $_->{state_name} } grep { $_->{comments} } @rows;
print join(",", @comment_state_names), "\n";    # e.g. West Virginia,Washington
I would also add that in the above example, state_names were unique. If that were not true, then I would recommend: List::Util qw(uniq) to filter out duplicates.
Revision:
A more complex thing could be:
# For every row, list the counties whose name contains a 'w' (upper or
# lower case), one "STATE county" line per hit.
for my $row (@rows)
{
    print "$row->{state} $_\n"
        for grep { /w/i } split /\|/, $row->{county_names_all};
}
=prints
WV Wirt
WV Wood
AK Ketchikan Gateway
AK Prince of Wales-Hyder
=cut
| [reply] [d/l] [select] |
|
> I suggest "flattening" your hierarchical structure to a flat table.
Your approach implies that the data is essentially analog to a DB table. But you are losing the possibility to index the data by "zip" (?) or "state" with a hash lookup.
> Searching a table like that is simple,
I don't see why using 3 nested while each loops is more complicated. It's pretty generic and keeps all data available. (Though you have to take care not to mess up the each iterator.)
> but does cost in performance because you have to examine each row for every search.
Hmm, if I wanted to represent a DB table I'd use an AoA with associated index hashes.
- One at least for the columns aka fields.
- Then one for each "unique" field holding the row indices.
I'm pretty sure this can already be found° on CPAN.
Probably as object or tied array.
I'm not very experienced with NoSQL but this might go into the same direction.
°) a cursory look revealed Data::Table not sure if that's a good example though.
| [reply] |
|
Hi Rolf!
Your approach implies that the data is essentially analog to a DB table. But you are losing the possibility to index the data by "zip" (?) or "state" with a hash lookup.
My approach not only implies a "flat", "de-normalized" DB table, that is what it is. This will work great for a few tens of thousands of lines. I am not "losing the possibility to index by "zip"". When you get to say 100,000K+ lines, then I would recommend a DB like SQLite. Let the DB take care of indexing. There are to be sure a lot of "ifs, ands and buts" with a DB. However the OP's data structure does not appear to me to be efficient.
From what I can tell, the use of the "zip" as a primary key doesn't make any sense. And the OP's hash structure is hard to search and inefficient. Yes, I do think that 1 loop is easier to understand than 3 loops.
Cheers,
Marshall
| [reply] |
Re: Get all hash value into array
by tobyink (Canon) on Feb 03, 2020 at 07:23 UTC
|
# Stringify the whole structure with Dumper and pull every value that
# follows 'state_name= out of the dump text with one global match.
# NOTE(review): relies on the dump showing each string in single quotes; a
# name containing a quote would presumably be cut short - confirm before reuse.
my @states = Dumper(\%hash) =~ /'state_name=(.*?)'/g;
| [reply] [d/l] |
Re: Get all hash value into array
by kcott (Archbishop) on Feb 03, 2020 at 08:00 UTC
|
#!/usr/bin/env perl
use strict;
use warnings;
my $VAR1 = {
... as per your OP ...
};
# Collect all state names, and separately those whose record has a
# "comments=" entry, using plain prefix tests instead of regexes.
my (@all_states, @comment_states);
my $name_prefix = 'state_name=';
for my $fields (map values %$_, values %$VAR1) {
    my ($state, $comment) = ('', 0);
    for my $field (@$fields) {
        # index() == 0 means the field starts with the prefix.
        $state   = substr($field, length $name_prefix) if 0 == index $field, $name_prefix;
        $comment = 1 if 0 == index $field, 'comments=';
    }
    # A record without a state name contributes to neither list; previously
    # such a record with a comment pushed an empty string.
    next unless length $state;
    push @all_states, $state;
    push @comment_states, $state if $comment;
}
use Data::Dump;
dd \@all_states;
dd \@comment_states;
Output:
["Alaska", "Puerto Rico", "Washington", "West Virginia"]
["Washington", "West Virginia"]
That handles the words '"comments" is present';
however, your example output suggests you want '"comments" is absent'.
Changing if to unless in the second push:
push @comment_states, $state unless $comment;
And the output becomes:
["West Virginia", "Washington", "Puerto Rico", "Alaska"]
["Puerto Rico", "Alaska"]
I'll leave you to decide which you actually want. :-)
| [reply] [d/l] [select] |
Re: Get all hash value into array
by leszekdubiel (Scribe) on Feb 02, 2020 at 12:22 UTC
|
# Build a set (state name => dummy value) of the states whose record has a
# third element, which in this data is the optional "comments=..." entry.
# Assumes state_name is always element 0 and comments element 2.
my %states_with_comments =
    map { ((split /=/, $$_[0], 2)[1], 'hurray!') }
    grep { $$_[2] }
    map { values %$_ }
    values %$db;
# "sort," (with a comma) sorted an empty list and left the keys in hash
# order; without the comma the names really are sorted. The sample output
# quoted after this snippet was produced before this correction.
print "states are: ", (join "; ", sort keys %states_with_comments), "\n";
result is
root@orion:/tmp# ./mytest.pl
states are: West Virginia; Washington
And here is some clarification; read the code from the bottom UP (!!!):
# Read the numbered steps from the bottom up.
my %states_with_comments =
    # 5. finally, the state names are the keys of this hash
    # 4. split the first element into "state_name" and the name, take the
    #    name, and emit the pair name => 'hurray!'
    map { ((split /=/, $$_[0], 2)[1], 'hurray!') }
    # 3. test whether the array has a third element, which is the comment
    grep { $$_[2] }
    # 2. take the arrays that are below PR, AK, WA, WV
    map { values %$_ }
    # 1. make a list of the hashes below the numbers 99155, 26134
    values %$db;
Perl is well suited to process DATA FLOW and not loop, over loop, over loop... You could do looping in Python, Javascript. But in Perl you should process sets of data.
If you want to see internals just inject Dumpers:
# Same pipeline with a pass-through grep after every stage (read bottom up):
# each such grep warns a Dumper of the current item and then returns 1, so
# it lets every item through and only makes the intermediate data visible.
my %states_with_comments =
grep { warn Dumper({"finally" => $_}); 1; }
map { ((split /=/, $$_[0], 2)[1], 'hurray!') }
grep { warn Dumper({"after filtering" => $_}); 1; }
grep { $$_[2] }
grep { warn Dumper({"second step is" => $_}); 1; }
map { values %$_ }
grep { warn Dumper({"first step is" => $_}); 1; }
values %$db;
result with debugging:
root@orion:/tmp# ./mytest.pl
$VAR1 = {
'first step is' => {
'PR' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Ut
+uado'
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ketchikan G
+ateway|Prince of + Wales-Hyder'
],
'WA' => [
'state_name=Washington',
'county_names_all=Pend Oreill
+e|Spokane|Lincol +n|Adams',
'comments=America/Los_Angeles
+'
]
}
};
$VAR1 = {
'first step is' => {
'WV' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|J
+ackson|Ritchie| +Calhoun',
'comments=America/New_York'
]
}
};
$VAR1 = {
'second step is' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado'
]
};
$VAR1 = {
'second step is' => [
'state_name=Alaska',
'county_names_all=Ketchikan Gateway|Pr
+ince of + Wales-Hyder'
]
};
$VAR1 = {
'second step is' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokane
+|Lincol +n|Adams',
'comments=America/Los_Angeles'
]
};
$VAR1 = {
'second step is' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|Ri
+tchie| +Calhoun',
'comments=America/New_York'
]
};
$VAR1 = {
'after filtering' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokan
+e|Lincol +n|Adams',
'comments=America/Los_Angeles'
]
};
$VAR1 = {
'after filtering' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|R
+itchie| +Calhoun',
'comments=America/New_York'
]
};
$VAR1 = {
'finally' => 'Washington'
};
$VAR1 = {
'finally' => 'hurray!'
};
$VAR1 = {
'finally' => 'West Virginia'
};
$VAR1 = {
'finally' => 'hurray!'
};
states are: West Virginia; Washington
| [reply] [d/l] [select] |
Re: Get all hash value into array
by leszekdubiel (Scribe) on Feb 02, 2020 at 12:16 UTC
|
#!/usr/bin/perl
use utf8;
use Modern::Perl;
use Data::Dumper;
my $db = {
'99155' => {
'PR' => [
'state_name=Puerto Rico',
'county_names_all=Adjuntas|Utuado',
],
'AK' => [
'state_name=Alaska',
'county_names_all=Ketchikan Gateway|Prince of
+ + Wales-Hyder',
],
'WA' => [
'state_name=Washington',
'county_names_all=Pend Oreille|Spokane|Lincol
+ +n|Adams',
'comments=America/Los_Angeles'
]
},
'26134' => {
'WV' => [
'state_name=West Virginia',
'county_names_all=Wirt|Wood|Jackson|Ritchie|
+ +Calhoun',
'comments=America/New_York'
]
}
};
# Build a set (state name => dummy value) of all state names.
# Assumes the "state_name=..." string is always element 0 of each array.
my %states =
    map { ((split /=/, $$_[0], 2)[1], 'hurray!') }
    map { values %$_ }
    values %$db;
# "sort," (with a comma) sorted an empty list and left the keys in hash
# order; without the comma the names really are sorted. The sample output
# quoted after this script was produced before this correction.
print "states are: ", (join "; ", sort keys %states), "\n";
result is:
root@orion:/tmp# ./mytest.pl
states are: West Virginia; Puerto Rico; Washington; Alaska
| [reply] [d/l] [select] |
|
> most simple solution
Simplest? I doubt that, but it's maybe the shortest.
And your solution depends on the assumption that the array fields have always a fixed index.
| [reply] |
|
#!/usr/bin/perl
use utf8;
use Modern::Perl;
use Data::Dumper;
my $db = {
'99155' => {
'PR' => {
state_name => 'Puerto Rico',
county_names_all => [qw{Adjuntas Utuado}],
},
'AK' => {
state_name => 'Alaska',
county_names_all => ['Ketchikan', 'Gateway',
+'Prince of + Wales-Hyder'],
},
'WA' => {
state_name => 'Washington',
county_names_all => ['Pend Oreille', 'Spokane
+', 'Lincol +nAdams'],
comments => 'America/Los_Angeles'
}
},
'26134' => {
'WV' => {
state_name => 'West Virginia',
county_names_all => ['Wirt', 'Wood', 'Jackso
+n', 'Ritchie', 'Calhoun'],
comments => 'America/New_York'
}
}
};
# Hash-of-hashes variant: build a set (state name => dummy value) of the
# states whose record has a true "comments" value.
my %states_with_comments =
    map { ($$_{state_name}, 'hurray!') }
    grep { $$_{comments} }
    map { values %$_ }
    values %$db;
# No comma after sort: "sort," would sort an empty list and leave the keys
# in hash order.
print "states are: ", (join "; ", sort keys %states_with_comments), "\n";
Dumper of db:
$VAR1 = {
'26134' => {
'WV' => {
'county_names_all' => [
'Wirt',
'Wood',
'Jackson',
'Ritchie',
'Calhoun'
],
'comments' => 'America/New_York',
'state_name' => 'West Virginia'
}
},
'99155' => {
'WA' => {
'comments' => 'America/Los_Angeles',
'state_name' => 'Washington',
'county_names_all' => [
'Pend Oreille
+',
'Spokane',
'Lincol +nAda
+ms'
]
},
'AK' => {
'county_names_all' => [
'Ketchikan',
'Gateway',
'Prince of +
+Wales-Hyder'
],
'state_name' => 'Alaska'
},
'PR' => {
'state_name' => 'Puerto Rico',
'county_names_all' => [
'Adjuntas',
'Utuado'
]
}
}
};
| [reply] [d/l] [select] |
Re: Get all hash value into array
by Anonymous Monk on Feb 01, 2020 at 12:17 UTC
|
| [reply] |
|
|