use warnings; use strict; use autodie; use Test::More 'no_plan'; use Test::NoWarnings; use Data::Dump qw(dd); use constant EXPECTED => <', \ my $output; # simulate file i/o for testing # regexes of cluster representative and member record fields. use constant RX_REP => qr{ \b [[:upper:]] [[:lower:]]+ _ [[:lower:]] \b }xms; use constant RX_MBR => qr{ \b [[:upper:]] [[:lower:]]+ _ [[:lower:]] \b }xms; use constant SEP => ', '; # output record separator use constant TERM => "\n"; # output record terminator # WARNING: the empty string SHALL NOT be a possible cluster representative. my $previous_rep = ''; RECORD: while (my $record = ) { # ignore... next RECORD unless $record =~ m{ \S }xms; # blank lines my $parsed = my ($rep, $member) = $record =~ m{ \A \s* (${ \RX_REP }) \s+ # representative (${ \RX_MBR }) \s* # member (?: [#] [^\n]*)? # optional comment \Z }xmsg; die "bad record: '$record'" unless $parsed; my $new_cluster_begins = $previous_rep ne $rep; if ($new_cluster_begins) { # at start of each new cluster, cluster representative # must be same as cluster member. $rep eq $member or die # just checking "representative '$rep' not same as member '$member' ", "at start of new cluster '$record'"; # mark new cluster. $previous_rep = $rep; # terminate current cluster, if any; begin new one. terminate_cluster($fh_out); begin_new_cluster($fh_out, $rep); # representative/member same in new cluster: ignore append. next RECORD; } # not start of new cluster: append latest member to output. append_to_cluster($fh_out, $member); } # terminate final cluster, if any. terminate_cluster($fh_out); is $output, EXPECTED, "test output"; done_testing; close $fh_out; exit; # subroutines ###################################################### { # begin function closure my $begun; # private: output has begun; initial value false # begin new cluster. sub begin_new_cluster { my ($fh, # file handle: output stream $representative, # str: representative ) = @_; $begun = # if print succeeds, we've begun print $fh $representative; } # append latest member to output record. sub append_to_cluster { my ($fh, # file handle: output stream $member, # str: member ) = @_; print $fh SEP, $member; } # terminate current cluster, if any. sub terminate_cluster { my ($fh, # file handle: output stream ) = @_; return unless $begun; # output not begun yet: do nothing print $fh TERM; } } # end function closure __DATA__ Osat_a Osat_a # just one cluster member Atha_b Atha_b # >1 cluster member, this & next line = 2 members Atha_b Mtru_c Fves_d Fves_d # this & next 2 lines = 3 cluster members Fves_d Osat_e Fves_d Atha_f Atha_g Atha_g # just 1 cluster member Osat_h Osat_h Osat_h Atha_i Mtru_j Mtru_j # just 1 cluster member