<?xml version="1.0" encoding="windows-1252"?>
<node id="989475" title="Re^3: Retain first 4 characters of a string" created="2012-08-24 06:09:05" updated="2012-08-24 06:09:05">
<type id="11">
note</type>
<author id="961">
Anonymous Monk</author>
<data>
<field name="doctext">
&lt;p&gt; And here it is, a bunch &lt;c&gt;
#!/usr/bin/perl --
use strict; use warnings; 
use 5.010;
use Data::Dumper::Names;

my $orig  = 'Apple iPhone 4 Black Cover';
my $want = 'Appl-iPho-4-Blac-Cove';
say Dumper $orig, $want;
my $trim = join ' ', (split /\s+/, $orig)[0..1];
say Dumper $trim;
say "THE PROBLEM WITH YOUR ATTEMPT IS RETURN VALUE OF SPLIT ", Dumper([ split /\s+/, $orig ]);
say "OF WHICH YOU ONLY TAKE FIRST 2 ", Dumper([ (split /\s+/, $orig)[0..1] ]);
say "YOU NEED MATCH OPERATOR AND MAP AS WELL";
$trim = join '-', map { /^(\w{1,4})/ } split /\s+/, $orig;
say Dumper $want, $trim ;
say "SAME THING USING SUBSTITUTION OPERATOR";
$trim = $orig;
$trim =~ s{
(?:
  (\w{1,4}) # capture first 1-to-4 word chars in $1
  \w*       # match and ignore any remaining words 
)
|           # or
(\s+)       # any save any-amount-of-whitespace into $2
}{
   defined $2   # if defined $2
   ? '-'        # then return '-'
   : $1;        # else return $1
}gex;
say Dumper $want, $trim ;

$trim = $orig;
say "AND MAYBE FASTER, first transliterate spaces into -";
$trim =~ tr/\t\r\n /-/s;
say Dumper $trim;
say "then shorten words to 4 chars using perl 5.10 feature \\K";
$trim =~ s{\w{4}\K\w*}{}g; # ignore first 4 word chars, delete others
say Dumper $want, $trim ;


say "USING MATCH OPERATOR ONLY ";
$trim = join '-',  $orig =~ m{(\w{4})\w*\b|(\w{1,3})}g;
say Dumper $want, $trim ;

say "THOSE CAPTURE GROUPS ARE TRICKY ";
$trim = join '-',  grep defined, $orig =~ m{(\w{4})\w*\b|(\w{1,3})}g;
say Dumper $want, $trim ;

__END__
$ perl trim.pl
$orig = 'Apple iPhone 4 Black Cover';
$want = 'Appl-iPho-4-Blac-Cove';

$trim = 'Apple iPhone';

THE PROBLEM WITH YOUR ATTEMPT IS RETURN VALUE OF SPLIT $VAR1 = [
          'Apple',
          'iPhone',
          '4',
          'Black',
          'Cover'
        ];

OF WHICH YOU ONLY TAKE FIRST 2 $VAR1 = [
          'Apple',
          'iPhone'
        ];

YOU NEED MATCH OPERATOR AND MAP AS WELL
$want = 'Appl-iPho-4-Blac-Cove';
$trim = 'Appl-iPho-4-Blac-Cove';

SAME THING USING SUBSTITUTION OPERATOR
$want = 'Appl-iPho-4-Blac-Cove';
$trim = 'Appl-iPho-4-Blac-Cove';

AND MAYBE FASTER, first transliterate spaces into -
$trim = 'Apple-iPhone-4-Black-Cover';

then shorten words to 4 chars using perl 5.10 feature \K
$want = 'Appl-iPho-4-Blac-Cove';
$trim = 'Appl-iPho-4-Blac-Cove';

USING MATCH OPERATOR ONLY
Use of uninitialized value $orig in join or string at trim.pl line 42.
Use of uninitialized value $orig in join or string at trim.pl line 42.
Use of uninitialized value $orig in join or string at trim.pl line 42.
Use of uninitialized value $orig in join or string at trim.pl line 42.
Use of uninitialized value $orig in join or string at trim.pl line 42.
$want = 'Appl-iPho-4-Blac-Cove';
$trim = 'Appl--iPho---4-Blac--Cove-';

THOSE CAPTURE GROUPS ARE TRICKY
$want = 'Appl-iPho-4-Blac-Cove';
$trim = 'Appl-iPho-4-Blac-Cove';

&lt;/c&gt;</field>
<field name="root_node">
989455</field>
<field name="parent_node">
989459</field>
<field name="reputation">
8</field>
</data>
</node>
