Beefy Boxes and Bandwidth Generously Provided by pair Networks
Just another Perl shrine
 
PerlMonks  

Re^3: My code seems messy...

by Miguel (Friar)
on May 14, 2006 at 15:56 UTC ( [id://549329]=note: print w/replies, xml ) Need Help??


in reply to Re^2: My code seems messy...
in thread My code seems messy...

Here I start by checking whether there's a valid month name in the string. If there is, I extract the dates from the string; otherwise I skip that line.
Note that I had to add a space after the first '...' because '...September' is not a valid month name.
Updated to return valid dates
#!/usr/bin/perl
# filename: extract_dates.pl
#
# Reads the sample lines in the __DATA__ section, looks for an English month
# name in each one, extracts the calendar date(s) written around it and dumps
# the result as a list of { dates => [ 'MM/DD/YY', ... ], string => $line }.

use strict;
use warnings;

use Data::Dumper;
use Date::Manip;

$Data::Dumper::Indent = 1;

# Lower-cased month names and abbreviations accepted by findMonth().
my %IS_MONTH = map { $_ => 1 } qw(
    january february march april may june
    july august september october november december
    jan feb mar apr jun jul aug sep oct nov dec
);

my @dates;
while ( my $line = <DATA> ) {
    push @dates, findMonth($line);
}
print Dumper \@dates;    # or do something else with your dates

# findMonth($line)
#
# Splits $line on single spaces and tries every word that is exactly a month
# name or abbreviation (case-insensitive) as the anchor of a date.
#
# Returns the hash reference produced by extractDate() for the first word that
# yields at least one date, or nothing (empty list / undef) when the line
# holds no usable date. Trying later words matters for lines such as
# "you may register until April 15, 1994", where "may" is not a month.
sub findMonth {
    my ($line) = @_;
    return unless defined $line;

    my @words  = split / /, $line;
    my $string = join ' ', @words;

    for my $word (@words) {
        next unless exists $IS_MONTH{ lc $word };

        my $found = extractDate( { MONTH => $word, STRING => $string } );
        return $found if $found;
    }

    return;
}

# extractDate({ MONTH => $word, STRING => $line })
#
# Looks in STRING for a date expression built around MONTH (matched with the
# exact spelling and case given) and hands the matched text to makeValidDate().
#
# Recognised shapes, as exercised by the sample data:
#   April 15, 1994          11 February 1994       16th September 1994
#   September 21-23, 1994   March 4, 5 and 6, 1994
#
# Returns whatever makeValidDate() returns, or nothing when STRING contains no
# such expression.
sub extractDate {
    my ($self) = @_;

    # MONTH comes from a whitelist today, but quote it anyway so it can never
    # be interpreted as regex syntax.
    my $month = quotemeta $self->{MONTH};

    # Pattern layout, top to bottom: optional leading day with optional
    # ordinal suffix (not glued to a preceding digit, so the tail of a year is
    # never taken for a day); the month followed by a day or year; then the
    # optional day range, extra comma-separated days, "and <day>", and year.
    my ($date_text) = $self->{STRING} =~ /
        (
            (?: (?<!\d) [123]?\d (?:st|nd|rd|th)? \s+ )?
            \b $month \s \d{1,4}
            (?:
                (?: - [123]?\d )?
                (?: , \s [123]?\d \s )*
                (?: and \s \d+ )?
                (?: , \s \d{4} )?
            )?
        )
    /x;
    return unless defined $date_text;

    return makeValidDate( { STRING => $self->{STRING}, DATE => $date_text } );
}

# makeValidDate({ STRING => $line, DATE => $date_text })
#
# Breaks DATE (the text matched by extractDate) into days, month and year,
# expands day ranges such as "21-23", and validates every resulting day with
# Date::Manip.
#
# Returns { dates => [ ... ], string => $first_line_of_STRING } where each
# date is formatted by UnixDate's "%D" directive, or nothing when no valid
# date could be built. Days that Date::Manip rejects are dropped instead of
# being reported as undef.
sub makeValidDate {
    my ($self) = @_;

    # Keep the source line without its trailing newline for the report.
    my ($string) = $self->{STRING} =~ /^(.+)$/;

    # Strip ordinal suffixes only when they follow a digit, so month names
    # such as "August" (which contains "st") stay intact.
    my $text = $self->{DATE};
    $text =~ s/(?<=\d)(?:st|nd|rd|th)\b//gi;

    my ( @days, $month, $year );
    for my $token ( $text =~ /(\d+-\d+|\w+)/g ) {
        if ( $token =~ /\A(\d+)-(\d+)\z/ ) {
            push @days, $1 .. $2;    # day range, inclusive
        }
        elsif ( $token =~ /\A\d{1,2}\z/ ) {
            push @days, $token;
        }
        elsif ( $token =~ /\A\d{4}\z/ ) {
            $year = $token;
        }
        elsif ( lc $token eq 'and' ) {
            next;                    # connective in "4, 5 and 6"
        }
        elsif ( $token =~ /\A[[:alpha:]]+\z/ ) {
            $month = lc $token;
        }

        # Anything else (e.g. a 3-digit number) is not part of a date.
    }

    return unless @days && defined $month;

    my @valid;
    for my $day (@days) {

        # The year is optional in the matched text; leave it out rather than
        # interpolating undef into the string handed to ParseDate.
        my $parsed = ParseDate( join ' ', grep { defined } $day, $month, $year );
        next unless $parsed;

        my $formatted = UnixDate( $parsed, '%D' );
        push @valid, $formatted if defined $formatted;
    }

    return unless @valid;

    return { dates => \@valid, string => $string };
}

__DATA__
... the next social club meeting is on April 15, 1994...
... September 21-23, 1994 we will be hosting visitors...
... submissions should be made by 11 February 1994...
... Mail sent 7 Feb 1994...
... On the 16th September 1994 Mr X will be giving a talk on ...
... unconfirmed conference dates are March 4, 5 and 6, 1994...
$VAR1 = [
  {
    'dates' => [
      '04/15/94'
    ],
    'string' => '... the next social club meeting is on April 15, 1994...'
  },
  {
    'dates' => [
      '09/21/94',
      '09/22/94',
      '09/23/94'
    ],
    'string' => '... September 21-23, 1994 we will be hosting visitors...'
  },
  {
    'dates' => [
      '02/11/94'
    ],
    'string' => '... submissions should be made by 11 February 1994...'
  },
  {
    'dates' => [
      '02/07/94'
    ],
    'string' => '... Mail sent 7 Feb 1994...'
  },
  {
    'dates' => [
      '09/16/94'
    ],
    'string' => '... On the 16th September 1994 Mr X will be giving a talk on ...'
  },
  {
    'dates' => [
      '03/04/94',
      '03/05/94',
      '03/06/94'
    ],
    'string' => '... unconfirmed conference dates are March 4, 5 and 6, 1994...'
  }
];

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://549329]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others scrutinizing the Monastery: (5)
As of 2024-04-19 12:35 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found