#!/usr/bin/perl
#
# A minimal regex-driven lexer for arithmetic expressions.
#
# Usage: script.pl [EXPRESSION]
#
# Tokenizes EXPRESSION (or a built-in sample when no argument is given) and
# prints the resulting token list with Data::Dumper.
#
# Sample output for the default input (reformatted for brevity):
#
#   $VAR1 = [
#       [ 'Number', '2' ],
#       [ 'MulOp',  '*' ],
#       [ 'Number', '3' ],
#       [ 'AddOp',  '+' ],
#       [ 'Number', '4' ]
#   ];

use strict;
use warnings;
use Data::Dumper;

# Token table. Each entry is [ NAME => PATTERN, IGNORE ]:
#   NAME    - label stored in the emitted token
#   PATTERN - compiled regex, tried anchored at the current scan position
#   IGNORE  - true when the lexeme is consumed but not emitted
# Entries are tried in order, so earlier entries win at a given position.
my @token_def = (
    [ Whitespace => qr{\s+},       1 ],
    [ Comment    => qr{#.*\n?$}m,  1 ],
    [ AddOp      => qr{[+-]}         ],
    [ MulOp      => qr{[*/]}         ],
    [ Number     => qr{\d+}          ],
    [ OpenParen  => qr{\(}           ],
    [ CloseParen => qr{\)}           ],
);

# Expression lexed when the script is run without an argument.
my $DEFAULT_INPUT = "2 * 3 + 4 # and a comment\n";

# Split $input into tokens according to @token_def.
#
# Returns a reference to an array of [ NAME, LEXEME ] pairs, in input order.
# Lexemes matched by ignored entries (whitespace, comments) are consumed but
# not returned. Dies with "Syntax error at position N" when no entry matches
# at offset N.
sub tokenize {
    my ($input) = @_;

    my @tokens;
    pos($input) = 0;

    TOKEN:
    while (pos($input) < length $input) {
        for my $def (@token_def) {
            my ($name, $re, $ignore_flag) = @$def;

            # \G anchors the match at pos($input); /c keeps pos() intact
            # when the match fails, so the next entry starts at the same
            # offset.
            next unless $input =~ m/\G($re)/gc;

            push @tokens, [ $name, $1 ] unless $ignore_flag;

            # Restart from the first entry so table order is the priority
            # order at every position.
            next TOKEN;
        }

        die "Syntax error at position " . pos($input);
    }

    return \@tokens;
}

# Pick the text to lex: the first argument when one was supplied, otherwise
# the built-in sample. Definedness (not truthiness) is tested so that an
# input of "0" or "" is honored rather than replaced by the sample.
sub _input_from_args {
    my @args = @_;
    return defined $args[0] ? $args[0] : $DEFAULT_INPUT;
}

# Script entry point: lex the chosen input and dump the tokens to STDOUT.
sub main {
    my @args = @_;
    print Dumper tokenize(_input_from_args(@args));
    return;
}

# Run only when executed as a script, not when loaded with require/do.
main(@ARGV) unless caller;

1;