package SQL::Tidy; use strict; use warnings; our $VERSION = 0.01; use SQL::Tokenizer; use Data::Dumper; use constant KEYWORDS => ( 'select', 'from', 'where', 'order', 'group', 'join', ); sub new { my $class = shift; my %args = ( # Some defaults indent => ' ', width => 75, keywords => [ KEYWORDS ], margin => '', @_ ); my $self = bless {}, ref($class) || $class; my $keywords = delete($args{'keywords'}); while (my ($k, $v) = each(%args)) { $self->$k($v); } $self->add_keywords(@$keywords); return $self; } sub add_keywords { my ($self, @keywords) = @_; for my $keyword (@keywords) { $self->{'_keywords'}{lc($keyword)} = 1; } return; } sub _is_keyword { exists(shift->{'_keywords'}{lc(shift)}) } sub newline { my $self = shift; $self->{line}++; my $current_group = $self->{groups}->[-1]; my $prefix = $self->margin ; if (defined $current_group) { #add group indentation level $prefix .= ' ' x $current_group->{indent}; }; $prefix .= $self->indent x $self->{level}; $self->{column} = length($prefix); #reset colum to end of prefix $self->{retval} .= "\n" . $prefix; $self->{newline} = 0; } sub tidy { my ($self, $query) = @_; my @tokens = grep !/^\s+$/, SQL::Tokenizer->tokenize($query); my $retval; #array of hash, including base indentation for the whole group and # including the line it starts on my $keywords = qw/^SELECT|FROM|WHERE|LIMIT|BY|GROUP|CONNECT|AND|OR|JOIN$/; my @groups; $self->{column} = length($self->margin); $self->{line} = 0; $self->{newline} = 0; my ($token,$next_token, $last_token) = ('','',''); while (1) { last unless @tokens || $next_token; $last_token = $token; $token = $next_token || shift @tokens; $next_token = shift @tokens; $token = uc($token) if $token =~ /$keywords/i; $self->{level} += 2 if $last_token =~ /WHERE/i; if ($token eq 'SELECT' and $last_token ne '(') { $self->{level}++; } elsif ($token eq '(' ) { $self->newline() if ($next_token eq 'SELECT'); push @{$self->{groups}}, { level => $self->{level}, start_line => $self->{line}, indent => $self->{column} + length($token) + 1}; $self->{level} = 1; } elsif ($token =~ /FROM|WHERE|AND/i) { $self->{newline} = 1; $self->{level}-- if $token =~ /FROM|WHERE/i; } elsif ($token eq ')') { my $group = pop @{$self->{groups}}; if ($group) { $self->{level} = $group->{level}; if ($self->{line} != $group->{start_line}) { $self->newline(); }; } } # if we arn't forcing a newline then check if the lenght # of the token + $column +1 is larger than the width # where column = current indentation if ( !$self->{newline} && length($token) + $self->{column} + 1 > $self->width) { $self->{level}++; $self->{newline} = 1; } #if flagged for newline before current token increment line counter, # calc indentation prefix, and current indentation length # add output to $retval string. $self->newline() if $self->{newline}; unless ($self->{newline} || $token eq ',') { #append space unless newline or token is a , $self->{retval} .= ' '; $self->{column}++; } $self->{retval} .= $token; $self->{column} += length($token); } $self->{retval} .= "\n"; return $self->{retval}; } # Generate accessors: for my $method (qw(width indent margin keywords)) { no strict 'refs'; *{$method} = sub { my $self = shift; if (@_) { $self->{'_' . $method} = shift; } return $self->{'_' . $method}; }; } 1;