http://www.perlmonks.org?node_id=668678

eric256 has asked for the wisdom of the Perl Monks concerning the following question:

I'm trying to fix up SQL::Tidy to do a few more things (and working on contacting the author about patching and why its not on CPAN ;) ) but I've run into an issue I'm not sure how to approach. The code uses SQL::Tokenizer to break up the SQL. However it ends up breaking count(*) into ['count', '(','*',')']. I can't figure out how to get from that back to the proper output, unless I guess I supply a list of functions? However since I could have user defined functions I'm not really sure what to do here. Can anyone see how I could get back to the original form?

Input SQL

SELECT "CATEGORY_ID", "CATEGORY_NAME", "DESCRIPTION", "PARENT_ID", (SELECT count(*) FROM nf_posts WHERE category_id IN ( SELECT category_id FROM NF_CATEGORY START WITH category_id = cat.category_id CONNECT BY parent_id = PRIOR category_id ) AND parent_id is null) topics, (SELECT count(*) FROM nf_posts WHERE category_id IN ( SELECT category_id FROM NF_CATEGORY START WITH category_id = cat.category_id CONNECT BY parent_id = PRIOR category_id ) AND parent_id is not null) replies from "NF_CATEGORY" cat where Parent_ID = :P4_Category_ID and nf_user_can_str(:APP_USER, category_id, 'view') = 'T'

Formatting with my version gives me:

SELECT "CATEGORY_ID", "CATEGORY_NAME", "DESCRIPTION", "PARENT_ID", ( SELECT count ( * ) FROM nf_posts WHERE CATEGORY_ID IN ( SELECT CATEGORY_ID FROM NF_CATEGORY START WITH CATEGORY_ID = CAT.CATEGORY_ +ID CONNECT BY parent_id = PRIOR CATEGORY_ID ) AND parent_id is null ) topics, ( SELECT count ( * ) FROM nf_posts WHERE CATEGORY_ID IN ( SELECT CATEGORY_ID FROM NF_CATEGORY START WITH CATEGORY_ID = CAT.CATEGORY_ +ID CONNECT BY parent_id = PRIOR CATEGORY_ID ) AND parent_id is not null ) replies FROM "NF_CATEGORY" cat WHERE Parent_ID = :P4_CATEGORY_ID AND nf_user_can_str ( :APP_USER, CATEGORY_ID, 'view' ) = 'T'

And the whole module:

package SQL::Tidy; use strict; use warnings; our $VERSION = 0.01; use SQL::Tokenizer; use Data::Dumper; use constant KEYWORDS => ( 'select', 'from', 'where', 'order', 'group', 'join', ); sub new { my $class = shift; my %args = ( # Some defaults indent => ' ', width => 75, keywords => [ KEYWORDS ], margin => '', @_ ); my $self = bless {}, ref($class) || $class; my $keywords = delete($args{'keywords'}); while (my ($k, $v) = each(%args)) { $self->$k($v); } $self->add_keywords(@$keywords); return $self; } sub add_keywords { my ($self, @keywords) = @_; for my $keyword (@keywords) { $self->{'_keywords'}{lc($keyword)} = 1; } return; } sub _is_keyword { exists(shift->{'_keywords'}{lc(shift)}) } sub newline { my $self = shift; $self->{line}++; my $current_group = $self->{groups}->[-1]; my $prefix = $self->margin ; if (defined $current_group) { #add group indentation level $prefix .= ' ' x $current_group->{indent}; }; $prefix .= $self->indent x $self->{level}; $self->{column} = length($prefix); #reset colum to end of pre +fix $self->{retval} .= "\n" . $prefix; $self->{newline} = 0; } sub tidy { my ($self, $query) = @_; my @tokens = grep !/^\s+$/, SQL::Tokenizer->tokenize($query); my $retval; #array of hash, including base indentation for the whole group and # including the line it starts on my $keywords = qw/^SELECT|FROM|WHERE|LIMIT|BY|GROUP|CONNECT|AND|OR +|JOIN$/; my @groups; $self->{column} = length($self->margin); $self->{line} = 0; $self->{newline} = 0; my ($token,$next_token, $last_token) = ('','',''); while (1) { last unless @tokens || $next_token; $last_token = $token; $token = $next_token || shift @tokens; $next_token = shift @tokens; $token = uc($token) if $token =~ /$keywords/i; $self->{level} += 2 if $last_token =~ /WHERE/i; if ($token eq 'SELECT' and $last_token ne '(') { $self->{level}++; } elsif ($token eq '(' ) { $self->newline() if ($next_token eq 'SELECT'); push @{$self->{groups}}, { level => $self->{level}, start_line => $self->{line}, indent => $self->{column} + length($tok +en) + 1}; $self->{level} = 1; } elsif ($token =~ /FROM|WHERE|AND/i) { $self->{newline} = 1; $self->{level}-- if $token =~ /FROM|WHERE/i; } elsif ($token eq ')') { my $group = pop @{$self->{groups}}; if ($group) { $self->{level} = $group->{level}; + if ($self->{line} != $group->{start_line}) { $self->newline(); }; } } # if we arn't forcing a newline then check if the lenght # of the token + $column +1 is larger than the width # where column = current indentation if ( !$self->{newline} && length($token) + $self->{column} + 1 > $self->width) { $self->{level}++; $self->{newline} = 1; } #if flagged for newline before current token increment line co +unter, # calc indentation prefix, and current indentatio +n length # add output to $retval string. $self->newline() if $self->{newline}; unless ($self->{newline} || $token eq ',') { #append space unless newline or token is a , $self->{retval} .= ' '; $self->{column}++; } $self->{retval} .= $token; $self->{column} += length($token); } $self->{retval} .= "\n"; return $self->{retval}; } # Generate accessors: for my $method (qw(width indent margin keywords)) { no strict 'refs'; *{$method} = sub { my $self = shift; if (@_) { $self->{'_' . $method} = shift; } return $self->{'_' . $method}; }; } 1;

___________
Eric Hodges