Help SQL::Tidy

eric256 has asked for the wisdom of the Perl Monks concerning the following question:

I'm trying to fix up SQL::Tidy to do a few more things (and working on contacting the author about patching and why its not on CPAN ;) ) but I've run into an issue I'm not sure how to approach. The code uses SQL::Tokenizer to break up the SQL. However it ends up breaking count(*) into ['count', '(','*',')']. I can't figure out how to get from that back to the proper output, unless I guess I supply a list of functions? However since I could have user defined functions I'm not really sure what to do here. Can anyone see how I could get back to the original form?

Input SQL

SELECT 
"CATEGORY_ID",
"CATEGORY_NAME",
"DESCRIPTION",
"PARENT_ID",
(SELECT count(*) FROM nf_posts WHERE
 category_id IN (
        SELECT category_id FROM NF_CATEGORY 
        START WITH category_id = cat.category_id
        CONNECT BY parent_id = PRIOR category_id
 ) 
 AND parent_id is null) topics,
 (SELECT count(*) FROM nf_posts WHERE
 category_id IN (
        SELECT category_id FROM NF_CATEGORY 
        START WITH category_id = cat.category_id
        CONNECT BY parent_id = PRIOR category_id
 ) 
 AND parent_id is not null) replies from   "NF_CATEGORY" cat
where Parent_ID = :P4_Category_ID
and nf_user_can_str(:APP_USER, category_id, 'view') = 'T'
[download]

Formatting with my version gives me:

 SELECT "CATEGORY_ID", "CATEGORY_NAME", "DESCRIPTION", "PARENT_ID",
      ( SELECT count ( * )
        FROM nf_posts
        WHERE CATEGORY_ID IN
             ( SELECT CATEGORY_ID
               FROM NF_CATEGORY START WITH CATEGORY_ID = CAT.CATEGORY_
+ID
                    CONNECT BY parent_id = PRIOR CATEGORY_ID
             )
             AND parent_id is null
      ) topics,
      ( SELECT count ( * )
        FROM nf_posts
        WHERE CATEGORY_ID IN
             ( SELECT CATEGORY_ID
               FROM NF_CATEGORY START WITH CATEGORY_ID = CAT.CATEGORY_
+ID
                    CONNECT BY parent_id = PRIOR CATEGORY_ID
             )
             AND parent_id is not null
      ) replies
 FROM "NF_CATEGORY" cat
 WHERE Parent_ID = :P4_CATEGORY_ID
      AND nf_user_can_str ( :APP_USER, CATEGORY_ID, 'view' ) = 'T'
[download]

And the whole module:

package SQL::Tidy;

use strict;
use warnings;

our $VERSION = 0.01;

use SQL::Tokenizer;
use Data::Dumper;

use constant KEYWORDS => (
    'select', 'from', 'where', 'order', 'group', 'join',
);

sub new {
    my $class = shift;
    my %args = (
        # Some defaults
        indent => '     ',
        width  => 75,
        keywords => [ KEYWORDS ],
        margin => '',
        @_
    );

    my $self = bless {}, ref($class) || $class;
    my $keywords = delete($args{'keywords'});
    while (my ($k, $v) = each(%args)) {
        $self->$k($v);
    }
    $self->add_keywords(@$keywords);
    return $self;
}

sub add_keywords {
    my ($self, @keywords) = @_;
    for my $keyword (@keywords) {
        $self->{'_keywords'}{lc($keyword)} = 1;
    }
    return;
}

sub _is_keyword { exists(shift->{'_keywords'}{lc(shift)}) }

sub newline {
        my $self = shift;
        
        $self->{line}++;
        my $current_group = $self->{groups}->[-1];
        my $prefix =  $self->margin ;
        
        if (defined $current_group) {
                #add group indentation level
                $prefix   .=  ' ' x $current_group->{indent}; 
        };
        
        $prefix   .=  $self->indent x $self->{level};

        $self->{column}  = length($prefix); #reset colum to end of pre
+fix               
        $self->{retval} .= "\n" . $prefix;
        $self->{newline} = 0;
}


sub tidy {
    my ($self, $query) = @_;
    my @tokens = grep !/^\s+$/, SQL::Tokenizer->tokenize($query);

    my $retval;

    #array of hash, including base indentation for the whole group and
    # including the line it starts on
    my $keywords = qw/^SELECT|FROM|WHERE|LIMIT|BY|GROUP|CONNECT|AND|OR
+|JOIN$/;
    my @groups;

    $self->{column}  = length($self->margin);
    $self->{line}    = 0;
    $self->{newline} = 0;

    my ($token,$next_token, $last_token) = ('','','');
     
    while (1) {
        
        last unless @tokens || $next_token;
        $last_token = $token;
        $token      = $next_token || shift @tokens;
        $next_token = shift @tokens;
        $token = uc($token) if $token =~ /$keywords/i;
       
        $self->{level} += 2 if $last_token =~ /WHERE/i;
        
        if ($token eq 'SELECT' and $last_token ne '(') {
                $self->{level}++;
        } elsif ($token eq '(' ) {
                $self->newline() if ($next_token eq 'SELECT');
                push @{$self->{groups}}, 
                        {  level      => $self->{level},
                           start_line => $self->{line},
                           indent     => $self->{column} + length($tok
+en) + 1};
                $self->{level} = 1;                           
        } elsif ($token =~ /FROM|WHERE|AND/i) {
                $self->{newline} = 1;
                $self->{level}-- if $token =~ /FROM|WHERE/i;
        } elsif ($token eq ')') {
                my $group = pop @{$self->{groups}};
                if ($group) {
                        $self->{level} = $group->{level};             
+   
                        if ($self->{line} != $group->{start_line}) {
                                $self->newline();
                        };


                }
        }

        # if we arn't forcing a newline then check if the lenght 
        # of the token + $column +1 is larger than the width
        # where column = current indentation
        if (    !$self->{newline} 
             && length($token) + $self->{column} + 1 > $self->width)
        {
            $self->{level}++;            
            $self->{newline} = 1;
        }

        #if flagged for newline before current token increment line co
+unter,
        #              calc indentation prefix, and current indentatio
+n length
        #              add output to $retval string.
        $self->newline() if $self->{newline};

        unless ($self->{newline} || $token eq ',') {
            #append space unless newline or token is a ,
            $self->{retval} .= ' ';
            $self->{column}++;
        }

        $self->{retval} .= $token;
        $self->{column} += length($token);
    }

    $self->{retval} .= "\n";
    return $self->{retval};
}

# Generate accessors:
for my $method (qw(width indent margin keywords)) {
    no strict 'refs';
    *{$method} = sub {
        my $self = shift;
        if (@_) {
            $self->{'_' . $method} = shift;
        }
        return $self->{'_' . $method};
    };
}

1;
[download]

___________
Eric Hodges

Back to Seekers of Perl Wisdom