http://www.perlmonks.org?node_id=428255


in reply to Function Dispatcher Table for Virtual Data Warehouse

must have exceeded a length limit or maybe having multiple readmore/code tags is a problem - here's the code for the dispatcher module:
package Obj_Srvr; # ==================================================================== +======= # Obj_Srvr.pm # # Serves live high-level data from a low level DB, using code instead + of a # Data Warehouse. Will run embedded in obj_srvr sql parser. obj_srv +r # passes the parsed sql to sub get_data() in Obj_Srvr.pm, which retur +ns # the data in delimited character format. This data may be passed to # oracle_obj_srvr.pl which translates it into Net8 packets for the BI +- # Query desktop client. # # obj_srvr.pm makes use of various Student Information System (SIS) p +erl # modules located on the wouprd server (currently Spruce). # # obj_srvr source is generated from obj_srvr.l and obj_srvr.y using l +ex and # yacc, then compiled and linked with exec_sql.o to produce the obj_s +rvr # executable (see Makefile). # # obj_srvr and Obj_Srvr.pm will be installed on the same server as th +e # wouprd database (currently Spruce). # # Jeremy Hickerson, 5/8/2002 # # ==================================================================== +======= use strict; BEGIN { use Exporter (); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); # set the version for version checking $VERSION = 1.00; @ISA = qw(Exporter); @EXPORT = qw(&obj_srvr_connect &get_data &get_yyin &send_yyou +t &connect2client &like2re &tr_op); %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ], our @EXPORT_OK = qw($FH_OUT); if ($^O eq "VMS") { # jhjh ( grep /gen\$com/i, @INC ) || unshift @INC, "gen\$com"; ( grep /woup:\[wou_sis_mods.com\]/i, @INC ) || unshift @INC, "woup +:[wou_sis_mods.com]"; # jhjh } # default to Unix else { ( grep /\/usr\/local\/bin/, @INC ) || unshift @INC, "/\/usr\/l +ocal\/bin"; } $DBI::drh->{debug} = 1; } our @EXPORT_OK; use subs qw(obj_srvr_connect get_data get_yyin send_yyout connect2clie +nt like2re tr_op); use DBI; use WOU_Admit; use WOU_Person; use WOU_Student; use WOU_AR; use WOU_SIS_Util; use WOU_Util; use Socket; use Safe; # ==================================================================== +======== # Package-Level Stuff # ==================================================================== +======== my $DBH; # needs to be package level (this package's sub's assume thi +s) our $FH_OUT; # let obj_srvr.pl see this my $FH_IN; my (%table_objs, %obj_accessor, %methods); # these are populated in # obj_srvr.tables # jhjh !! make sure obj_srvr.tables is readonly; it contains perl code + to be eval'ed if ($^O eq "VMS") { eval `type obj_srvr.tables`; # table layout file } # default to Unix else { eval `cat obj_srvr.tables`; } my $compartment = new Safe; $compartment->permit(qw( entereval )); # need for stuff like date_com +pare() #$compartment->permit_only(qw()); # nothing! jhjh - need to see what + to put # in here to allow what we need but + nothing # else. Even without this it seems + to stop # things like system(). $compartment->share_from('WOU_Util', [ 'date_compare' ] ); # ==================================================================== +======== # routines # (will be embedded in a C Program) # ==================================================================== +======== sub obj_srvr_connect { my ($uid, $passwd) = @_; print STDERR "before DBI->connect...\n"; # jhjh $DBH = DBI->connect('dbi:Oracle:', qq{$uid/$passwd\@(DESCRIPTION= (ADDRESS_LIST = (ADDRESS = (COMMUNITY = tcp.cedar.osshe.edu) (PROTOCOL = TCP) (HOST = 140.211.10.26) (PORT = 1541) ) ) (CONNECT_DATA = (SID = wouprd) (SRVR = DEDICATED) ) ) }, "", {debug => 1} ) or die "$!: Can't connect to DB"; print STDERR "after DBI->connect...\n"; # jhjh # help performance of select with join on remote spriden table $DBH->do("alter session set optimizer_goal = ALL_ROWS"); # defaults - will be redirected if connected to oracle_obj_srvr $FH_IN = \*STDIN; $FH_OUT = \*STDOUT; } sub connect2client { my ($remote, $port) = @_; print STDERR "host = $remote, port = $port\n"; # jhjh my ($iaddr, $paddr, $proto, $line, $pid, $cnt); $| = 1; if ($port =~ /\D/) { $port = getservbyname($port, 'tcp') } die "No port" unless $port; $iaddr = inet_aton($remote) || die "no host: $remo +te"; $paddr = sockaddr_in($port, $iaddr); $proto = getprotobyname('tcp'); socket(SOCK, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; $cnt = 0; CONNECT_LOOP: while ($cnt++ < 10 ) { sleep(2); if (connect(SOCK, $paddr) ) { print STDERR "Connected\n"; last CONNECT_LOOP; } else { print STDERR "$!: problem with connect\n" } # jhjh ( die "can't connect to lsnr") if $cnt == 10; } select(SOCK); $| = 1; # make unbuffered select(STDERR); $| = 1; # make unbuffered $FH_IN = \*SOCK; $FH_OUT = \*SOCK; select($FH_OUT); $| = 1; # make unbuffered select(STDOUT); return 1; } sub get_yyin { my $size = shift; my $yychars; sysread($FH_IN, $yychars, $size); print STDERR "read $yychars\n"; # jhjh return $yychars; } sub send_yyout { my $str = shift; print $FH_OUT $str; return 1; } sub get_data { my ($cols, $tab, $where, $order) = @_; $cols = lc($cols); $tab = lc($tab); $order = lc($order); # handle "where" separately; it may have upper-case scalars that w +e # need to preserve # oracle_obj_srvr may have broken query up into 254 byte chunks se +parated by \n $cols =~ s/\n//g; $tab =~ s/\n//g; $where =~ s/\n//g; $order =~ s/\n//g; my ($rh_driver_objs, $ra_results, $ra_support_results, $rh_result, + $rh_data, $rh_join_obj, $ra_join_objs, $col, $table, $subname, @field_or +der, $rh_stu, $rh_support_objs, $parm, $ref_key, @subst_parms, @save_parms, %filtered_results, $cnt, @save_where, $word, $subst_where, $co +l_info, @converted_order, $special_sort, %special_sort); if (!defined($cols) ) { $cols = "" } if (!defined($tab) ) { $tab = "" } if (!defined($where) ) { $where = "" } if (!defined($order) ) { $order = "" } my %query = ( "columns" => [ ], "table" => [ ], "where" => [ ], "order" => [ ] ); $special_sort = 0; print STDERR "cols = $cols\n"; # jhjh print STDERR "tab = $tab\n"; # jhjh print STDERR "where = $where\n"; # jhjh @{$query{"columns"} } = split(/,/, $cols); ($query{"table"} ) = split(/,/, $tab); $where =~ s/([^\\]),/$1\|/g; # save escaped comma's @{$query{"where"} } = split(/\|/, $where); @{$query{"order"} } = split(/,/, $order); # convert numeric col refs to names if ( defined($query{order}->[0] ) ) { if ( $query{order}->[0] =~ /^\d+$/ ) { print STDERR "converting numeric col refs\n"; # jhjh while ( $col = shift @{ $query{order} } ) { push @converted_order, $query{columns}->[$col - 1]; print STDERR "$col: ", $query{columns}->[$col - 1], " +\n"; # jhjh } push @{ $query{order} }, @converted_order; } # see if special sorts are needed foreach $col ( @{ $query{order} } ) { if (exists($obj_accessor{ $query{table} }->{$col}->{dataty +pe}) ) { if ($obj_accessor{ $query{table} }->{$col}->{datatype} eq "numeric" ) { $special_sort = 1; $special_sort{$col} = \&num_sort; # defined in WO +U_Util.pm } if ($obj_accessor{ $query{table} }->{$col}->{datatype} eq "date" ) { $special_sort = 1; $special_sort{$col} = \&date_sort; # defined in W +OU_Util.pm } } # can add check for descending sort, etc. below if needed } } print STDERR "order = $order :", @{ $query{order} }, "\n"; # jhjh $rh_driver_objs = get_driver_objs(\%query); # rh_driver_objs now +has # one or more sub ref +s as # hash keys and a has +h ref # holding the sub ref + in key # "sub" and an array +ref of # args for the sub in + key # "parms" $rh_support_objs = get_support_objs(\%query); # add any field_maps for all columns returned by supporting subs, +even # if columns are not in "select" statement (they might be in "wher +e" clause) add_field_maps($rh_support_objs, $query{table} ); # run the driver sub (Only allow 1 driver table) push @{$ra_results}, @{ &{ $rh_driver_objs->{"subref"} }( @{ $rh_driver_objs->{"parms"} }) }; # run the supporting subs foreach $rh_stu ( @{$ra_results} ) { foreach $subname (keys %{$rh_support_objs} ) { while ( shift @subst_parms ) { } # empty each time # substitute driver table column values for referential pa +rms while ( $parm = shift @{ $rh_support_objs->{$subname}->{"p +arms"} } ) { push @save_parms, $parm; if ($parm =~ /^\$/ ) { $ref_key = $parm; $ref_key =~ s/^\$//; if (exists($rh_stu->{ $ref_key } ) ) { push @subst_parms, $rh_stu->{$ref_key}; } else { # error (can't find as driver field): lea +ve "$pidm" # or whatever as parm push @subst_parms, $parm; } } else { push @subst_parms, $parm; } } # restore parms, including "$" parms while ( $parm = shift @save_parms ) { push @{ $rh_support_objs->{$subname}->{"parms"} }, $p +arm; } add2hash($rh_stu, \%{ &{ $rh_support_objs->{$subname}->{"subref"} } +( @subst_parms) }, $rh_support_objs->{$subname}->{"field_map"} ); } } # Explanation: # 1. Will translate where-clause into perl expression, then use r +eval # to check for TRUE after all column values have been plugged +in. # This will involve implementing all SQL predicate operators # (comparison, between, like, in, etc.) in (or for) obj_srvr.p +l. # Some of these need no translation (most of the comparison op +erators # mean the same thing in perl, for instance.) # 3 passes through entire population so far... $cnt = 0; # we have substituted some words in $query{where} while we were ge +tting params; # copy @{ $query{where} } onto $where putting spaces between words $where = ""; push @save_where, @{ $query{where} }; while ( defined($word = shift @{ $query{where} } ) ) { $where .= $ +word . " " } $where =~ s/\s*$//; push @{ $query{where} }, @save_where; $where =~ s/([^\\])'/$1"/g; # allow escaped single quotes to stay $where =~ s/\\//g; # remove escape char's, now that we're do +ne w/ them $where =~ s/^/ /; # put space at beginning, makes substitution be +low # work for first word print STDERR "WHERE = ", $where, "\n"; # jhjh while ( $rh_stu = shift @{$ra_results} ) { $subst_where = $where; # reset # substitute column vals into where expression in order to eva +luate # (only subst if word is delimited by spaces or reg. exp. slas +hes) foreach $col (keys %{$rh_stu} ) { $col = lc($col); # all hash column names are lower case, +so # need this to make sure substitution re +ferences # the actual column name and not the cap +italized # hash column name. Still need to do ca +se-insensitive # substitution below because the query c +olumn name # may be upper case. # turn nulls into null strings if (!defined($rh_stu->{$col} ) ) { if (exists($obj_accessor{ $query{table} }->{$col}->{da +tatype}) and $obj_accessor{ $query{table} }->{$col}->{datatype} eq "numeric" ) { $rh_stu->{$col} = 0; } else { $rh_stu->{$col} = "" } } $subst_where =~ s/ $col / "$rh_stu->{$col}" /ig; # /i han +dles upper- # case q +uery col names $subst_where =~ s/\/\^$col\$\//\/\^$rh_stu->{$col}\$\//ig; } if (where_clause_true($subst_where) ) { $filtered_results{++$cnt} = $rh_stu; } } # jhjh push @field_order, @{$query{columns} }; if ($order) { if ($special_sort) { $ra_results = compound_sort(\%filtered_results, $query{"or +der"}, \%special_sort ); } else { $ra_results = compound_sort(\%filtered_results, $query{"or +der"} ); } } else { push @{ $ra_results }, values %filtered_results } foreach $col ( @{ $query{columns} } ) { $col_info .= $col . ":" . $obj_accessor{ $query{table} }->{$col}->{size +} . "|"; } $col_info =~ s/\|$//; print $FH_OUT "$col_info\n"; output_delimited($FH_OUT, $ra_results, $query{columns}, "", ""); print $FH_OUT "\n$cnt rows returned\n"; return 1; } sub get_driver_objs { my $rh_query = shift; my %driver_objs = ( "subname" => $table_objs{ $rh_query->{table} }->{subname}, "subref" => $table_objs{ $rh_query->{table} }->{subref}, "parms" => get_parms(0, $table_objs{ $rh_query->{table} }- +>{subname}, $rh_query) ); return \%driver_objs; } sub get_support_objs { my $rh_query = shift; my ($subname, %support_objs, $col); foreach $col (@{$rh_query->{columns} } ) { # next if column is a driver sub column next if $obj_accessor{$rh_query->{table} }->{$col}->{subname} +eq "SELF"; print STDERR "support_objs: col = $col\n"; # jhjh print STDERR "support_objs->subname = ", $obj_accessor{ $rh_query->{ta +ble} }->{$col}->{subname}, "\n"; # jhjh # these get populated the same way multiple times if several f +ields share # a subname # Data looks like this: # $support_objs{"get_addr_lo"}->{"subref"} = \&get_addr, for e +xample $support_objs{ $obj_accessor{ $rh_query->{table} }->{ $col}->{subname} }->{"subref"} = $obj_accessor{ $rh_query->{table} }->{$col}->{subref}; $support_objs{ $obj_accessor{ $rh_query->{table} }->{ $col}->{subname} }->{"parms"} = get_parms(1, $obj_accessor{$rh_query->{table} }->{$col}-> +{subname}, $rh_query); # jhjh - don't need this, done elsewhere now # This adds a new field_map pair each time. # Data looks like this: # $support_objs{"get_addr_lo"}->{"field_map}->{"city"} = "city +_lo", # for example, where "city is the fieldname returned by the su +bref and # "city_lo" is the fieldname to be used in the virtual table b +eing created. # $support_objs{ # $obj_accessor{ $rh_query->{table} }->{$col}->{subnam +e} # }->{"field_map"}->{ # $obj_accessor{$rh_query->{table} }->{$col}->{field} # } = $col; } # maybe there's a column in the "where" clause but not in the sele +ct column # list, and it's sub is not shared with any of the select list col +umns foreach $col (keys %{ $obj_accessor{ $rh_query->{table} } } ) { if ( grep /^$col$/i, @{ $rh_query->{where} } and $obj_accessor{ $rh_query->{table} }->{$col}->{subname} ne + "SELF" ) { # Data looks like this: # $support_objs{"get_addr_lo"}->{"subref"} = \&get_addr, f +or example $support_objs{ $obj_accessor{ $rh_query->{table} }->{ $col}->{subname} }->{"subref"} = $obj_accessor{ $rh_query->{table} }->{$col}->{subr +ef}; $support_objs{ $obj_accessor{ $rh_query->{table} }->{ $col}->{subname} }->{"parms"} = get_parms(1, $obj_accessor{$rh_query->{table} }->{$co +l}->{subname}, $rh_query); } } return \%support_objs; } sub get_parms { my ($rec_key, $subname, $rh_query) = @_; my ($parm, @parms); foreach $parm (@{$methods{$subname}->{parms} } ) { push @parms, get_parm_val($rec_key, $parm, $rh_query); } return \@parms; } sub get_parm_val { my ($rec_key, $parm, $rh_query) = @_; my ($word, $got_word, @subst_where); if ($parm eq "dbh") { return $DBH } # package var if ($rec_key == 0 ) { # i.e. sub is the driver ("table"), so we r +equire # params to be scalar predicates in where-c +lause. # for drivers ("tables") we require single-valued, "=" params. # we will handle "!=", "in", "like" values at a higher level a +nd # simply run the sub multiple times (maybe?). $got_word = 0; while ( $word = shift @{$rh_query->{where} } ) { if ($got_word) { $got_word++ } if (lc($word) eq $parm) { # so any future parms must also +be lc $got_word = 1; $word = "TRUE"; # replace driver parms with true stat +ements; # reval of where clause doesn't need +to # look at these again, and any "%" va +lues # will wrongly fail the revel # jhjh ! May want to rethink TRUE = T +RUE # idea: need to handle conditions ot +her than # "=" on driver parm columns (like "i +n", "!="). # Would be good to pass this to the e +val like # everything else. Have to think of +another # way to get around parms that accept + "=%". } if ($got_word == 3) { push @subst_where, "TRUE"; unshift @{ $rh_query->{where} }, @subst_where; $word =~ s/'//g; # don't want single quotes as part o +f # the string return $word; } # 2 is "=" push @subst_where, $word; } # won't be reached unless parm not in where clause unshift @{ $rh_query->{where} }, @subst_where; } else { # Params are referential: they come from the driver objec +t. # (We are representing a single table to the user, but pul +ling # the data from a driver object and whatever supporting # subroutines we need.) If a supporting subroutine requir +es # a parameter that is not referential (like gpa_type for a +ll_gpa), # we will create additional columns for the possible value +s. I.e., # column cgpa_o is gpa_type 'O' (overall), column cgpa_t i +s # gpa_type 'T' (transfer), etc. return $parm; # substitute after getting driver records # jhjh - still need to handle scalar where-clause conditio +ns for # support obj columns - i.e. a required param for a suppor +t object # cannot be figured out referentially. Handle these with +higher # level wrapper subs. # fall-through return; } } sub add_field_maps { my ($rh_support_objs, $table) = @_; my ($col, $subname); COL_LOOP: foreach $col (keys %{ $obj_accessor{$table} } ) { next COL_LOOP if $obj_accessor{$table}->{$col}->{"subname"} eq + "SELF"; # fall-through foreach $subname ( keys %{ $rh_support_objs } ) { # only put it in if we used it if ($obj_accessor{$table}->{$col}->{"subname"} eq $subname + ) { if ( exists( $obj_accessor{$table}->{$col}->{"field"} +) ) { # Data looks like this: # $support_objs{"get_addr_lo"}->{"field_map}->{ # "city"} = "city_lo", for example $rh_support_objs->{$subname}->{"field_map"}->{ $obj_accessor{$table}->{$col}->{"field"} } = $ +col; } } } } } sub where_clause_true { my $where = shift; # note: we are guaranteed white space between operands and operat +ors # because of how we processed the where clause earlier # (may come in handy to know this) # need to think about how to skip these substitutions if character + is inside # a string (maybe in parser translate these chars to something els +e if they're # in a string, then translate them back further below # Need to use safe eval, or build in some checking for system() an +d # backticks, etc. (i.e. "where lname = `<dangerous os command>` " +for # where clause... ). Safe->reval should do it. # print STDERR "EVAL where = $where\n"; # jhjh # use reval to see if substituted where clause is true; $compartment->reval( qq{ if ($where) { return 1 } # fall-through return 0; } ); } sub like2re { my ($str, $word, $negative); $negative = 0; # stop on LIKE for LIKE/NOT LIKE while ( ($word = shift) !~ /^like$/i ) { if ($word =~ /^not$/i) { $negative = 1; } else { $str .= $word . "," } # replace whitespace w/ comma's +just like the # parser does } $str .= $word . ","; # add LIKE to $str $word = shift; # $word now holds the SQL LIKE expression # if LIKE expr contains SQL % wildcard, then turn into perl reg ex +p if ( $word =~ /'(.*%.*)'/i ) { $word = $1; $str =~ s/like,$/=~,/i; $negative && ( $str =~ s/=~/!~/ ); $word =~ s/^([^%])/\^$1/; $word =~ s/([^%])$/$1\$/; $word =~ s/%/\.\*/g; $word = "/" . $word . "/"; } # otherwise turn like into "=" ( we will turn "=" into "eq" in ge +t_data(), # this sub is used by the yacc parser) else { $str =~ s/like,$/=,/i; } $str .= $word; return $str; } sub tr_op { my ($table, $lval, $op, $rval) = @_; print STDERR "\$lval = $lval, \$op = $op, \$rval = $rval\n"; # jhjh if ( $op eq '<>') { $op = "!=" } # translate $op for strings if ( $lval =~ /^'.*'$/ or $rval =~ /^'.*'$/ ) { $op = $op eq '=' ? 'eq' : $op eq '!=' ? 'ne' : $op eq '<' ? 'lt' : $op eq '<=' ? 'le' : $op eq '>' ? 'gt' : $op eq '>=' ? 'ge' : $op; } else { $op = $op eq '=' ? '==' : $op; } # translate $op for dates if (exists($obj_accessor{lc($table) }->{lc($lval) }->{datatype}) a +nd $obj_accessor{lc($table) }->{lc($lval) }->{datatype} eq "date" ) { $op = "date_compare_$op"; } return $op; } return 1;

get_data() is passed $cols, $tab, $where, $order by the lex/yacc parser.

The package also has some functions that are used by the lex/yacc parser to parse the query.

The data retrieval functions are in separate packages.

The .tables file can represent any data model as long as you have the data retrieval functions for it (we use perl DBI against Oracle, but that is not a requirement).

Data retrieval functions must return references to arrays of hash references ("records") of the form:

{ "id" => 12345, "lname" => "Jones", "fname" => "Mary, ... }

if they are "driver functions (return the base information for a population), or they must return hash references ("records") if they are supporting functions (return the "join table" information).

Only single-table queries against the virtual warehouse are supported.

For a more complete description see http://www.wou.edu/~hickerj/