#!/usr/bin/perl -wT #DEFINE EXTERNAL MODULE USES use CGI; use CGI::Carp qw(fatalsToBrowser); use strict; use Encode; use Encode qw(encode decode); use open qw( :std :encoding(UTF-8) ); print "Content-type: text/html; charset=utf-8\n\n"; #print CGI::header(); binmode STDOUT, ":utf8"; =item NOTES OPERATOR ORDER OF PRECEDENCE NOT / ^ OR / | XOR AND / & "AND" is the default operator if words are entered without an operator. NOTE: These operators are currently accepted in UPPERCASE only. =cut our $thisprogram = "search-test.pl"; #MAY NEED TO BE FULLY QUALIFIED URL our $line=""; #I'M TOO LAZY TO DECLARE IT AGAIN EVERYWHERE IT'S NEEDED. our %input; our $buffer=""; our $pair=""; our @pairs=(); our $DEBUG=0; #SET TO TWO FOR A MORE STEP-BY-STEP OUTPUT our $PAD = qw'(<>)'; #CONSTANT THAT STILL SEEMS TO ONLY WORK PART OF THE TIME our $table = 'MyTable'; #JUST AN EXAMPLE, AS THIS 'TEST' SCRIPT DOES NOT ATTEMPT our $columnName = 'MyTextColumn'; #TO CONNECT TO AN ACTUAL DATABASE if ($ENV{CONTENT_LENGTH}) { read(STDIN, $buffer, $ENV{CONTENT_LENGTH}); @pairs = split(/&/,$buffer); } else { my $buffer = $ENV{QUERY_STRING}; @pairs = split(/\&/,$buffer); } foreach $pair (@pairs) { $pair=~s/`/ /g; #REMOVE BACKTICKS AS A SECURITY AND FUNCTIONALITY MEASURE TO PROTECT REMAINDER OF SCRIPT #$input{translate}=''; my ($name, $value) = split(/=/,$pair); $value =~ tr/+/ /; $name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $name = decode("utf-8", $name); $value = decode("utf-8", $value); $input{$name} = $value; } $line=$input{terms}; if ($DEBUG==2) {print "10: Current line: $line

\n"}; $line =~ s/\(/PaReNtHeSiSLEFT/g; #REMOVE ALL () FROM ORIGINAL SEARCH FOR SECURITY AND FUNCTIONALITY $line =~ s/\)/PaReNtHeSiSRGHT/g; #REMOVE ALL () FROM ORIGINAL SEARCH FOR SECURITY AND FUNCTIONALITY $line =~ s/(\(|\))/ $1 /g; #PAD ORIGINAL PARENTHESES WITH SPACES FOR NOW $line =~ s/""/QuOtEd_StRiNg/g; #TEMPORARILY REPLACE DOUBLE QUOTES WITH A KEEPER TAG my ($i,$p,$toggle,$terms)=(0,'',"FALSE",$line); $line=''; for ($i = 0; $i < length($terms); $i++) { $p = substr($terms, $i, 1); #DEAL WITH QUOTED PHRASES if ($p eq '"') { if ($toggle eq "FALSE") { $toggle = "TRUE" } else { $toggle = "FALSE" }}; if ($toggle eq "TRUE") { $p =~ s/\s/()/; #REPLACE SPACES WITH () TEMPORARILY TO KEEP QUOTE TOGETHER $p =~ s/([`~=?.+*&^%$#@!<>[\]{}:'])/\\$1/; #ESCAPE ALL OF THESE CHARACTERS IN THE QUOTE (SOME OF THESE ARE FOR PERL COMPATIBILITY) #$p =~ s/\\/\\\\/g; #ESCAPE THE ESCAPE CHARACTER ITSELF, IF IN A QUOTE $p =~ s/(')/$PAD$1$PAD/; #PAD APOSTROPHES FOR NOW SO THAT THEY WILL NOT BECOME WORD BOUNDARIES $p =~ s/(,)/$PAD$1$PAD/; #PAD COMMAS FOR NOW SO THAT THEY WILL NOT BECOME WORD BOUNDARIES $p =~ s/\r/\\\\r/g; #REPLACE RETURN CHARACTER WITH MYSQL-ESCAPED EQUIVALENT $p =~ s/\n/\\\\n/g; #REPLACE LINE-FEED CHARACTER WITH MYSQL-ESCAPED EQUIVALENT $p =~ s/\t/\\\\t/g; #REPLACE TAB CHARACTER WITH MYSQL-ESCAPED EQUIVALENT } else { $p =~ s/%/\\%/g; #ESCAPE ALL PERCENT SIGNS [THIS STEP *MUST* PRECEDE * WILDCARD SUBST.] if ((substr($terms, $i-1, 1)=~/\p{IsWord}/) && (substr($terms, $i+1, 1)=~/\p{IsWord}/)) { $p =~ s/-/$PAD\-$PAD/; #PAD UNQUOTED MINUS SIGNS IN WORDS $p =~ s/\*/%/; #REPLACE ALL WILDCARD ASTERISKS WITH CORRECT DATABASE WILDCARD $p =~ s/(')/$PAD$1$PAD/; #PAD APOSTROPHES FOR NOW SO THAT THEY WILL NOT BECOME WORD BOUNDARIES } else { $p =~ s/[<>]/ /; #REPLACE THESE UNQUOTED CHARACTERS WITH A SPACE, PRESERVING WORD BOUNDARY }; #if ($DEBUG==2) {print "15: Current line: $line

\n"}; $p =~ s/[`~=.,+\n\r*!\/\[\]]/ /; #REPLACE THESE UNQUOTED CHARACTERS WITH A SPACE, PRESERVING WORD BOUNDARY $p =~ s/\\(?!\%)/ /g; #EXCEPT IN THE CASE OF AN ESCAPED PERCENT SIGN, CONVERT SLASHES TO SPACE $p =~ s/([~=$#\@{}:'])/\\$1/; #ESCAPE THESE UNQUOTED CHARACTERS $p =~ s/_/$PAD\\_$PAD/; #ESCAPE/PAD UNDERLINE CHARS [THIS STEP *MUST* PRECEDE ? WILDCARD SUBST.] $p =~ s/\?/_/; #REPLACE ALL WILDCARD QUESTON MARKS WITH CORRECT DATABASE WILDCARD $p =~ s/\|/ OR /; #REPLACE UNQUOTED PIPES WITH 'OR' OPERATOR TERM $p =~ s/\&/ AND /; #REPLACE UNQUOTED AMPERSANDS WITH 'AND' OPERATOR TERM $p =~ s/\^/ NOT /; #REPLACE UNQUOTED EXPONENT SIGNS WITH 'NOT' OPERATOR TERM $p =~ s/^-/ /; #REMOVE UNQUOTED MINUS SIGNS OUTSIDE OF WORDS }; $line .= $p; }; if ($DEBUG==2) {print "20: Current line: $line

\n"}; $line =~ s/((?:".*?")|(?:\(.*?\)))* #MATCH AND HOLD ANY QUOTED OR PARENTHETICAL STRING (?:^|\s|\b) (?\)) (?!["()]|(?:(?<=\d)\:)) #DO NOT ALLOW PATTERN TO MATCH IF ADJACENT TO ONE OF ABOVE (?! (?:\s|\)) (?:OR|AND|NOT|XOR) (?:\s|\() ) #DO NOT MATCH A SEARCH OPERATOR TERM (?=(?:\(<>\))*) #INCLUDE ANY OF OUR PADDING WE INTRODUCED EARLIER ( (?:(?:\(<>\))* |(?:\\.{1})* |(?:\d+\:\d+)* |[^"() ])+) #BUT MATCH OTHER WORDS, INCLUDE ANY ESCAPED CHARACTER (?>(?:\(<>\))*) #INCLUDE ANY ADDITIONAL PADDING (?!["()]|(?:\:(?>\d))) #DO NOT ALLOW PATTERN TO MATCH IF ADJACENT TO ONE OF THESE (?:\s|$|\b) (?\)) ((?:".*?")|(?:\(.*?\)))* #MATCH AND HOLD ANY FURTHER QUOTED OR PARENTHETICAL STRINGS /$1 ($2) $3/gx; #SURROUND MATCHES FROM ABOVE WITH PARENTHESES (WORDS OR PHRASES) if ($DEBUG==2) {print "30: Current line: $line

\n"}; $line =~ s/\(<>\)//g; #REMOVE OUR PADDING NOW, AS IT HAS DONE ITS JOB $line =~ s/\(\)/ /g; #REMOVE OUR () SPACE REPLACEMENTS AND PUT SPACES BACK $line =~ s/\(\\'\)/\\'/g; #FIX ANY DANGLING APOSTROPHES WHICH WERE SEEN AS WORD BOUNDARIES if ($DEBUG==2) {print "40: Current line: $line

\n"}; # $line =~ s/( ( \([^)]+\) \s* OR (?:\s+|\b) )+ #GROUP IN PARENTHESES ALL SEARCH TERMS JOINED BY 'OR' ( \(.*?\) ) )/($1)/gx; #THIS WILL GIVE 'ORed' WORDS LOGICAL PRECEDENCE if ($DEBUG==2) {print "50: Current line: $line

\n"}; $line =~ s/(\(\s+\()/((/g; #REMOVE SPACES BETWEEN NESTED LEFT PARENTHESES $line =~ s/(\)\s+\))/))/g; #REMOVE SPACES BETWEEN NESTED RIGHT PARENTHESES if ($DEBUG==2) {print "60: Current line: $line

\n"}; $line =~ s/"([^"]+)"/($1)/g; #REPLACE QUOTES FOR QUOTED STRINGS WITH PARENTHESES if ($DEBUG==2) {print "70: Current line: $line

\n"}; $line =~ s% #RECURSIVELY LOCATE ALL PARENTHESIZED TERMS AND PHRASES (\() ( (?> [^()]+) #Match no parentheses without recursion | (??{ $line }) # Group with matching parentheses )* | (\)) %AND $1$table\.$columnName LIKE "\%$2\%"$3%gx; #FORMAT PARENTHETICAL TERMS INTO SELECT QUERY FORM $line=~s/\s*(AND)*\s*$table\.$columnName\sLIKE\s"\%\s*\%"\s*//g; #REMOVE EMPTY SELECT TERMS $line=~s/\)\s*AND\s+AND\s*\(/) AND (/g; #REPLACE DOUBLE 'AND' WITH SINGLE 'AND' OPERATOR $line=~s/\)\s*NOT\s+AND\s*\(/) AND NOT (/g; #CHANGE 'NOT AND' TO 'AND NOT' $line=~s/\)\s*XOR\s+AND\s*\(/) XOR (/g; #REMOVE 'AND' IN 'XOR AND' $line=~s/\)\s*OR\s+AND\s*\(/) OR (/g; #REMOVE 'AND' IN 'OR AND' $line=~s/QuOtEd_StRiNg/\\"/g; #REPLACE OUR QUOTE MARKER WITH AN ESCAPED QUOTATION MARK $line=~s/PaReNtHeSiSLEFT/\\(/g; #REPLACE PARENTHESES $line=~s/PaReNtHeSiSRGHT/\\)/g; #REPLACE PARENTHESES #1 while ($line=~s/\(\s*AND\s*\(/((/g); if ($DEBUG==2) {print "80: Current line: $line

\n"}; 1 while ($line=~s/(\() (?: \s* | (\(*) )* AND (?: \s* | (\(*) )* /$1$2$3/x); $line=~s/^\s*AND\s*(\()/$1/; 1 while ($line=~s/^\s*\((\s|\n|\r)*\)\s*(AND)*\s*//); #REMOVE LEADING EMPTY PARENTHETICAL TERM, IF EXISTS 1 while ($line=~s/^\s*(OR|XOR|AND)+\s//); #REMOVE SURPLUS LEADING OPERATOR TERMS, IF EXIST if ($DEBUG==2) {print "90: Current line: $line

\n"}; print < SEARCH ENGINE TESTING

Enter your test search into the search box.

$line HTML