# note the comma. This will unshift the first command line # argument onto @ARGV and set $_ unshift @ARGV, $_ = $ARGV[0]; # clear $_ if its length is not 2 $_ = '' if length $_ != 2; # if the first argument is -p or -P, print The Perl Journal if (/-p/i) { # \cH is a backspace printf "*\cHThe Perl Journal\n"; exit 0; } # set $i[11] to the length of @ARGV (remember, this is one greater # than the number of arguments) and set $i[10] to the original length # (number of files on the command line $i[10] = ($i[11] = scalar @ARGV) - 1; # check the while{} at the end # this will execute once for each file do { # if we had no command line arguments, we need to set $i[10] to 1 to ensure # the loop will exit and we need to read our arguments from STDIN. if ($i[11] < 2) { $i[10] = 1; *F = *STDIN; } # if we had arguments, we want to open each file in turn. Note that O_RDONLY # is a filehandle, not a constant. Also note that with the while at the end # of this loop, $i[10] is being decremented, so we can loop through the # files this way. else { open O_RDONLY, $ARGV[$i[11] - $i[10]]; *F = *O_RDONLY; } # read one byte at a time from the file, until EOF. while (read(F, $i, 1) > 0) { # increment $i[4] by one (thus, this will be the file size) ++$i[4]; # set $_ to whatever byte we read $_ = $i; # if $_ is a newline, then the match will return 1, thus setting $i[3] to # the number of lines in the file. ( *pp^0x0A) is superfluous in the # Perl program # But it is used in the C program. My C is pretty rusty, but here goes: # i[3] += m = ( *pp^0x0A ) ? 0 : 1; # pp has been set to i, the last character read. Here, we do an XOR # with 0x0A (a newline character). If any bits are set, we know it's # not a newline, so m is set to 0, else m is set to 1. i[3] is # incremented by m. # The /* in the regex appears to be an artifact left over from an # embedded comment # in the obfu: $i[3]+=m=( *pp^0x0A)?/*\n=;#*/0:1; $i[3] += m[( *pp^0x0A)?/*\n]; #------------------------------------- # The following section is rather confusing. It is a word count. It works by # setting $i[1] to a true value when it encounters a white space charater and # then incrementing $[2] by one when it encounters a non-whitespace character # (whitespace as defined by the character class in the match) # Again, we see the /* as an artifact from the original file: # if(m=/*[ \n\f\r\xB]=#*/q # Ff we match a space, newline, formfeed, carriage return, or cntl-B(?) # I believe this is where the embedded tab should be, but on my system, # it was transformed to a space. if (m[/*[ \n\f\r\xB]]) { # if we've set $i[1], then we want to increment $i[2] by one # and reset $i[1] to 0 (false) if ($i[1]) { ++$i[$i[1]]; $i[1] = 0; } } # if we didn't match, set $i[1] to 2 (which is the index of the array # element we wish to increment for counting the above characters). For # the most part, this means, "set this variable if we have a non- # whitespace character" else { $i[1] = 2; } } #------------------------------------- # if we got this far and $i[1] is true (it will be set to 2), then we have # an extra word that we didn't account for, so we add 1 to the word count if ($i[1]) { ++$i[$i[1]]; } # print number of lines, word count, file size, and the name of the file printf "%7d %7d %7d %s\n", $i[3], $i[2], $i[4], $ARGV[$i[11] - $i[10]]; close F; # if we had more than one argument, we need to total the results if ($i[11] > 2) { # This is setting $i[6] to $i[8] by adding whatever is in @i[2..4] and # then resetting that value. When we get to $i[5], it's never been set # and is evaluated as zero. This causes the entire expression # '$i[$i[1] + 4] += $i[$i[1]]' to return a zero, evaluating as false and # thus terminating the loop. for ($i[1] = 2; $i[$i[1] + 4] += $i[$i[1]]; ++$i[1]) { $i[$i[1]] = 0; } $i[1] = 0; } } while --$i[10]; # if we had more than one argument, we need to print the results if ($i[11] > 2) { printf "%7d %7d %7d total\n", $i[7], $i[6], $i[8]; }