Beefy Boxes and Bandwidth Generously Provided by pair Networks chromatic writing perl on a camel
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Re: Get me excited about perl

by BrowserUk (Pope)
on Sep 19, 2012 at 19:55 UTC ( #994514=note: print w/ replies, xml ) Need Help??


in reply to Get me excited about perl

Show them Perl's greatest asset -- concise solutions to everyday problems.

If you have time, pick some task that will resonate with as many of them as possible and get one or more of them to solve the chosen task in their favoured languages before the day.

By way of example (because the examples already exist): frequency count the words in a text file.

  • C (211 lines):
    /* -*- mode: c -*- * $Id: wordfreq.gcc,v 1.5 2001/09/18 17:25:18 doug Exp $ * http://www.bagley.org/~doug/shootout/ * Changed by Adrian Merrill 2001/08/22 */ #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "../../Include/simple_hash.h" #define QUICKIE_STRCMP(a, b) (*(a) != *(b) ? *(a) - *(b) : strcmp((a) +, (b))) typedef int (*comparator)(const void *, const void *); int cmp_hash(struct ht_node **a, struct ht_node **b) { int val = (*b)->val - (*a)->val; return((val == 0) ? QUICKIE_STRCMP((*b)->key, (*a)->key) : val); } int main() { int readbufsize = 4096; int wordbufsize=16; char *readbuf = (char *)malloc(readbufsize + 1); char *wordbuf = (char *)malloc(wordbufsize + 1); int i = 0; struct ht_ht *ht = ht_create(2048); struct ht_node **sort_array, **sort_tmp, *node; /*new code*/ int nread =0; int wordlen=0; readbuf[0]=0; while (readbuf[i] > 0||(nread = fread(readbuf, sizeof(char), readb +ufsize, stdin),readbuf[nread] = '\0',i=0,nread > 0) ) { if (isalpha(readbuf[i])){ wordbuf[wordlen++] = tolower(readbuf[i]); if (wordlen == wordbufsize) { wordbufsize *= 2; wordbuf = realloc(wordbuf, wordbufsize + 1); } } else{ if (wordlen > 0) { wordbuf[wordlen] = '\0'; ++(ht_find_new(ht, wordbuf)->val); wordlen = 0; } } i++; } free(readbuf); free(wordbuf); sort_array = sort_tmp = malloc(sizeof(struct ht_node *) * ht_count(ht)); for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht +)) ; qsort(sort_array, ht_count(ht), sizeof(struct ht_node *), (comparator)cmp_hash); for (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); + ht_destroy(ht); return(0); }
  • C++ (79 lines):
    // -*- mode: c++ -*- // $Id: wordfreq.g++,v 1.5 2001/07/21 23:51:05 doug Exp $ // http://www.bagley.org/~doug/shootout/ // By Tamás Benkő #include <cstdio> #include <cctype> #include <cstring> #include <ext/hash_map> #include <vector> #include <algorithm> using namespace std; int const bufsize = 4096; int const wsize = 64; class word_reader { int ws; char buf[bufsize+1], *bptr, *word; FILE *input; bool fill(); public: word_reader(FILE *i): ws(wsize), bptr(buf), word(new char[ws+1]), +input(i) {*bptr = *word = '\0';} int operator()(char const **); }; inline bool word_reader::fill() { int nread = fread(buf, sizeof(char), bufsize, input); buf[nread] = '\0'; bptr = buf; return nread > 0; } int word_reader::operator()(char const **w) { int len = 0; char c; while (*bptr || fill()) { if (isalpha(c = *bptr++)) { word[len] = tolower(c); if (++len == ws) { char *nword = new char[(ws *= 2)+1]; memcpy(nword, word, len); delete[] word; word = nword; } } else if (len > 0) break; } *w = word; word[len] = '\0'; return len; } typedef hash_map<char const *, int> counter; typedef pair<char const *, int> hpair; namespace std { inline bool operator<(hpair const &lhs, hpair const &rhs) { return lhs.second != rhs.second ? lhs.second > rhs.second : strcmp(lhs.first, rhs.first) > 0; } template<> struct equal_to<char const *> { bool operator()(char const *s1, char const *s2) const {return strcmp(s1, s2) == 0;} }; } int main() { int len; const char *w; counter hist; word_reader wr(stdin); while ((len = wr(&w)) > 0) { counter::iterator i = hist.find(w); if (i == hist.end()) hist[strcpy(new char[len+1], w)] = 1; else ++i->second; } vector<hpair> v(hist.begin(), hist.end()); sort(v.begin(), v.end()); for (size_t i = 0; i < v.size(); ++i) printf("%7d\t%s\n", v[i].second, v[i].first); return 0; }
  • Java (68 lines):
    // $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $ // http://www.bagley.org/~doug/shootout/ // Collection class code is from my friend Phil Chu, Thanks Phil! import java.io.*; import java.util.*; import java.text.*; class Counter { int count = 1; } public class wordfreq { public static void main(String[] args) { wf(); } public static String padleft(String s,int n,char c) { int len = s.length(); if( len>=n ) return s; char[] buf = new char[n]; for( int i=0;i<n-len;i++ ) buf[i]=c; s.getChars(0,len,buf,n-len); return new String(buf); } public static void wf() { HashMap map = new HashMap(); try { Reader r = new BufferedReader(new InputStreamReader(System +.in)); StreamTokenizer st = new StreamTokenizer(r); st.lowerCaseMode(true); st.whitespaceChars( 0, 64 ); st.wordChars(65, 90); st.whitespaceChars( 91, 96 ); st.wordChars(97, 122); st.whitespaceChars( 123, 255 ); int tt = st.nextToken(); while (tt != StreamTokenizer.TT_EOF) { if (tt == StreamTokenizer.TT_WORD) { if (map.containsKey(st.sval)) { ((Counter)map.get(st.sval)).count++; } else { map.put(st.sval, new Counter()); } } tt = st.nextToken(); } } catch (IOException e) { System.err.println(e); return; } Collection entries = map.entrySet(); // flatten the entries set into a vector for sorting Vector rev_wf = new Vector(entries); // Sort the vector according to its value Collections.sort(rev_wf, new Comparator() { public int compare(Object o1, Object o2) { // First sort by frequency int c = ((Counter)((Map.Entry)o2).getValue()).count - ((Co +unter)((Map.Entry)o1).getValue()).count; if (c == 0) { // Second sort by lexicographical order c = ((String)((Map.Entry)o2).getKey()).compareTo((String)( +(Map.Entry)o1).getKey()); } return c; } } ); Iterator it = rev_wf.iterator(); Map.Entry ent; String word; int count; while(it.hasNext()) { ent = (Map.Entry)it.next(); word = ((String)ent.getKey()); count = ((Counter)ent.getValue()).count; System.out.println(padleft(Integer.toString(count),7,' ') + "\ +t" + word); } } }
  • Python (24 lines):
    #!/usr/local/bin/python # $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ # # adapted from Bill Lear's original python word frequency counter # # Joel Rosdahl suggested using translate table to speed up # word splitting. That change alone sped this program up by # at least a factor of 3. # # with further speedups from Mark Baker import sys def main(): count = {} i_r = map(chr, range(256)) trans = [' '] * 256 o_a, o_z = ord('a'), (ord('z')+1) trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z] trans[o_a:o_z] = i_r[o_a:o_z] trans = ''.join(trans) rl = sys.stdin.readlines lines = rl(4095) while lines: for line in lines: for word in line.translate(trans).split(): try: count[word] += 1 except KeyError: count[word] = 1 lines = rl(4095) l = zip(count.values(), count.keys()) l.sort() l.reverse() print '\n'.join(["%7s\t%s" % (count, word) for (count, word) in l] +) main()
  • Haskell (20 lines):
    -- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- from Julian Assange -- compile with: -- ghc -O -package data wordfreq.hs -o wordfreq module Main where import List(sortBy) import Char(toLower,isLower) import FiniteMap(fmToList,emptyFM,addToFM_C) main = interact $ unlines . pretty . sort . fmToList . makemap . cword +s . lower where pretty l = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l] where pad n s = replicate (n - length s) ' ' ++ s sort = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of EQ -> compare w1 w0 x -> x) makemap = addFM emptyFM where addFM fm [] = fm addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs cwords s = case dropWhile (not . isLower) s of "" -> [] s' -> w : (cwords s'') where (w, s'') = span isLower s' lower = map toLower

And then write & run your perl solution in real time:

perl -nle"y/a-zA-Z/ /cs; ++$h{$_} for split }{ print qq[$_:$h{$_}] for + sort keys %h" theFile break:1 brief:1 bring:3 brought:2 buffalo:16 burden:1 but:20 by:16 call:2 called:6 came:2 campaign:1 can:36 cannot:2 capable:1 capitals:1 career:2 cart:1 case:2 ...

With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.

RIP Neil Armstrong


Comment on Re: Get me excited about perl
Select or Download Code
Re^2: Get me excited about perl
by tobyink (Abbot) on Sep 19, 2012 at 20:21 UTC

    php -R 'foreach (str_word_count(strtolower($argn), 1) as $w) $h[$w]++;' -E 'ksort($h); foreach ($h as $w=>$c) print "$w:$c\n";' <theFile

    Update: it's sorted now.

    perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'

      Is the output sorted?


      With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
      Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
      "Science is about questioning the status quo. Questioning authority".
      In the absence of evidence, opinion is indistinguishable from prejudice.

      RIP Neil Armstrong

      tobyink,

      Okay, what's the joke?

      You brought 'php' to a thread on 'Get me excited about perl'!

      I can always use a good laugh...Ed

      "Well done is better than well said." - Benjamin Franklin

        You brought 'php' to a thread on 'Get me excited about perl'!

        Only as a reply to a post which included C, C++, Java, Python and Haskell source code.

        My point is that Perl is not unique in its ability to provide concise solutions to text processing problems. The PHP solution is a little longer, sure, but is arguably more readable than the Perl one (to somebody who knows both languages) due to not needing to rely on idioms like the Eskimo kiss operator.

        perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://994514]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chilling in the Monastery: (8)
As of 2014-04-16 23:58 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    April first is:







    Results (436 votes), past polls