#!/usr/bin/perl #abbreviation script (ab) #Last Updated 05.09.09 use strict; use utf8; my $option = "no"; my $length; my @sorted_abbr; my @lgr = ("1=first person", "2=second person", "3=third person", "A=agent-like argument of canonical transitive verb", "ABL=ablative", "ABS=absolutive", "ACC=accusative", "ADJ=adjective", "ADV=adverb(ial)", "AGR=agreement", "ALL=allative", "ANTIP=antipassive", "APPL=applicative", "ART=article", "AUX=auxiliary", "BEN=benefactive", "CAUS=causative", "CLF=classifier", "COM=comitative", "COMP=complementizer", "COMPL=completive", "COND=conditional", "COP=copula", "CVB=converb", "DAT=dative", "DECL=declarative", "DEF=definite", "DEM=demonstrative", "DET=determiner", "DIST=distal", "DISTR=distributive", "DU=dual", "DUR=durative", "ERG=ergative", "EXCL=exclusive", "F=feminine", "FOC=focus", "FUT=future", "GEN=genitive", "IMP=imperative", "INCL=inclusive", "IND=indicative", "INDF=indefinite", "INF=infinitive", "INS=instrumental", "INTR=intransitive", "IPFV=imperfective", "IRR=irrealis", "LOC=locative", "M=masculine", "N=neuter", "N-=non- (e.g. NSG nonsingular, NPST nonpast)", "NEG=negation, negative", "NMLZ=nominalizer/nominalization", "NOM=nominative", "OBJ=object", "OBL=oblique", "P=patient-like argument of canonical transitive verb", "PASS=passive", "PFV=perfective", "PL=plural", "POSS=possessive", "PRED=predicative", "PRF=perfect", "PRS=present", "PROG=progressive", "PROH=prohibitive", "PROX=proximal/proximate", "PST=past", "PTCP=participle", "PURP=purposive", "Q=question particle/marker", "QUOT=quotative", "RECP=reciprocal", "REFL=reflexive", "REL=relative", "RES=resultative", "S=single argument of canonical intransitive verb", "SBJ=subject", "SBJV=subjunctive", "SG=singular", "TOP=topic", "TR=transitive", "VOC=vocative"); my $filename; sub first{ print "Enter the name of your TeX file or type 'help' for help: "; $filename = <>; if ($filename =~ /help/) { help(); }else{ parse(); }; }; #first ends first(); #parse(); lgr(); prin(); sub parse{ #parsing tex my $dummy = 0; my @words; my $check = "no"; my @spaces; my @abbr; chomp $filename; if ( $filename =~ /\s/ ) { my @dumb = split(/\s/, $filename); $filename = $dumb[0]; if ($dumb[1] =~ /-lgr/ ) { $option = "yes"; }; }; open FILE, "<:utf8", $filename or die "No such file"; my @file; while () { chomp; push(@file, $_); }; close FILE; $length = scalar(@file); for (my $i = 0; $i < $length; $i++) { if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) { $dummy = $i + 1; while ( $file[$dummy] =~ m/[\-|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\=|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\.|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\:|\s]([A-Z]+)[\s|\-|\:|\.|\=]|(SG|DU|PL)|(1|2|3)|\s([A-Z]+)\s/g ) { # print $&."\n"; my $hell = $&; $hell =~ s/-//g;$hell =~ s/\s//g;$hell =~ s/://g;$hell =~ s/\.//g;$hell =~ s/=//g; for my $line (@abbr){ $check = "yes" if ($line eq $hell); # print $hell.$line.$check."\n"; }; push(@abbr, $hell) if ($check =~ "no"); $check = "no"; }; }; }; @sorted_abbr = sort @abbr; $length = scalar(@sorted_abbr); foreach (@sorted_abbr) { chomp; }; }; #subparse ends sub lgr{ $length = scalar(@sorted_abbr); if ( $option =~ /yes/ ) { for (my $i = 0; $i < $length;$i++) { foreach (@lgr) { my @dummy = split(/=/, $_); if ( $dummy[0] eq $sorted_abbr[$i] ) { $sorted_abbr[$i] = "\\item[$sorted_abbr[$i]] '$dummy[1]'"; }; }; }; }; }; #lgr ends sub prin{ foreach (@sorted_abbr) { if ( $_ !~ /^\\/ ) { $_ = "\\item[$_] ''"; }; }; $filename =~ s/\.tex*//; my $newfilename = $filename."-abb.txt"; open ABB, ">:utf8", $newfilename; foreach (@sorted_abbr) { print ABB $_."\n"; }; close ABB; }; #prin ends sub help{ print "Instructions: 1. Enter the name of the TeX file you want to run the script on.\n2. If you want to find the abbreviations already in the list of 'Leipzig Glossing Rules' automatically, type '-lgr' after the file name.\nExample: 'foo.tex -lgr'.\n3. The list of abbreviations will be printed in a file '*-abb.txt'. If the name of the TeX file was 'foo.tex' than the name of the list-file will be 'foo-abb-txt'.\n\nNote: The script and the TeX file have to be in the same directory.\n\nNote 2: Abbreviation is defined as:\n1. Anything which is written in uppercase and has more than one letter\n2. Anything written in \\textsc{}\n3. Only exceptions to the first two rules are the person/number abbreviations '1s, 1d, 2s, etc.'\n4. Only the line \\gll, \\ag., \\bg. etc. \\exg. and \\exg are searched for abbreviations\n\nQuestions: email to ozangulle\@gmail.com .\n"; first(); }; #help ends