use strict;
use warnings;

# Convert old-style LaTeX \bibitem entries, read from the __DATA__ section at
# the end of this file, into BibTeX @article records printed on STDOUT.
# Each entry must sit on a single line. Lines that do not fit the expected
# layout are reported as TeX comments (%% ...) and skipped.

# Expected layout of one entry:
#   \bibitem[Label, 2004]{KEY04} Authors (2001) Title, {\it Journal}, {\bf 305}, 1743-1746.
# The pattern is not anchored at the end, so text after the final dot is
# tolerated. Comments inside the pattern must not contain the delimiter.
my $BIBITEM_RE = qr/
    \A \\bibitem \[ .*? \]      # skip the command and its optional argument
    \s* \{ ( [A-Z\d]+ ) \}      # citation key: capital letters and digits
    \s* ( .*? )                 # author list
    \( ( \d{4} ) \)             # publication year, in parentheses
    ( .*? )                     # publication title
    \{ \\it \s ( .*? ) \} ,     # journal name
    \s* \{ \\bf \s ( \d+ ) \} , # volume
    \s* ( \d+ - \d+ )           # page range
    \.                          # a dot at the end
/x;

# Field names, in the order the pattern captures them.
my @FIELD_NAMES = qw(key author year title journal volume pages);

# Parse one \bibitem entry.
#   $item   - text of a single entry, without the trailing newline
# Returns a hash reference keyed by @FIELD_NAMES, or nothing (undef in scalar
# context) when the text does not match the expected layout.
sub parse_bibitem {
    my ($item) = @_;

    my @captures = $item =~ $BIBITEM_RE
        or return;

    my %entry;
    @entry{@FIELD_NAMES} = @captures;

    # The lazy captures pick up the blanks that separate the fields.
    for my $value (values %entry) {
        $value =~ s/\A\s+//;
        $value =~ s/\s+\z//;
    }

    # The title is usually followed by a comma before the journal name.
    $entry{title} =~ s/[\s,]+\z//;

    # BibTeX writes page ranges with a double hyphen.
    $entry{pages} =~ s/-/--/;

    return \%entry;
}

# Render a parsed entry as a BibTeX @article record.
#   $entry  - hash reference as returned by parse_bibitem
# Returns the record as a string, followed by one blank line.
sub format_bibtex {
    my ($entry) = @_;

    return sprintf <<'TEX', @{$entry}{@FIELD_NAMES};
@article{%s,
  author  = {%s},
  year    = {%s},
  title   = {%s},
  journal = {%s},
  volume  = {%s},
  pages   = {%s},
}

TEX
}

# Read the entries from DATA one line at a time and print their BibTeX form.
sub main {
    while (my $item = <DATA>) {
        chomp $item;
        next if $item !~ /\S/;    # blank lines carry no entry

        my $entry = parse_bibitem($item);
        if (!$entry) {
            print "%% Ignoring weird bibliography item: >$item<\n";
            next;
        }

        print format_bibtex($entry);
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# tested without producing output.
main() unless caller;

__DATA__
\bibitem[Ao {\it et~al}., 2004]{AGKM04} Ao, W. {\it et~al.} (2001) Some long title 1, {\it Science}, {\bf 305}, 1743-1746.
\bibitem[Bailey and Elkan, 1995]{BE95} Bailey, T. Elkan, C. (1995) Some long title 2 {\it Machine Learning}, {\bf 21}, 51-80.