import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
import java.util.*;
import java.util.regex.*;

/**
 * Counts how often each word occurs in a file and prints the tally.
 *
 * <p>The file named by the first command-line argument is memory-mapped,
 * decoded as ISO-8859-1, split into lines and then into words, and the
 * resulting word-to-frequency map is printed to standard output in sorted
 * key order.
 */
public class WordCount {

  /**
   * Matches one line at a time: {@code .} does not cross line terminators
   * and, in MULTILINE mode, {@code $} anchors at the end of every line.
   */
  private static final Pattern LINE_PATTERN =
      Pattern.compile(".*$", Pattern.MULTILINE);

  /** Matches a single word separator: one whitespace or punctuation character. */
  private static final Pattern WORD_BREAK_PATTERN =
      Pattern.compile("[\\p{Space}\\p{Punct}]");

  /**
   * Program entry point.
   *
   * @param args command-line arguments; {@code args[0]} is the path of the
   *     file to analyse (any further arguments are ignored)
   * @throws IllegalArgumentException if no file name is given, or if the
   *     file is too large to be mapped in a single region
   * @throws Exception if the file cannot be opened, mapped or decoded
   */
  public static void main(String args[]) throws Exception {
    if (args.length < 1) {
      throw new IllegalArgumentException("usage: java WordCount <filename>");
    }
    String filename = args[0];

    CharBuffer charBuffer;
    // try-with-resources closes both the stream and its channel, even when
    // mapping or decoding fails.
    try (FileInputStream input = new FileInputStream(filename);
         FileChannel channel = input.getChannel()) {
      // Pass the size as a long: map() rejects regions larger than
      // Integer.MAX_VALUE itself instead of us silently truncating the length.
      MappedByteBuffer buffer =
          channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());

      // ISO-8859-1 maps every byte to exactly one char, so decoding cannot
      // hit malformed input.
      charBuffer = StandardCharsets.ISO_8859_1.newDecoder().decode(buffer);
    }

    System.out.println(countWords(charBuffer));
  }

  /**
   * Tallies the words found in {@code text}.
   *
   * <p>The text is processed line by line; each line is split on single
   * whitespace/punctuation characters and the empty strings produced by
   * adjacent separators are skipped. Words are compared case-sensitively.
   * Package-private so the counting logic can be exercised without a file.
   *
   * @param text the character data to analyse
   * @return a map from each distinct word to its number of occurrences,
   *     sorted by the natural ordering of the words
   */
  static Map<String, Integer> countWords(CharSequence text) {
    Map<String, Integer> frequencies = new TreeMap<>();
    Matcher lineMatcher = LINE_PATTERN.matcher(text);

    while (lineMatcher.find()) {
      String[] words = WORD_BREAK_PATTERN.split(lineMatcher.group());
      for (String word : words) {
        if (word.length() > 0) {
          Integer previous = frequencies.get(word);
          frequencies.put(word, previous == null ? 1 : previous + 1);
        }
      }
    }
    return frequencies;
  }
}