package com.insightfullogic.java8.examples.chapter6;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingBy;
/**
 * Counts occurrences of tokens read from a text stream and prints the totals
 * to standard output, one {@code token -> count} line per distinct token.
 *
 * <p>{@link #countUsers(InputStream)} tallies the text of {@code <username>}
 * elements; {@link #countWords(InputStream)} tallies whitespace-separated
 * words. Both decode their input as UTF-8 and close the stream they are given.
 */
public class WordCounting {

    /** Classpath resource, resolved relative to this class, that {@link #main} processes. */
    private static final String EN_WIKI_RESOURCE = "enwiki-20131230-stubs-meta-hist-incr.xml";

    /** Captures the text of a {@code <username>} element that is preceded by whitespace. */
    private static final Pattern USERNAME = Pattern.compile("\\s+<username>(.*?)</username>");

    /** Matches a run of one or more whitespace characters; used to split a line into words. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Counts the user names found in the bundled resource and prints them.
     *
     * @param args ignored
     * @throws NullPointerException if the resource is not on the classpath
     */
    public static void main(String[] args) {
        InputStream enWiki = WordCounting.class.getResourceAsStream(EN_WIKI_RESOURCE);
        // getResourceAsStream signals a missing resource by returning null; fail here
        // with the resource name instead of an anonymous NPE inside the reader.
        Objects.requireNonNull(enWiki, "resource not found: " + EN_WIKI_RESOURCE);
        new WordCounting().countUsers(enWiki);
        // Alternative example: count the words of a plain-text resource instead.
        // InputStream huckleberryFinn = WordCounting.class.getResourceAsStream("huckleberry_finn");
        // new WordCounting().countWords(huckleberryFinn);
    }

    /**
     * Prints how many times each user name appears in {@code stream}.
     *
     * <p>Only lines that contain {@code <username>} and match the user-name
     * pattern (whitespace, opening tag, text, closing tag) contribute; any other
     * line, including one with an unterminated tag, is skipped. Lines are
     * processed as a parallel stream. The iteration order of the printed
     * entries is that of the collected map and is not sorted.
     *
     * <p>An {@link IOException} raised while closing the reader is reported via
     * its stack trace. Read failures are not caught here; per the
     * {@link BufferedReader#lines()} contract they propagate as
     * {@link java.io.UncheckedIOException}.
     *
     * @param stream UTF-8 encoded text; closed before this method returns
     * @throws NullPointerException if {@code stream} is null
     */
    public void countUsers(InputStream stream) {
        Objects.requireNonNull(stream, "stream");
        try (BufferedReader reader = newUtf8Reader(stream)) {
            Map<String, Long> counts = reader
                .lines()
                .parallel()
                // Cheap substring test first so the regex only runs on candidate lines.
                .filter(line -> line.contains("<username>"))
                .map(USERNAME::matcher)
                // Keep only successful matches: calling group(1) without a successful
                // find() throws IllegalStateException.
                .filter(Matcher::find)
                .map(matcher -> matcher.group(1))
                .collect(groupingBy(name -> name, counting()));
            printCounts(counts);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints how many times each whitespace-separated word appears in {@code stream}.
     *
     * <p>Each line is split on runs of whitespace; tokens are trimmed and empty
     * tokens (for example from leading whitespace or blank lines) are dropped.
     * The iteration order of the printed entries is that of the collected map
     * and is not sorted.
     *
     * <p>An {@link IOException} raised while closing the reader is reported via
     * its stack trace. Read failures are not caught here; per the
     * {@link BufferedReader#lines()} contract they propagate as
     * {@link java.io.UncheckedIOException}.
     *
     * @param stream UTF-8 encoded text; closed before this method returns
     * @throws NullPointerException if {@code stream} is null
     */
    public void countWords(InputStream stream) {
        Objects.requireNonNull(stream, "stream");
        try (BufferedReader reader = newUtf8Reader(stream)) {
            Map<String, Long> counts = reader
                .lines()
                .flatMap(WHITESPACE::splitAsStream)
                .map(String::trim)
                .filter(word -> !word.isEmpty())
                .collect(groupingBy(word -> word, counting()));
            printCounts(counts);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Wraps {@code stream} in a buffered reader that always decodes UTF-8, not the platform default. */
    private static BufferedReader newUtf8Reader(InputStream stream) {
        return new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    }

    /** Writes one {@code key -> count} line per map entry to standard output. */
    private static void printCounts(Map<String, Long> counts) {
        counts.forEach((word, count) -> System.out.println(word + " -> " + count));
    }
}