package org.arabidopsis.ahocorasick;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Rough timing comparison between {@link AhoCorasick} keyword search and an
 * equivalent {@code java.util.regex} alternation over the same text.
 *
 * <p>The text is loaded from the {@code christmas.txt} classpath resource located
 * next to this class. Three wall-clock durations are printed: loading the text,
 * the Aho-Corasick build + scan, and the regex compile + scan.
 */
public class Benchmark {

    /** Name of the classpath resource (resolved relative to this class) used as the search text. */
    private static final String CORPUS_RESOURCE = "christmas.txt";

    /**
     * Runs the benchmark once and prints the timings to standard output.
     *
     * @param args ignored
     * @throws IOException if the text resource is missing or cannot be read
     * @throws InterruptedException never thrown by this implementation; kept so the
     *         signature stays compatible with existing callers
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        String[] words = {
            "Christmas", "Cains", "Marley", "spectre", "Ebenezer", "double-ironed", "supernatural", "SPIRITS",
            "Ding", "Ali Baba"
        };

        long t0 = System.currentTimeMillis();
        String text = readCorpus();
        System.out.println("Starting benchmark");
        long t1 = System.currentTimeMillis();

        // Aho-Corasick: build the automaton, then drain every match.
        AhoCorasick finder = new AhoCorasick();
        for (String word : words) {
            finder.add(word, word.toCharArray());
        }
        finder.prepare();
        Iterator<?> it = finder.search(text.toCharArray());
        while (it.hasNext()) {
            it.next();
        }
        long t2 = System.currentTimeMillis();

        // Regex: compile one alternation of all keywords, then drain every match.
        Pattern p = Pattern.compile(buildAlternation(words));
        Matcher m = p.matcher(text);
        while (m.find()) {
            // Matches are intentionally discarded; only the scan time matters.
        }
        long t3 = System.currentTimeMillis();

        System.out.println("File reading: " + Long.toString(t1 - t0) + "ms");
        System.out.println("Aho-Corasick: " + Long.toString(t2 - t1) + "ms");
        System.out.println("Java-regexp: " + Long.toString(t3 - t2) + "ms");
    }

    /**
     * Reads the whole text resource into memory, terminating every line with {@code '\n'}.
     *
     * @return the resource content with normalized line endings
     * @throws IOException if the resource is not on the classpath or reading fails
     */
    private static String readCorpus() throws IOException {
        InputStream in = Benchmark.class.getResourceAsStream(CORPUS_RESOURCE);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of a NullPointerException from the reader constructor.
            throw new IOException("Resource not found on classpath: " + CORPUS_RESOURCE);
        }
        // try-with-resources closes the reader (and the wrapped stream) even on failure.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            // StringBuilder avoids the quadratic copying of repeated String concatenation.
            StringBuilder text = new StringBuilder();
            String line = reader.readLine();
            while (line != null) {
                text.append(line).append('\n');
                line = reader.readLine();
            }
            return text.toString();
        }
    }

    /**
     * Builds a regex that matches any one of the given words literally.
     *
     * @param words keywords to match
     * @return the words joined with {@code |}, each quoted so regex metacharacters
     *         inside a keyword are matched as plain text
     */
    private static String buildAlternation(String[] words) {
        StringBuilder pattern = new StringBuilder();
        for (String word : words) {
            if (pattern.length() > 0) {
                pattern.append('|');
            }
            pattern.append(Pattern.quote(word));
        }
        return pattern.toString();
    }
}