package org.trie4j.test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import org.junit.Assert;
import org.trie4j.MapTrie;
import org.trie4j.Trie;
/**
 * Test fixture exposing the lines of a gzip-compressed, UTF-8 encoded text file
 * (by the class name, a list of Wikipedia article titles) as an {@link Iterable}.
 *
 * <p>Every call to {@link #iterator()} re-opens the file and yields each line
 * trimmed, skipping lines that are empty after trimming. The helper methods
 * load all lines into a trie, set or map, or assert that such a container holds
 * all of them.
 */
public class WikipediaTitles implements Iterable<String> {
    /**
     * Creates a source backed by the given gzip file.
     *
     * @param gzFilePath path of the gzip-compressed file to read
     * @throws FileNotFoundException if no file exists at {@code gzFilePath}
     * @throws IOException declared for callers; this constructor itself only
     *         throws {@link FileNotFoundException}
     */
    public WikipediaTitles(String gzFilePath) throws IOException {
        if (!new File(gzFilePath).exists()) {
            throw new FileNotFoundException(gzFilePath);
        }
        this.path = gzFilePath;
    }

    /**
     * Creates a source backed by {@code "data/" + IOUtil.readLine("data/wiki")}.
     *
     * <p>NOTE(review): presumably {@code data/wiki} is a one-line pointer file
     * naming the actual dump; confirm against {@code IOUtil.readLine}.
     *
     * @throws FileNotFoundException if the resolved file does not exist
     * @throws IOException if {@code IOUtil.readLine} fails
     */
    public WikipediaTitles() throws IOException {
        // Delegate so the existence check lives in exactly one place.
        this("data/" + IOUtil.readLine("data/wiki"));
    }

    /**
     * Inserts every line into the given trie.
     *
     * @param <T> concrete trie type, returned unchanged for call chaining
     * @param trie trie to insert into
     * @return the same {@code trie} instance
     */
    public <T extends Trie> T insertTo(T trie) {
        for (String title : this) {
            trie.insert(title);
        }
        return trie;
    }

    /**
     * Inserts every line into the given map trie, using the zero-based
     * position of the line in iteration order as its value.
     *
     * @param trie map trie to insert into
     * @return the same {@code trie} instance
     */
    public MapTrie<Integer> insertTo(MapTrie<Integer> trie) {
        int index = 0;
        for (String title : this) {
            trie.insert(title, Integer.valueOf(index++));
        }
        return trie;
    }

    /**
     * Adds every line to the given set.
     *
     * @param set set to add to
     * @return the same {@code set} instance
     */
    public Set<String> insertTo(Set<String> set) {
        for (String title : this) {
            set.add(title);
        }
        return set;
    }

    /**
     * Puts every line into the given map, using the zero-based position of
     * the line in iteration order as its value. A line that occurs more than
     * once ends up mapped to its last position.
     *
     * @param map map to put into
     * @return the same {@code map} instance
     */
    public Map<String, Integer> insertTo(Map<String, Integer> map) {
        int index = 0;
        for (String title : this) {
            map.put(title, index++);
        }
        return map;
    }

    /**
     * Asserts that the trie contains every line, timing only the lookups.
     *
     * @param trie trie to query
     * @return the summed {@code LapTimer.lapNanos()} readings divided by
     *         1,000,000 (presumably elapsed lookup time in milliseconds)
     */
    public long assertAllContains(Trie trie) {
        LapTimer timer = new LapTimer();
        long totalNanos = 0;
        int index = 0;
        for (String title : this) {
            // Reset right before the lookup so file reading is not measured.
            timer.reset();
            boolean found = trie.contains(title);
            totalNanos += timer.lapNanos();
            Assert.assertTrue(String.format("%dth entry: %s", index, title), found);
            index++;
        }
        return totalNanos / 1000000;
    }

    /**
     * Asserts that the map trie maps every line to its zero-based position in
     * iteration order, timing only the lookups.
     *
     * @param trie map trie to query
     * @return the summed {@code LapTimer.lapNanos()} readings divided by
     *         1,000,000 (presumably elapsed lookup time in milliseconds)
     */
    public long assertAllContains(MapTrie<Integer> trie) {
        LapTimer timer = new LapTimer();
        long totalNanos = 0;
        int index = 0;
        for (String title : this) {
            timer.reset();
            Integer actual = trie.get(title);
            totalNanos += timer.lapNanos();
            Assert.assertEquals(index + "th entry: ." + title, Integer.valueOf(index), actual);
            index++;
        }
        return totalNanos / 1000000;
    }

    /**
     * Asserts that the set contains every line, timing only the lookups.
     *
     * @param set set to query
     * @return the summed {@code LapTimer.lapNanos()} readings divided by
     *         1,000,000 (presumably elapsed lookup time in milliseconds)
     */
    public long assertAllContains(Set<String> set) {
        LapTimer timer = new LapTimer();
        long totalNanos = 0;
        int index = 0;
        for (String title : this) {
            timer.reset();
            boolean found = set.contains(title);
            totalNanos += timer.lapNanos();
            Assert.assertTrue(String.format("%dth entry: %s", index, title), found);
            index++;
        }
        return totalNanos / 1000000;
    }

    /**
     * Asserts that the map maps every line to its zero-based position in
     * iteration order, timing only the lookups.
     *
     * @param map map to query
     * @return the summed {@code LapTimer.lapNanos()} readings divided by
     *         1,000,000 (presumably elapsed lookup time in milliseconds)
     */
    public long assertAllContains(Map<String, Integer> map) {
        LapTimer timer = new LapTimer();
        long totalNanos = 0;
        int index = 0;
        for (String title : this) {
            timer.reset();
            Integer actual = map.get(title);
            totalNanos += timer.lapNanos();
            Assert.assertEquals(index + "th entry: ." + title, Integer.valueOf(index), actual);
            index++;
        }
        return totalNanos / 1000000;
    }

    /**
     * Opens the file and returns an iterator over its trimmed, non-empty lines.
     *
     * <p>The underlying reader is closed when the end of the file is reached
     * or when a read fails; an iterator abandoned before that point keeps the
     * file open. {@code remove()} is unsupported.
     *
     * <p>I/O failures are never thrown from {@code hasNext()}: it returns
     * {@code false} and the failure is remembered, then thrown from
     * {@code next()} as a {@link NoSuchElementException} whose cause is the
     * {@link IOException}. NOTE(review): a for-each loop therefore ends
     * silently on an I/O error (including a failure to open the file).
     */
    @Override
    public Iterator<String> iterator() {
        return new Iterator<String>() {
            /** Line fetched ahead of time, or {@code null} if none is buffered. */
            private String next;
            /** Open reader, or {@code null} once closed or if opening failed. */
            private BufferedReader reader;
            /** Terminal failure to throw from {@code next()}, once set. */
            private NoSuchElementException exception;

            {
                try {
                    reader = openReader();
                    fetch();
                } catch (IOException e) {
                    exception = failure(e);
                }
            }

            @Override
            public boolean hasNext() {
                if (next != null) {
                    return true;
                }
                if (exception != null) {
                    return false;
                }
                try {
                    fetch();
                } catch (IOException e) {
                    exception = failure(e);
                    return false;
                }
                return next != null;
            }

            @Override
            public String next() {
                if (exception != null) {
                    throw exception;
                }
                if (next == null && !hasNext()) {
                    // hasNext() may just have recorded an I/O failure; keep it
                    // so the cause is not lost. Otherwise this is plain exhaustion.
                    if (exception == null) {
                        exception = new NoSuchElementException();
                    }
                    throw exception;
                }
                String ret = next;
                next = null;
                return ret;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

            /**
             * Reads ahead to the next non-empty trimmed line and stores it in
             * {@code next}; stores {@code null} and closes the reader at end
             * of file. Does nothing once the reader has been closed.
             */
            private void fetch() throws IOException {
                if (reader == null) {
                    return;
                }
                // Read into a local so a failure after skipping a blank line
                // cannot leave an empty string behind in the 'next' field.
                String line;
                try {
                    do {
                        line = reader.readLine();
                        if (line != null) {
                            line = line.trim();
                        }
                    } while (line != null && line.length() == 0);
                } catch (IOException e) {
                    // Close without letting a secondary failure mask the read error.
                    closeQuietly(reader);
                    reader = null;
                    throw e;
                }
                next = line;
                if (line == null) {
                    // Drop the reference first so a failing close() cannot
                    // leave a stale reader to be used again.
                    BufferedReader exhausted = reader;
                    reader = null;
                    exhausted.close();
                }
            }
        };
    }

    /**
     * Opens {@link #path} as a gzip-compressed UTF-8 text stream. The file
     * stream is closed again if wrapping it fails (for example on an invalid
     * gzip header), so no file handle leaks.
     */
    private BufferedReader openReader() throws IOException {
        FileInputStream file = new FileInputStream(path);
        boolean opened = false;
        try {
            BufferedReader result = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(file), "UTF-8"));
            opened = true;
            return result;
        } finally {
            if (!opened) {
                closeQuietly(file);
            }
        }
    }

    /** Wraps an I/O failure in the exception type {@code Iterator.next()} throws. */
    private static NoSuchElementException failure(IOException cause) {
        NoSuchElementException e = new NoSuchElementException();
        e.initCause(cause);
        return e;
    }

    /** Closes a resource on an error path where a more relevant exception is already in flight. */
    private static void closeQuietly(java.io.Closeable resource) {
        try {
            resource.close();
        } catch (IOException ignored) {
            // Intentionally ignored: the caller is already reporting the primary failure.
        }
    }

    /** Path of the gzip-compressed file this instance reads. */
    private final String path;
}