package org.wikibrain.sr.wikify;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import static org.wikibrain.sr.wikify.WbCorpusLineReader.*;
/**
 * Groups the lines of a corpus file into documents.
 *
 * <p>Lines are pulled from a {@code WBCorpusLineIterator} opened on the given file.
 * Consecutive lines whose {@code getDoc()} values are equal are collected into a
 * single {@link Doc}; a change in {@code getDoc()} starts a new one.</p>
 *
 * @author Shilad Sen
 */
public class WBCorpusDocReader implements Iterable<WBCorpusDocReader.Doc> {
    private final File path;

    /**
     * @param path the corpus file to read; it is not opened until {@link #iterator()} is called
     */
    public WBCorpusDocReader(File path) {
        this.path = path;
    }

    /**
     * Opens the corpus file and returns an iterator over its documents.
     *
     * <p>{@code hasNext()} and {@code next()} of the returned iterator are synchronized
     * on the iterator itself. {@code remove()} is not supported.</p>
     *
     * @return an iterator that yields one {@link Doc} per run of lines sharing the same doc info
     * @throws IllegalArgumentException if the underlying line iterator cannot be created
     */
    @Override
    public Iterator<Doc> iterator() {
        final Iterator<Line> delegate;
        try {
            delegate = new WBCorpusLineIterator(path);
        } catch (IOException e) {
            throw new IllegalArgumentException("Error creating delegate for " + path + ": ", e);
        }
        return new Iterator<Doc>() {
            // Document currently being assembled; null when nothing is pending.
            private Doc accum = null;

            @Override
            public synchronized boolean hasNext() {
                return accum != null || delegate.hasNext();
            }

            @Override
            public synchronized Doc next() {
                // Only pull from the delegate while it reports more lines, so that
                // the pending document is never lost (or the loop never spins) when
                // the delegate is exhausted.
                while (delegate.hasNext()) {
                    Line l = delegate.next();
                    if (l == null) {
                        // A null line acts as a document boundary; with nothing
                        // pending there is nothing to emit, so keep reading.
                        if (accum != null) {
                            return takePending();
                        }
                    } else if (accum == null) {
                        accum = new Doc(l);
                    } else if (!accum.getDoc().equals(l.getDoc())) {
                        // The line belongs to a different document: emit the finished
                        // one and start accumulating the new one with this line.
                        Doc finished = accum;
                        accum = new Doc(l);
                        return finished;
                    } else {
                        accum.addLine(l);
                    }
                }
                if (accum == null) {
                    throw new NoSuchElementException("No more documents in " + path);
                }
                // Delegate is drained: flush the last document.
                return takePending();
            }

            /** Returns the pending document and clears it. */
            private Doc takePending() {
                Doc pending = accum;
                accum = null;
                return pending;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * The lines of one document, together with the corpus and doc info taken
     * from the first line of that document.
     */
    public static class Doc {
        private final CorpusInfo corpus;
        private final DocInfo doc;
        private final List<String> lines = new ArrayList<String>();

        /**
         * Starts a document from its first line.
         *
         * @param line supplies the corpus info, the doc info and the first line of text
         */
        public Doc(Line line) {
            this.corpus = line.getCorpus();
            this.doc = line.getDoc();
            this.lines.add(line.getLine());
        }

        /**
         * Appends the text of {@code line}; its corpus and doc info are not checked.
         */
        public void addLine(Line line) {
            this.lines.add(line.getLine());
        }

        public CorpusInfo getCorpus() {
            return corpus;
        }

        public DocInfo getDoc() {
            return doc;
        }

        /**
         * @return the internal, mutable list of line texts (not a copy)
         */
        public List<String> getLines() {
            return lines;
        }
    }
}