/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cogroo.uima.readers;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.util.ObjectStream;
import cogroo.uima.readers.entities.Paragraph;
import cogroo.uima.readers.entities.SentenceEx;
import cogroo.uima.readers.entities.Text;
/**
 * An {@link ObjectStream} of {@link Text} that concatenates the content of
 * several files: each file is opened in turn with a {@code Reader} (resolved
 * from this package) and, once that reader returns {@code null}, the next file
 * in the list is opened.
 * <p>
 * Not threadsafe.
 */
public class MultiReader implements ObjectStream<Text> {

  /** Files to read, in order. The list is used as given (no copy is made). */
  private final List<File> mFiles;

  /** Encoding name handed to every {@code Reader} that is created. */
  private final String mEncoding;

  /** Index in {@link #mFiles} of the file the current reader belongs to. */
  private int mCurrentIndex;

  /** Reader for the current file, or {@code null} when no file is open. */
  private Reader mCurrentReader;

  /**
   * Creates a stream over the given files. No file is opened until the first
   * call to {@link #read()} (or {@link #reset()}).
   *
   * @param aFiles
   *          the files to read, in the order they should be consumed
   * @param aEncoding
   *          the encoding passed to each underlying reader
   */
  public MultiReader(List<File> aFiles, String aEncoding) {
    this.mFiles = aFiles;
    this.mEncoding = aEncoding;
    this.mCurrentIndex = 0;
  }

  /**
   * Returns the next {@link Text}, moving on to the following file whenever
   * the current one is exhausted.
   *
   * @return the next text, or {@code null} when every file has been consumed
   *         (including when the file list is empty)
   * @throws IOException
   *           if a file cannot be opened or read
   */
  public Text read() throws IOException {
    if (this.mCurrentReader == null) {
      // Nothing open yet (first call, after close(), or after a failed open).
      if (this.mCurrentIndex >= this.mFiles.size()) {
        return null;
      }
      updateReader();
    }

    // Iterate instead of recursing so that a long run of empty files cannot
    // grow the call stack.
    while (true) {
      Text ret = this.mCurrentReader.read();
      if (ret != null) {
        return ret;
      }
      // Stay on the last file at end of stream so that repeated calls do not
      // keep incrementing the index.
      if (this.mCurrentIndex + 1 >= this.mFiles.size()) {
        return null;
      }
      this.mCurrentIndex++;
      updateReader();
    }
  }

  /**
   * Rewinds the stream to the beginning of the first file. Safe to call even
   * if nothing has been read yet.
   *
   * @throws IOException
   *           if closing the current reader or opening the first file fails
   */
  public void reset() throws IOException, UnsupportedOperationException {
    closeCurrentReader();
    this.mCurrentIndex = 0;
    // Reopen eagerly so that a missing first file is reported here; with an
    // empty list there is nothing to open.
    if (!this.mFiles.isEmpty()) {
      this.updateReader();
    }
  }

  /**
   * Closes the reader of the current file, if one is open, and rewinds the
   * file index. Safe to call when nothing has been opened.
   *
   * @throws IOException
   *           if closing the current reader fails
   */
  public void close() throws IOException {
    closeCurrentReader();
    this.mCurrentIndex = 0;
  }

  /**
   * Closes the current reader, if any, and clears the field so that a closed
   * reader is never used again, even when {@code close()} itself throws.
   */
  private void closeCurrentReader() throws IOException {
    if (this.mCurrentReader != null) {
      try {
        this.mCurrentReader.close();
      } finally {
        this.mCurrentReader = null;
      }
    }
  }

  /**
   * Replaces the current reader with one for the file at
   * {@link #mCurrentIndex}, closing the previous reader first.
   */
  private void updateReader() throws FileNotFoundException, IOException {
    closeCurrentReader();

    FileInputStream in = new FileInputStream(
        this.mFiles.get(this.mCurrentIndex));
    boolean opened = false;
    try {
      this.mCurrentReader = new Reader(in, this.mEncoding);
      opened = true;
    } finally {
      if (!opened) {
        // The reader never took ownership of the stream: release it here.
        try {
          in.close();
        } catch (IOException ignored) {
          // Keep the exception that made the construction fail.
        }
      }
    }
  }

  /**
   * Manual smoke test: dumps the paragraphs and sentences of a hard-coded
   * corpus file to standard output.
   *
   * @param args
   *          ignored
   */
  public static void main(String[] args) throws FileNotFoundException,
      IOException {
    List<File> fs = new ArrayList<File>();
    fs.add(new File(
        "/Users/wcolen/Documents/wrks/corpus/Bosque/Bosque_CF_8.0.ad.txt"));
    // fs.add(new
    // File("/Users/wcolen/Documents/wrks/corpuswrk/Corpus/Metro/Metro6.txt"));
    MultiReader r = new MultiReader(fs, "ISO-8859-1");
    try {
      int tcount = 1;
      Text t = r.read();
      while (t != null) {
        String text = t.getText();
        System.out.println(tcount++);
        int pcount = 1;
        for (Paragraph p : t.getParagraphs()) {
          System.out.println(" > " + pcount++);
          System.out.println("[" + text.subSequence(p.getStart(), p.getEnd())
              + "]");
          for (SentenceEx s : p.getSentences()) {
            System.out.println(" > " + s.getSentence().getMetadata());
            System.out.println("[" + text.subSequence(s.getStart(), s.getEnd())
                + "]");
            s.getGrammarErrors();
          }
        }
        t = r.read();
      }
    } finally {
      // Release the file handle even if reading or printing fails.
      r.close();
    }
  }
}