/* Copyright (C) 2010 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.pipe.iterator;
import java.io.*;
import java.net.URI;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import cc.mallet.types.*;
/**
 * Very similar to the SimpleFileLineIterator,
 * but skips lines that match a regular expression.
 *
 * <p>Each line read from the underlying reader that does <em>not</em> fully
 * match the skip expression is wrapped in an {@link Instance}. The line is
 * passed as the first constructor argument and a URI of the form
 * {@code array:N} as the third, where {@code N} counts the instances
 * returned so far, starting at 0.
 *
 * <p>{@link #hasNext()} may be called any number of times between calls to
 * {@link #next()} without consuming input.
 *
 * @author Gregory Druck
 */
public class SelectiveFileLineIterator implements Iterator<Instance> {

    BufferedReader reader = null;
    int index = -1;
    /** The most recently fetched non-skipped line, or null once input is exhausted. */
    String currentLine = null;
    /** True while {@link #currentLine} holds a look-ahead that next() has not yet consumed. */
    boolean hasNextUsed = false;
    String skipRegex;
    /** Compiled form of {@link #skipRegex}, built once instead of once per line. */
    private final Pattern skipPattern;

    /**
     * Creates an iterator over the lines of {@code reader}.
     *
     * @param reader source of lines; it is wrapped in a {@link BufferedReader}
     * @param skipRegex regular expression; any line that matches it in full is skipped
     * @throws NullPointerException if {@code skipRegex} is null
     * @throws java.util.regex.PatternSyntaxException if {@code skipRegex} is not a valid expression
     */
    public SelectiveFileLineIterator (Reader reader, String skipRegex) {
        this.reader = new BufferedReader (reader);
        this.index = 0;
        this.skipRegex = skipRegex;
        this.skipPattern = Pattern.compile (skipRegex);
    }

    /**
     * Returns an instance wrapping the next line that is not skipped.
     *
     * @return a new {@link Instance} built from the line and an {@code array:N} URI
     * @throws NoSuchElementException if no non-skipped line remains
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    @Override
    public Instance next () {
        // hasNext() performs the look-ahead if it has not been done yet.
        if (!hasNext ()) {
            throw new NoSuchElementException ("No more lines available.");
        }
        // Mark the look-ahead as consumed so the next call fetches a fresh line.
        hasNextUsed = false;
        URI uri = URI.create ("array:" + index++);
        return new Instance (currentLine, null, uri, null);
    }

    /**
     * Reports whether another non-skipped line is available. The look-ahead
     * line is cached, so repeated calls do not advance the reader.
     *
     * @return true if a subsequent call to {@link #next()} will succeed
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    @Override
    public boolean hasNext () {
        if (!hasNextUsed) {
            currentLine = readNextUnskippedLine ();
            hasNextUsed = true;
        }
        return currentLine != null;
    }

    /**
     * Always throws; removal is not supported by this iterator.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void remove () {
        throw new IllegalStateException ("This Iterator<Instance> does not support remove().");
    }

    /** Reads lines until one does not fully match the skip pattern; returns null at end of input. */
    private String readNextUnskippedLine () {
        try {
            String line = reader.readLine ();
            while (line != null && skipPattern.matcher (line).matches ()) {
                line = reader.readLine ();
            }
            return line;
        }
        catch (IOException e) {
            throw new RuntimeException (e);
        }
    }
}