package edu.stanford.nlp.objectbank;
import edu.stanford.nlp.util.AbstractIterator;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* An Iterator that reads the contents of a Reader, splits them on the specified
* delimiter (a regular expression), and then passes each piece through a Function
* to produce objects of type T.
*
* @author Jenny Finkel <A HREF="mailto:jrfinkel@stanford.edu">jrfinkel@stanford.edu</A>
* @param <T> The type of the objects returned
*/
public class DelimitRegExIterator<T> extends AbstractIterator<T> {

  /** The raw, not yet parsed chunks of the input, in input order. */
  private final Iterator<String> tokens;

  /** Converts a raw chunk into the object handed out by {@link #next()}. */
  private final Function<String,T> op;

  /** The element the next call to {@link #next()} returns; {@code null} once exhausted. */
  private T nextToken; // = null;

  //TODO: not sure if this is the best way to name things...
  /**
   * Creates an iterator over the raw String chunks of {@code in}, i.e. one that
   * applies an {@code IdentityFunction} to every chunk.
   *
   * @param in The Reader to read all input from
   * @param delimiter The regular expression separating the chunks
   * @return An iterator over the chunks
   */
  public static DelimitRegExIterator<String> defaultDelimitRegExIterator(Reader in, String delimiter) {
    return new DelimitRegExIterator<>(in, delimiter, new IdentityFunction<>());
  }

  /**
   * Reads {@code r} to its end, splits the text on {@code delimiter} and
   * prepares the first element. The Reader is wrapped in a BufferedReader and
   * is not closed here.
   *
   * @param r The Reader to read all input from
   * @param delimiter The regular expression separating the chunks
   * @param op The function applied (via {@link #parseString(String)}) to each chunk
   * @throws UncheckedIOException If reading from {@code r} fails
   * @throws java.util.regex.PatternSyntaxException If {@code delimiter} is not a valid regular expression
   * @throws NullPointerException If {@code delimiter} is null
   */
  public DelimitRegExIterator(Reader r, String delimiter, Function<String,T> op) {
    this.op = op;
    this.tokens = tokenize(readAll(r), delimiter).iterator();
    // NOTE: this invokes the overridable parseString() from the constructor, so a
    // subclass's parseString() runs before that subclass's own constructor body.
    setNext();
  }

  /**
   * Reads every line of {@code r} and joins them, terminating each with a newline.
   * Failures are reported instead of being silently dropped.
   */
  private static String readAll(Reader r) {
    BufferedReader in = new BufferedReader(r);
    StringBuilder input = new StringBuilder();
    try {
      String line;
      while ((line = in.readLine()) != null) {
        input.append(line).append("\n");
      }
    } catch (IOException e) {
      throw new UncheckedIOException("DelimitRegExIterator could not read its input", e);
    }
    return input.toString();
  }

  /**
   * Removes a delimiter at the very start and at the very end of {@code text},
   * trims surrounding whitespace and splits the rest on {@code delimiter}.
   */
  private static List<String> tokenize(String text, String delimiter) {
    Objects.requireNonNull(delimiter, "delimiter");
    // Group the delimiter so that the anchors apply to the whole expression.
    // Without the group, a delimiter such as "a|b" would turn into "^a|b", which
    // matches (and deletes) every "b" in the text instead of a leading delimiter.
    String grouped = "(?:" + delimiter + ")";
    String stripped = Pattern.compile("^" + grouped).matcher(text).replaceAll("");
    stripped = Pattern.compile(grouped + "$").matcher(stripped).replaceAll("");
    // For text without any delimiter match (including empty text) split() returns
    // a single-element array holding the text itself.
    return Arrays.asList(stripped.trim().split(delimiter));
  }

  /** Advances to the next chunk and parses it, or stores {@code null} when no chunk is left. */
  private void setNext() {
    if (tokens.hasNext()) {
      String s = tokens.next();
      nextToken = parseString(s);
    } else {
      nextToken = null;
    }
  }

  /**
   * Converts one raw chunk into an element by applying the function given to
   * the constructor.
   *
   * @param s The raw chunk
   * @return The element produced for {@code s}
   */
  protected T parseString(String s) {
    return op.apply(s);
  }

  /**
   * Reports whether another element is available. Because exhaustion is
   * signalled by a {@code null} look-ahead element, a chunk that is parsed to
   * {@code null} also ends the iteration.
   */
  @Override
  public boolean hasNext() {
    return nextToken != null;
  }

  /**
   * Returns the current element and advances to the following one.
   *
   * @throws NoSuchElementException If the iterator is exhausted
   */
  @Override
  public T next() {
    if (nextToken == null) {
      throw new NoSuchElementException("DelimitRegExIterator exhausted");
    }
    T token = nextToken;
    setNext();
    return token;
  }

  /**
   * Returns the element that the next call to {@link #next()} would return,
   * without advancing, or {@code null} if the iterator is exhausted.
   */
  public Object peek() {
    return nextToken;
  }

  /**
   * Returns a factory that vends DelimitRegExIterators that read the contents of the
   * given Reader, split them on the specified delimiter, then return the resulting Strings.
   */
  public static IteratorFromReaderFactory<String> getFactory(String delim) {
    return DelimitRegExIteratorFactory.defaultDelimitRegExIteratorFactory(delim);
  }

  /**
   * Returns a factory that vends DelimitRegExIterators that read the contents of the
   * given Reader, split them on the specified delimiter, apply op, then return the result.
   */
  public static <T> IteratorFromReaderFactory<T> getFactory(String delim, Function<String,T> op) {
    return new DelimitRegExIteratorFactory<>(delim, op);
  }

  /**
   * A factory that remembers a delimiter and a function and creates a
   * {@link DelimitRegExIterator} with them for each Reader it is given.
   */
  public static class DelimitRegExIteratorFactory<T> implements IteratorFromReaderFactory<T>, Serializable {

    private static final long serialVersionUID = 6846060575832573082L;

    private final String delim;
    private final Function<String,T> op;

    /** Creates a factory whose iterators return the raw String chunks. */
    public static DelimitRegExIteratorFactory<String> defaultDelimitRegExIteratorFactory(String delim) {
      return new DelimitRegExIteratorFactory<>(delim, new IdentityFunction<>());
    }

    /**
     * @param delim The regular expression separating the chunks
     * @param op The function applied to each chunk
     */
    public DelimitRegExIteratorFactory(String delim, Function<String,T> op) {
      this.delim = delim;
      this.op = op;
    }

    /** Creates a new DelimitRegExIterator over the contents of {@code r}. */
    public Iterator<T> getIterator(Reader r) {
      return new DelimitRegExIterator<>(r, delim, op);
    }

  }

  /** Small demo: splits a sample text on blank lines and prints each chunk. */
  public static void main(String[] args) {
    String s = "@@123\nthis\nis\na\nsentence\n\n@@124\nThis\nis\nanother\n.\n\n@125\nThis\nis\nthe\nlast\n";
    DelimitRegExIterator<String> di = DelimitRegExIterator.defaultDelimitRegExIterator(new StringReader(s), "\n\n");
    while (di.hasNext()) {
      System.out.println("****\n" + di.next() + "\n****");
    }
  }

}