package edu.stanford.nlp.objectbank;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.AbstractIterator;
import java.io.*;
import java.net.URL;
import java.util.*;
/**
* A ReaderIteratorFactory provides a means of getting an Iterator
* which returns java.io.Readers over a Collection of input
* sources. Currently supported input sources are: Files, Strings,
* URLs and Readers. A ReaderIteratorFactory may take a Collection
* on construction and new sources may be added either individually
* (via the add(Object) method) or as a Collection (via the
* addAll(Collection) method). The implementation automatically
* determines the type of input and produces a java.io.Reader
* accordingly. If you wish to add support for a new kind of input,
* refer to the setNextObject() method of the nested class
* ReaderIterator.
* <p>
* The Readers returned by this class are not closed by the class when you
* move to the next element (nor at any other time). So, if you want the
* files closed, then the caller needs to close them. The caller can only
* do this if they pass in Readers. Otherwise, this class should probably
* close them but currently doesn't.
* <p>
* TODO: Have this class close the files that it opens.
*
* @author <A HREF="mailto:jrfinkel@stanford.edu">Jenny Finkel</A>
* @version 1.0
*/
//TODO: does this always store the same kind of thing in a given instance,
//or do you want to allow having some Files, some Strings, etc.?
public class ReaderIteratorFactory implements Iterable<Reader> {

  /**
   * The underlying Collection of input sources. Currently supported
   * input sources are: Files, Strings, URLs and Readers. The
   * implementation automatically determines the type of each input and
   * produces a java.io.Reader accordingly.
   */
  protected Collection<Object> c;

  /**
   * The encoding used when opening File sources. Defaults to "UTF-8".
   * It is not applied to String, URL or Reader sources.
   */
  protected String enc = "UTF-8";

  /**
   * Constructs a ReaderIteratorFactory with no input sources. Sources can
   * be added afterwards via {@link #add(Object)} or
   * {@link #addAll(Collection)}.
   */
  public ReaderIteratorFactory() {
    c = new ArrayList<>();
  }

  /**
   * Constructs a ReaderIteratorFactory from the input sources
   * contained in the Collection. The Collection should contain
   * Objects of type File, String, URL and/or Reader. See class
   * description for details.
   *
   * @param c Collection of input sources.
   */
  public ReaderIteratorFactory(Collection<?> c) {
    this();
    this.c.addAll(c);
  }

  /**
   * Constructs a ReaderIteratorFactory from the input sources contained in
   * the Collection, using the given encoding for File sources.
   *
   * @param c Collection of input sources.
   * @param encoding character encoding used when opening File sources
   */
  public ReaderIteratorFactory(Collection<?> c, String encoding) {
    this();
    this.enc = encoding;
    this.c.addAll(c);
  }

  /**
   * Convenience constructor to construct a ReaderIteratorFactory from a single
   * input source. The Object should be of type File, String, URL or Reader.
   * See class description for details.
   *
   * @param o an input source that can be converted into a Reader
   */
  public ReaderIteratorFactory(Object o) {
    this(Collections.singleton(o));
  }

  /**
   * Convenience constructor to construct a ReaderIteratorFactory from a single
   * input source, using the given encoding for File sources.
   *
   * @param o an input source that can be converted into a Reader
   * @param encoding character encoding used when opening File sources
   */
  public ReaderIteratorFactory(Object o, String encoding) {
    this(Collections.singleton(o), encoding);
  }

  /**
   * Returns an Iterator over Readers for the input sources in the underlying
   * Collection. The Reader for the first source is opened as soon as the
   * Iterator is created.
   *
   * @return an Iterator of Readers, one per input source.
   */
  @Override
  public Iterator<Reader> iterator() {
    return new ReaderIterator();
  }

  /**
   * Adds an Object to the underlying Collection of input sources.
   *
   * @param o Input source to be added to the underlying Collection.
   * @return whatever the underlying Collection's add method returns
   */
  public boolean add(Object o) {
    return this.c.add(o);
  }

  /**
   * Removes an Object from the underlying Collection of input sources.
   *
   * @param o Input source to be removed from the underlying Collection.
   * @return whatever the underlying Collection's remove method returns
   */
  public boolean remove(Object o) {
    return this.c.remove(o);
  }

  /**
   * Adds all Objects in Collection c to the underlying Collection of
   * input sources.
   *
   * @param c Collection of input sources to be added to the underlying Collection.
   * @return whatever the underlying Collection's addAll method returns
   */
  public boolean addAll(Collection<?> c) {
    return this.c.addAll(c);
  }

  /**
   * Removes all Objects in Collection c from the underlying Collection of
   * input sources.
   *
   * @param c Collection of input sources to be removed from the underlying Collection.
   * @return whatever the underlying Collection's removeAll method returns
   */
  public boolean removeAll(Collection<?> c) {
    return this.c.removeAll(c);
  }

  /**
   * Removes all Objects from the underlying Collection of input sources
   * except those in Collection c.
   *
   * @param c Collection of input sources to be retained in the underlying Collection.
   * @return whatever the underlying Collection's retainAll method returns
   */
  public boolean retainAll(Collection<?> c) {
    return this.c.retainAll(c);
  }

  /**
   * Iterator which hands out one Reader per input source. The Reader for the
   * upcoming source is opened one step ahead of the caller, so a failure to
   * open a source surfaces from the constructor or from the preceding call
   * to {@link #next()}.
   */
  class ReaderIterator extends AbstractIterator<Reader> {

    /** Remaining input sources; set to null once they are exhausted. */
    private Iterator<?> iter;

    /** Reader to be returned by the next call to next(), or null if none is left. */
    private Reader nextObject;

    /**
     * Sole constructor. Opens the Reader for the first input source, if any.
     */
    public ReaderIterator() {
      iter = c.iterator();
      setNextObject();
    }

    /**
     * Sets nextObject to a Reader for the next input source, or to null if
     * there is no next input source. Directories are replaced by their
     * entries, which are then examined in turn, so empty directories are
     * skipped and nested directories are expanded wherever they appear.
     * The per-type conversion lives in openReader(Object).
     */
    private void setNextObject() {
      while (iter.hasNext()) {
        Object o = iter.next();
        if (o instanceof File && ((File) o).isDirectory()) {
          // Re-enter the loop rather than assuming the first entry exists and
          // is a plain file: the directory may be empty or start with a
          // subdirectory, and the element after it may not be a File at all.
          iter = expandDirectory((File) o, iter);
          continue;
        }
        nextObject = openReader(o);
        return;
      }
      nextObject = null;
      iter = null;
    }

    /**
     * Builds an iterator over the entries of dir followed by everything
     * still pending in rest. Entries appear in the order returned by
     * File.listFiles().
     *
     * @param dir directory to expand
     * @param rest the not yet consumed input sources
     * @return iterator over the directory entries followed by the pending sources
     * @throws RuntimeException wrapping an IOException if the directory cannot be listed
     */
    private Iterator<?> expandDirectory(File dir, Iterator<?> rest) {
      File[] entries = dir.listFiles();
      if (entries == null) {
        // listFiles() signals failure by returning null instead of throwing.
        throw new RuntimeException(new IOException("Unable to list contents of directory " + dir));
      }
      List<Object> pending = new ArrayList<>(Arrays.asList(entries));
      while (rest.hasNext()) {
        pending.add(rest.next());
      }
      return pending.iterator();
    }

    /**
     * Converts a single (non-directory) input source into a Reader. To add
     * support for a new kind of input, add a branch here.
     *
     * @param o input source of type File, String, URL or Reader
     * @return a Reader over the contents of the input source
     * @throws RuntimeException if the source type is unsupported, or wrapping
     *         the IOException raised while opening the source
     */
    private Reader openReader(Object o) {
      try {
        if (o instanceof File) {
          return IOUtils.readerFromFile((File) o, enc);
        } else if (o instanceof String) {
          // A String is read as literal text; it is not interpreted as a file name.
          return new BufferedReader(new StringReader((String) o));
        } else if (o instanceof URL) {
          // todo: add encoding specification to this as well? -akleeman
          return new BufferedReader(new InputStreamReader(((URL) o).openStream()));
        } else if (o instanceof Reader) {
          return new BufferedReader((Reader) o);
        } else {
          throw new RuntimeException("don't know how to get Reader from class " + o.getClass() + " of object " + o);
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * @return true if there is another (valid) input source to read from
     */
    @Override
    public boolean hasNext() {
      return nextObject != null;
    }

    /**
     * Returns the prepared Reader and then opens the Reader for the following
     * input source.
     *
     * @return Reader for the next input source.
     * @throws NoSuchElementException if there is no further input source
     */
    @Override
    public Reader next() {
      if (nextObject == null) {
        throw new NoSuchElementException();
      }
      Reader tmp = nextObject;
      setNextObject();
      return tmp;
    }
  }
}