/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ /** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */ package cc.mallet.pipe.iterator; import java.io.*; import java.util.Iterator; import java.util.regex.*; import java.net.URI; import java.net.URISyntaxException; import cc.mallet.pipe.Pipe; import cc.mallet.types.*; public class SimpleFileLineIterator implements Iterator<Instance> { BufferedReader reader = null; int index = -1; String currentLine = null; boolean hasNextUsed = false; int progressDisplayInterval = 0; public SimpleFileLineIterator (String filename) { try { this.reader = new BufferedReader (new FileReader(filename)); this.index = 0; } catch (IOException e) { throw new RuntimeException (e); } } public SimpleFileLineIterator (File file) { try { this.reader = new BufferedReader (new FileReader(file)); this.index = 0; } catch (IOException e) { throw new RuntimeException (e); } } /** Set the iterator to periodically print the * total number of lines read to standard out. * @param interval how often to print */ public void setProgressDisplayInterval(int interval) { progressDisplayInterval = interval; } public Instance next () { URI uri = null; try { uri = new URI ("array:" + index++); } catch (Exception e) { throw new RuntimeException (e); } if (!hasNextUsed) { try { currentLine = reader.readLine(); } catch (IOException e) { throw new RuntimeException (e); } } else { hasNextUsed = false; } if (progressDisplayInterval != 0 && index > 0 && index % progressDisplayInterval == 0) { System.out.println(index); } return new Instance (currentLine, null, uri, null); } public boolean hasNext () { hasNextUsed = true; try { currentLine = reader.readLine(); } catch (IOException e) { throw new RuntimeException (e); } return (currentLine != null); } public void remove () { throw new IllegalStateException ("This Iterator<Instance> does not support remove()."); } }