/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.pipe.iterator;

import java.io.*;
import java.util.Iterator;
import java.util.NoSuchElementException;

import cc.mallet.types.*;

/**
 * Iterator that takes a Reader, breaks up the input into
 *  top-level parenthesized expressions.  For example,
 *  in the input <tt>(a (a b) c) f (d e)</tt>, there
 *  are two top-level expressions '(a (a b) c)' and '(d e)'.
 *
 * Text that is not within parentheses is ignored.
 *
 * <p>Each group is returned as the data of an {@link Instance} whose name is
 *  <tt>parengroup</tt> followed by the zero-based index of the group; the
 *  target and source of the instance are <tt>null</tt>.
 *
 * <p>If the input ends before a group is closed, the text read so far
 *  (starting at the opening delimiter) is returned as the final group.
 *  Because the opening delimiter is tested first, using the same character
 *  for both delimiters means a group is never closed before end of input.
 *
 * Created: Thu Feb 26 13:45:43 2004
 *
 * @author <a href="mailto:casutton@cs.umass.edu">Charles Sutton</a>
 * @version $Id: ParenGroupIterator.java,v 1.1 2007/10/22 21:37:49 mccallum Exp $
 */
public class ParenGroupIterator implements Iterator<Instance> {

  private final Reader reader;
  private final char open;
  private final char close;

  /** The group that the next call to {@link #next()} returns, or null when input is exhausted. */
  private String nextGroup;

  /** Number of groups handed out so far; used to name the instances. */
  private int groupIdx;

  /**
   * Creates an iterator over groups delimited by '(' and ')'.
   *
   * @param input the character source to read groups from
   */
  public ParenGroupIterator (Reader input)
  {
    this (input, '(', ')');
  }

  /**
   * Creates an iterator over groups delimited by the given characters.
   * The first group is read eagerly, so this constructor already consumes
   * input from the reader.
   *
   * @param input the character source to read groups from
   * @param openParen character that opens a group
   * @param closeParen character that closes a group
   * @throws RuntimeException wrapping any IOException raised while reading
   */
  public ParenGroupIterator (Reader input, char openParen, char closeParen)
  {
    this.reader = new BufferedReader (input);
    this.open = openParen;
    this.close = closeParen;
    nextGroup = getNextGroup ();
  }

  /**
   * Reads the next top-level group from the reader.
   *
   * @return the text of the group including its outer delimiters, or null
   *  if end of input is reached before another opening delimiter is found
   * @throws RuntimeException wrapping any IOException raised while reading
   */
  private String getNextGroup ()
  {
    StringBuilder buf = new StringBuilder ();
    // The opening delimiter consumed by the skip loop below is depth 1.
    int depth = 1;

    try {
      // Eat up nonparen characters
      int b;
      while ((b = reader.read ()) != (int) open) {
        if (b == -1) {
          return null;
        }
      }

      buf.append (open);
      while ((b = reader.read ()) != -1) {
        char ch = (char) b;
        buf.append (ch);
        if (ch == open) {
          depth++;
        } else if (ch == close) {
          depth--;
          if (depth == 0) {
            // Matching close of the top-level group: stop here.
            break;
          }
        }
      }
    } catch (IOException e) {
      throw new RuntimeException (e);
    }

    return buf.toString ();
  }

  // Iterator<Instance> implementation

  /**
   * Returns the next group wrapped in an Instance and reads ahead to the
   * following one.
   *
   * @return an Instance whose data is the group text and whose name is
   *  "parengroup" followed by the group's zero-based index
   * @throws NoSuchElementException if there are no more groups
   */
  @Override
  public Instance next ()
  {
    if (nextGroup == null) {
      throw new NoSuchElementException ("No more parenthesized groups in input.");
    }
    Instance carrier = new Instance (nextGroup, null, "parengroup" + (groupIdx++), null);
    nextGroup = getNextGroup ();
    return carrier;
  }

  /**
   * @return true if another group is available from {@link #next()}
   */
  @Override
  public boolean hasNext ()
  {
    return nextGroup != null;
  }

  /**
   * Removal is not supported.
   *
   * @throws IllegalStateException always
   */
  @Override
  public void remove ()
  {
    throw new IllegalStateException ("This Iterator<Instance> does not support remove().");
  }
}