package org.wikipedia.miner.util; import java.util.Iterator; import java.util.NoSuchElementException; import org.wikipedia.miner.db.WEntry; import org.wikipedia.miner.db.WEnvironment; import org.wikipedia.miner.db.WIterator; import org.wikipedia.miner.db.struct.DbLabel; import org.wikipedia.miner.db.struct.DbPage; import org.wikipedia.miner.model.Label; import org.wikipedia.miner.model.Page; import org.wikipedia.miner.model.Page.PageType; import org.wikipedia.miner.util.text.TextProcessor; /** * @author David Milne * * Provides efficient iteration over the labels in Wikipedia */ public class LabelIterator implements Iterator<Label>{ WEnvironment env ; TextProcessor tp ; WIterator<String,DbLabel> iter ; Label nextLabel = null ; /** * Creates an iterator that will loop through all pages in Wikipedia. * * @param database an active (connected) Wikipedia database. */ public LabelIterator(WEnvironment env, TextProcessor tp) { this.env = env ; this.tp = tp ; iter = env.getDbLabel(tp).getIterator() ; queueNext() ; } public boolean hasNext() { return (nextLabel != null) ; } public void remove() { throw new UnsupportedOperationException() ; } public Label next() { if (nextLabel == null) throw new NoSuchElementException() ; Label l = nextLabel ; queueNext() ; return l ; } private void queueNext() { try { nextLabel=toLabel(iter.next()) ; } catch (NoSuchElementException e) { nextLabel = null ; } } private Label toLabel(WEntry<String,DbLabel> e) { if (e== null) return null ; else return Label.createLabel(env, e.getKey(), e.getValue(), tp) ; } public void close() { iter.close(); } }