package org.apache.lucene.mark;

/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

/**
 * Utility class used to iterate either forward or backward through the
 * tokens in a single string of text.
 *
 * <p>All tokens are read from the stream up front and held in an array;
 * the iterator then moves an index through that array while tracking the
 * word position implied by each token's position increment.</p>
 *
 * <p>Created: Dec 13, 2004</p>
 *
 * @author Martin Haye
 */
public class BasicWordIter implements WordIter, Cloneable 
{
  /** The original text to which the tokens refer */
  protected String text;

  /** Array of tokens, holding words from the current chunk */
  protected Token[] tokens;

  /** Index into {@link #tokens} of the token this iterator is pointed at */
  protected int tokNum;

  /** Word position of the current token */
  protected int wordPos = -1;

  /** Word position of the last token */
  protected int maxWordPos = -1;

  /**
   * Construct the iterator and read in tokens from the given stream.
   * The stream is always closed before this constructor returns, whether
   * or not reading succeeded.
   *
   * @param text      text represented by the tokens
   * @param stream    stream of tokens from the text
   *
   * @throws IOException  If something goes wrong reading from 'stream'
   */
  public BasicWordIter(String text, TokenStream stream)
    throws IOException 
  {
    Token t;

    // Keep a reference to the text for future use.
    this.text = text;

    // Pull out all the tokens and make them into a list. The word position
    // of the final token is accumulated as we go. The close is done in a
    // 'finally' so the stream is released even if reading fails part-way.
    //
    ArrayList tokenList = new ArrayList(10);
    try {
      while ((t = stream.next()) != null) {
        tokenList.add(t);
        maxWordPos += t.getPositionIncrement();
      }
    }
    finally {
      stream.close();
    }

    // Convert the list to an easier-to-use array.
    tokens = (Token[])tokenList.toArray(new Token[tokenList.size()]);

    // Reset variables, and we're ready to go! The iterator starts out
    // *before* the first token; next() must be called to reach it.
    //
    tokNum = wordPos = -1;
  } // constructor

  /**
   * Do-nothing constructor - should only be used by derived classes
   * that perform their own initialization.
   */
  protected BasicWordIter() {
  }

  /**
   * Make a copy of this iterator. This is a shallow copy made by
   * {@link Object#clone()}, so the copy refers to the same text and
   * token array as the original.
   *
   * @return  the copied iterator
   */
  public Object clone() 
  {
    try {
      return super.clone();
    }
    catch (CloneNotSupportedException e) {
      // Should not happen, since this class implements Cloneable.
      throw new RuntimeException(e);
    }
  }

  /**
   * Advance to the next token, if there is one.
   *
   * @param force   not used by this implementation
   * @return        true if the iterator moved; false if it was already
   *                on the last token (or there are no tokens)
   */
  public boolean next(boolean force) 
  {
    // Are we at the end?
    if (tokNum >= tokens.length - 1)
      return false;

    // Advance, adding the new token's increment to the word position.
    tokNum++;
    wordPos += tokens[tokNum].getPositionIncrement();
    return true;
  } // next()

  /**
   * Back up to the previous token, if there is one.
   *
   * @param force   not used by this implementation
   * @return        true if the iterator moved; false if it was already
   *                on (or before) the first token
   */
  public boolean prev(boolean force) 
  {
    // Are we at the start?
    if (tokNum <= 0)
      return false;

    // Back up one token. The increment of the token being left is what
    // was added when we advanced onto it, so subtract that same amount.
    //
    wordPos -= tokens[tokNum].getPositionIncrement();
    --tokNum;
    return true;
  } // prev()

  /**
   * Reposition toward the given word position: first step backward while
   * the current word position is at or past the target, then step forward
   * while it is still short of the target. Either loop stops early if the
   * iterator cannot move any further.
   *
   * @param targetPos   word position to seek to
   * @param force       passed through to {@link #next(boolean)}; passed to
   *                    {@link #prev(boolean)} only while the current
   *                    position is strictly past the target
   */
  public void seekFirst(int targetPos, boolean force) 
  {
    // Move backward if we have to.
    while (targetPos <= wordPos) {
      if (!prev(force && targetPos < wordPos))
        break;
    }

    // Move forward until finished.
    while (targetPos > wordPos) {
      if (!next(force))
        break;
    }
  } // seekFirst()

  /**
   * Reposition toward the given word position: first step forward while
   * the current word position is at or before the target, then step
   * backward while it is past the target. Either loop stops early if the
   * iterator cannot move any further.
   *
   * @param targetPos   word position to seek to
   * @param force       passed through to {@link #prev(boolean)}; passed to
   *                    {@link #next(boolean)} only while the current
   *                    position is strictly before the target
   */
  public void seekLast(int targetPos, boolean force) 
  {
    // Move forward if we have to.
    while (targetPos >= wordPos) {
      if (!next(force && targetPos > wordPos))
        break;
    }

    // Move backward until finished.
    while (targetPos < wordPos) {
      if (!prev(force))
        break;
    }
  } // seekLast()

  /**
   * Create a new position object and fill it in for the requested mode.
   *
   * @param startOrEnd  one of the {@link WordIter} position constants
   *                    (FIELD_START, TERM_START, TERM_END, TERM_END_PLUS,
   *                    FIELD_END)
   * @return            a new {@link BasicMarkPos} holding the position
   */
  public MarkPos getPos(int startOrEnd) 
  {
    MarkPos pos = new BasicMarkPos();
    getPos(pos, startOrEnd);
    return pos;
  }

  /**
   * Fill in an existing position object for the requested mode.
   *
   * <p>The three TERM_* modes index the current token, so the iterator
   * must be positioned on a token (i.e. a call to next() has succeeded)
   * before they are used.</p>
   *
   * @param pos         position to fill in; must be a {@link BasicMarkPos},
   *                    since it is cast to that type
   * @param startOrEnd  one of the {@link WordIter} position constants
   */
  public void getPos(MarkPos pos, int startOrEnd) 
  {
    BasicMarkPos bm = (BasicMarkPos)pos;
    bm.fullText = text;

    switch (startOrEnd) 
    {
      // Start of field
      case WordIter.FIELD_START:
        bm.wordPos = 0;
        bm.charPos = 0;
        break;

      // First character of the current word
      case WordIter.TERM_START:
        bm.wordPos = wordPos;
        bm.charPos = tokens[tokNum].startOffset();
        break;

      // Last character (plus one) of the current word
      case WordIter.TERM_END:
        bm.wordPos = wordPos;
        bm.charPos = tokens[tokNum].endOffset();
        break;

      // End of word plus spaces and punctuation: that is, up to the start
      // of the following token, or to the end of the text if the current
      // token is the last one.
      //
      case WordIter.TERM_END_PLUS:
        bm.wordPos = wordPos;
        if (tokNum < tokens.length - 1)
          bm.charPos = tokens[tokNum + 1].startOffset();
        else
          bm.charPos = text.length();
        break;

      // End of field.
      case WordIter.FIELD_END:
        bm.wordPos = maxWordPos;
        bm.charPos = text.length();
        break;

      // Only checked when assertions are enabled; otherwise 'pos' is left
      // with just its text reference updated.
      //
      default:
        assert false : "Unknown start/end mode";
    }
  } // getPos()

  /**
   * Get the text of the current token. The iterator must be positioned
   * on a token.
   *
   * @return  term text of the token the iterator is pointed at
   */
  public final String term() {
    return tokens[tokNum].termText();
  }
} // class BasicWordIter