/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis; import java.io.IOException; import java.util.Random; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.TestUtil; // TODO: sometimes remove tokens too...? /** Randomly inserts overlapped (posInc=0) tokens with * posLength sometimes > 1. The chain must have * an OffsetAttribute. */ public final class MockGraphTokenFilter extends LookaheadTokenFilter<LookaheadTokenFilter.Position> { private static boolean DEBUG = false; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final long seed; private Random random; public MockGraphTokenFilter(Random random, TokenStream input) { super(input); seed = random.nextLong(); } @Override protected Position newPosition() { return new Position(); } @Override protected void afterPosition() throws IOException { if (DEBUG) { System.out.println("MockGraphTF.afterPos"); } if (random.nextInt(7) == 5) { final int posLength = TestUtil.nextInt(random, 1, 5); if (DEBUG) { System.out.println(" do insert! posLen=" + posLength); } final Position posEndData = positions.get(outputPos + posLength); // Look ahead as needed until we figure out the right // endOffset: while(!end && posEndData.endOffset == -1 && inputPos <= (outputPos + posLength)) { if (!peekToken()) { break; } } if (posEndData.endOffset != -1) { // Notify super class that we are injecting a token: insertToken(); clearAttributes(); posLenAtt.setPositionLength(posLength); termAtt.append(TestUtil.randomUnicodeString(random)); posIncAtt.setPositionIncrement(0); offsetAtt.setOffset(positions.get(outputPos).startOffset, posEndData.endOffset); if (DEBUG) { System.out.println(" inject: outputPos=" + outputPos + " startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset() + " posLength=" + posLenAtt.getPositionLength()); } // TODO: set TypeAtt too? } else { // Either 1) the tokens ended before our posLength, // or 2) our posLength ended inside a hole from the // input. In each case we just skip the inserted // token. } } } @Override public void reset() throws IOException { super.reset(); // NOTE: must be "deterministically random" because // BaseTokenStreamTestCase pulls tokens twice on the // same input and asserts they are the same: this.random = new Random(seed); } @Override public void close() throws IOException { super.close(); this.random = null; } @Override public boolean incrementToken() throws IOException { if (DEBUG) { System.out.println("MockGraphTF.incr inputPos=" + inputPos + " outputPos=" + outputPos); } if (random == null) { throw new IllegalStateException("incrementToken called in wrong state!"); } return nextToken(); } }