/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.xbib.elasticsearch.index.analysis.combo;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.junit.Test;
import org.xbib.elasticsearch.index.analysis.BaseTokenStreamTest;

import java.io.IOException;

/**
 * Test case for {@link ComboTokenStream}.
 */
public class ComboTokenStreamTests extends BaseTokenStreamTest {

    /**
     * A {@link TokenStream} that replays a fixed sequence of tokens.
     * <p>
     * Its constructor takes the same arrays that are handed to
     * {@code assertTokenStreamContents()}, so a stream built from a given set of
     * arrays is expected to satisfy the assertion made with those same arrays.
     */
    public final class ReplayTokenStream extends TokenStream {

        int index;
        int length;
        String[] outputs;
        int[] positionIncrements;
        int[] startOffsets;
        int[] endOffsets;
        CharTermAttribute output;
        PositionIncrementAttribute positionIncrement;
        OffsetAttribute offset;

        /**
         * Creates a stream that emits one token per entry of {@code outputs}.
         *
         * @param outputs            the term texts, in emission order; must not be {@code null}
         * @param startOffsets       start offset of each token, or {@code null} (together with
         *                           {@code endOffsets}) to emit no offsets
         * @param endOffsets         end offset of each token, or {@code null} (together with
         *                           {@code startOffsets}) to emit no offsets
         * @param positionIncrements position increment of each token, or {@code null} to leave
         *                           position increments untouched
         * @throws NullPointerException     if {@code outputs} is {@code null}
         * @throws IllegalArgumentException if a supplied array does not have the same length as
         *                                  {@code outputs}, or if only one of the two offset
         *                                  arrays is supplied
         */
        ReplayTokenStream(String[] outputs, int[] startOffsets, int[] endOffsets, int[] positionIncrements) {
            // Without term texts there is nothing to replay.
            if (outputs == null) {
                throw new NullPointerException("Outputs is null");
            }

            this.index = 0;
            this.outputs = outputs;
            this.startOffsets = startOffsets;
            this.endOffsets = endOffsets;
            this.positionIncrements = positionIncrements;
            this.length = outputs.length;
            this.output = addAttribute(CharTermAttribute.class);

            // Offsets are optional, but when used both arrays have to be complete.
            final boolean offsetsRequested = !(startOffsets == null && endOffsets == null);
            if (offsetsRequested) {
                if (startOffsets == null || startOffsets.length != this.length) {
                    throw new IllegalArgumentException("Bad startOffsets");
                }
                if (endOffsets == null || endOffsets.length != this.length) {
                    throw new IllegalArgumentException("Bad endOffsets");
                }
                this.offset = addAttribute(OffsetAttribute.class);
            }

            // Position increments are optional as well.
            if (positionIncrements != null) {
                if (positionIncrements.length != this.length) {
                    throw new IllegalArgumentException("Bad positionIncrements");
                }
                this.positionIncrement = addAttribute(PositionIncrementAttribute.class);
            }
        }

        /**
         * Clears the attributes, then publishes the next recorded token, if any.
         *
         * @return {@code true} if a token was published, {@code false} once every
         *         recorded token has been emitted
         */
        @Override
        public final boolean incrementToken() throws IOException {
            clearAttributes();
            if (index < length) {
                if (output != null) {
                    final char[] termChars = outputs[index].toCharArray();
                    output.copyBuffer(termChars, 0, termChars.length);
                }
                if (offset != null) {
                    offset.setOffset(startOffsets[index], endOffsets[index]);
                }
                if (positionIncrement != null) {
                    positionIncrement.setPositionIncrement(positionIncrements[index]);
                }
                index++;
                return true;
            }
            return false;
        }
    }

    /** Builds a fresh replay stream over the tokens "ab", "cd", "ef". */
    private ReplayTokenStream replayLowercasePairs() {
        return new ReplayTokenStream(
                new String[]{"ab", "cd", "ef"},
                new int[]{0, 3, 5},
                new int[]{2, 4, 6},
                new int[]{1, 1, 1});
    }

    /** Builds a fresh replay stream over the tokens "B", "D", "F", one per position. */
    private ReplayTokenStream replayUppercaseSingles() {
        return new ReplayTokenStream(
                new String[]{"B", "D", "F"},
                new int[]{1, 4, 6},
                new int[]{2, 4, 6},
                new int[]{1, 1, 1});
    }

    /** Builds a fresh replay stream over "A".."F" in which every second token has position increment 0. */
    private ReplayTokenStream replayUppercaseStacked() {
        return new ReplayTokenStream(
                new String[]{"A", "B", "C", "D", "E", "F"},
                new int[]{0, 1, 3, 4, 5, 6},
                new int[]{1, 2, 3, 4, 5, 6},
                new int[]{1, 0, 1, 0, 1, 0});
    }

    /** Asserts the expected combination of the "ab cd ef" and "B D F" streams. */
    private void assertPairwiseInterleaving(TokenStream combined) throws IOException {
        assertTokenStreamContents(combined,
                new String[]{"ab", "B", "cd", "D", "ef", "F"},
                new int[]{0, 1, 3, 4, 5, 6},
                new int[]{2, 2, 4, 4, 6, 6},
                new int[]{1, 0, 1, 0, 1, 0});
    }

    /**
     * Asserts the expected combination of the "ab cd ef" and "A".."F" streams.
     * The expected token order depends on
     * {@code ComboTokenStream.KEEP_STREAM_IF_SAME_POSITION}.
     */
    private void assertSamePositionMerge(TokenStream combined) throws IOException {
        if (ComboTokenStream.KEEP_STREAM_IF_SAME_POSITION) {
            assertTokenStreamContents(combined,
                    new String[]{"A", "B", "ab", "C", "D", "cd", "E", "F", "ef"},
                    new int[]{0, 1, 0, 3, 4, 3, 5, 6, 5},
                    new int[]{1, 2, 2, 3, 4, 4, 5, 6, 6},
                    new int[]{1, 0, 0, 1, 0, 0, 1, 0, 0});
        } else {
            assertTokenStreamContents(combined,
                    new String[]{"A", "ab", "B", "C", "cd", "D", "E", "ef", "F"},
                    new int[]{0, 0, 1, 3, 3, 4, 5, 5, 6},
                    new int[]{1, 2, 2, 3, 4, 4, 5, 6, 6},
                    new int[]{1, 0, 0, 1, 0, 0, 1, 0, 0});
        }
    }

    /** Sanity check: the replay stream itself emits exactly what it was given. */
    @Test
    public void testReplayTokenStream() throws IOException {
        TokenStream ts = replayLowercasePairs();
        assertTokenStreamContents(ts,
                new String[]{"ab", "cd", "ef"},
                new int[]{0, 3, 5},
                new int[]{2, 4, 6},
                new int[]{1, 1, 1});
    }

    /** A combo stream wrapping a single stream is expected to emit that stream's tokens. */
    @Test
    public void testSingleTokenStream() throws IOException {
        ComboTokenStream cts = new ComboTokenStream(replayLowercasePairs());
        cts.addAttribute(CheckClearAttributesAttribute.class);
        assertTokenStreamContents(cts,
                new String[]{"ab", "cd", "ef"},
                new int[]{0, 3, 5},
                new int[]{2, 4, 6},
                new int[]{1, 1, 1});
    }

    /** Two streams with one token per position each; the same result is expected for either argument order. */
    @Test
    public void testDoubleTokenStream() throws IOException {
        ComboTokenStream cts = new ComboTokenStream(
                replayLowercasePairs(),
                replayUppercaseSingles());
        assertPairwiseInterleaving(cts);

        // Now in reversed order
        cts = new ComboTokenStream(
                replayUppercaseSingles(),
                replayLowercasePairs());
        assertPairwiseInterleaving(cts);
    }

    /**
     * Two streams where one of them carries several tokens at the same position;
     * the same result is expected for either argument order.
     */
    @Test
    public void testDoubleTokenStreamMultipleAtSamePosition() throws IOException {
        ComboTokenStream cts = new ComboTokenStream(
                replayLowercasePairs(),
                replayUppercaseStacked());
        assertSamePositionMerge(cts);

        // Now in reversed order
        cts = new ComboTokenStream(
                replayUppercaseStacked(),
                replayLowercasePairs());
        assertSamePositionMerge(cts);
    }
}