/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis;

import java.io.IOException;

/**
 * Works in conjunction with the {@link SinkTokenizer} to provide the ability to set aside tokens
 * that have already been analyzed. This is useful in situations where multiple fields share
 * many common analysis steps and then go their separate ways.
 * <p/>
 * It is also useful for doing things like entity extraction or proper noun analysis as
 * part of the analysis workflow and saving off those tokens for use in another field.
 *
 * <pre>
SinkTokenizer sink1 = new SinkTokenizer();
SinkTokenizer sink2 = new SinkTokenizer();

TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);

TokenStream final1 = new LowerCaseFilter(source1);
TokenStream final2 = source2;
TokenStream final3 = new EntityDetect(sink1);
TokenStream final4 = new URLDetect(sink2);

d.add(new Field("f1", final1));
d.add(new Field("f2", final2));
d.add(new Field("f3", final3));
d.add(new Field("f4", final4));
 * </pre>
 * In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
 * <code>reader1</code> and <code>reader2</code> after the whitespace tokenizer,
 * and we can further wrap any of these in extra analysis; more "sources" can be inserted if desired.
 * It is important that the tees are consumed before the sinks (in the above example, the tee field
 * names must sort before the sink field names).
 * Note: the EntityDetect and URLDetect TokenStreams are illustrative only and do not currently exist in Lucene.
 * <p/>
 * See <a href="http://issues.apache.org/jira/browse/LUCENE-1058">LUCENE-1058</a>.
 * <p/>
 * WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
 * If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
 * the same functionality.
 *
 * @see SinkTokenizer
 * @deprecated Use {@link TeeSinkTokenFilter} instead
 **/
@Deprecated
public class TeeTokenFilter extends TokenFilter {

  /** The sink that receives a copy of every token passing through this filter. */
  SinkTokenizer sink;

  public TeeTokenFilter(TokenStream input, SinkTokenizer sink) {
    super(input);
    this.sink = sink;
  }

  public Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    Token nextToken = input.next(reusableToken);
    // Copy the token into the sink as a side effect, then pass it on unchanged.
    sink.add(nextToken);
    return nextToken;
  }
}
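
/*
 * A minimal consumption-order sketch, assuming the old TokenStream API used by
 * this class: tokens only reach the sink as a side effect of consuming the tee,
 * so the tee must be fully drained before the sink is read. DemoTeeUsage, its
 * demo method, and the reader parameter are illustrative names, not Lucene API.
 */
class DemoTeeUsage {
  static void demo(java.io.Reader reader) throws IOException {
    SinkTokenizer sink = new SinkTokenizer();
    TokenStream tee = new TeeTokenFilter(new WhitespaceTokenizer(reader), sink);

    final Token scratch = new Token();
    // Drain the tee first; TeeTokenFilter copies each token into the sink.
    while (tee.next(scratch) != null) {
      // further analysis of the main stream would happen here
    }
    // Only now does the sink hold buffered tokens to replay.
    for (Token t = sink.next(scratch); t != null; t = sink.next(scratch)) {
      System.out.println(t.term());
    }
  }
}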