/** * Copyright 2007 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sohospace.lucene.analysis.xanalyzer; import java.io.IOException; import java.io.Reader; import java.util.Arrays; import java.util.Iterator; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import com.sohospace.lucene.analysis.xanalyzer.collector.QueryTokenCollector; import com.sohospace.lucene.analysis.xanalyzer.collector.WriterTokenCollector; import com.sohospace.paoding.Beef; import com.sohospace.paoding.Collector; import com.sohospace.paoding.Knife; import com.sohospace.paoding.Paoding; /** * XTokenizer�ǻ��ڡ��Ҷ���ţ����ܵ�TokenStreamʵ�֣�ΪXAnalyzerʹ�á� * <p> * * @author Zhiliang Wang [qieqie.wang@gmail.com] * * @see Beef * @see Knife * @see Paoding * @see Tokenizer * @see XAnalyzer * * @see Collector * @see TokenCollector * @see QueryTokenCollector * @see WriterTokenCollector * * @since 1.0 */ public final class XTokenizer extends TokenStream implements Collector { // ------------------------------------------------- /** * �ı��ַ�Դ * * @see #next() */ private final Reader input; /** * */ private static final int bufferLength = 128; /** * ��������{@link #input}���ı��ַ� * * @see #next() */ private final char[] buffer = new char[bufferLength]; /** * {@link buffer}[0]��{@link #input}�е�ƫ�� * * @see #collect(String, int, int) * @see #next() */ private int offset; /** * */ private final Beef beef = new Beef(buffer, 0, 0); /** * */ private int dissected; /** * ���ڷֽ�beef�е��ı��ַ�����XAnalyzer�ṩ * * @see #next() */ private Knife knife; /** * */ private TokenCollector tokenCollector; /** * tokens������������next()����˳���ȡtokens�е�Token���� * * @see #tokens * @see #next() */ private Iterator<Token> tokenIteractor; // ------------------------------------------------- /** * * @param input * @param knife * @param tokenCollector */ public XTokenizer(Reader input, Knife knife, TokenCollector tokenCollector) { this.input = input; this.knife = knife; this.tokenCollector = tokenCollector; } // ------------------------------------------------- public TokenCollector getTokenCollector() { return tokenCollector; } public void setTokenCollector(TokenCollector tokenCollector) { this.tokenCollector = tokenCollector; } // ------------------------------------------------- public void collect(String word, int offset, int end) { tokenCollector.collect(word, this.offset + offset, this.offset + end); } // ------------------------------------------------- @Override public Token next() throws IOException { // �Ѿ��tokensIteractor��Token�������������reader�������� while (tokenIteractor == null || !tokenIteractor.hasNext()) { System.out.println(dissected); int read = 0; int remainning = -1;//���´�reader�����ַ�ǰ��buffer�л�ʣ�µ��ַ�����������ʾ��ǰ�ݲ���Ҫ��reader�ж����ַ� if (dissected >= beef.length()) { remainning = 0; } else if (dissected < 0){ remainning = bufferLength + dissected; } if (remainning >= 0) { if (remainning > 0) { System.arraycopy(buffer, -dissected, buffer, 0, remainning); } read = input.read(buffer, remainning, bufferLength - remainning); int charCount = remainning + read; if (charCount < 0) { // reader�Ѿ������ӿ�next()Ҫ�󷵻�null. return null; } if (charCount < bufferLength) { buffer[charCount ++] = 0; } // ���조ţ������ʹ��knife���⡱֮ beef.set(0, charCount); offset += Math.abs(dissected); //offset -= remainning; dissected = 0; } dissected = knife.dissect((Collector)this, beef, dissected); // offset += read;// !!! tokenIteractor = tokenCollector.iterator(); } // ����tokensIteractor��һ��Token���� return tokenIteractor.next(); } // ------------------------------------------------- @Override public void close() throws IOException { super.close(); input.close(); } }