package net.paoding.analysis.analyzer; import java.io.Reader; import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector; import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector; import net.paoding.analysis.knife.Knife; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; public class PaodingAnalyzerBean extends Analyzer { // ------------------------------------------------- /** * 最多切分 */ public static final int MOST_WORDS_MODE = 1; /** * 按最大切分 */ public static final int MAX_WORD_LENGTH_MODE = 2; // ------------------------------------------------- /** * 用于向PaodingTokenizer提供,分解文本字符 * * @see PaodingTokenizer#next() * */ private Knife knife; /** * @see #MOST_WORDS_MODE * @see #MAX_WORD_LENGTH_MODE */ private int mode = MOST_WORDS_MODE; /** * */ private Class modeClass; // ------------------------------------------------- public PaodingAnalyzerBean() { } /** * @see #setKnife(Knife) * @param knife */ public PaodingAnalyzerBean(Knife knife) { this.knife = knife; } /** * @see #setKnife(Knife) * @see #setMode(int) * @param knife * @param mode */ public PaodingAnalyzerBean(Knife knife, int mode) { this.knife = knife; this.mode = mode; } /** * @see #setKnife(Knife) * @see #setMode(int) * @param knife * @param mode */ public PaodingAnalyzerBean(Knife knife, String mode) { this.knife = knife; this.setMode(mode); } // ------------------------------------------------- public Knife getKnife() { return knife; } public void setKnife(Knife knife) { this.knife = knife; } public int getMode() { return mode; } /** * 设置分析器模式. * <p> * * @param mode */ public void setMode(int mode) { if (mode != MOST_WORDS_MODE && mode != MAX_WORD_LENGTH_MODE) { throw new IllegalArgumentException("wrong mode:" + mode); } this.mode = mode; this.modeClass = null; } /** * 设置分析器模式类。 * * @param modeClass * TokenCollector的实现类。 */ public void setModeClass(Class modeClass) { this.modeClass = modeClass; } public void setModeClass(String modeClass) { try { this.modeClass = Class.forName(modeClass); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("not found mode class:" + e.getMessage()); } } public void setMode(String mode) { if (mode.startsWith("class:")) { setModeClass(mode.substring("class:".length())); } else { if ("most-words".equalsIgnoreCase(mode) || "default".equalsIgnoreCase(mode) || ("" + MOST_WORDS_MODE).equals(mode)) { setMode(MOST_WORDS_MODE); } else if ("max-word-length".equalsIgnoreCase(mode) || ("" + MAX_WORD_LENGTH_MODE).equals(mode)) { setMode(MAX_WORD_LENGTH_MODE); } else { throw new IllegalArgumentException("不合法的分析器Mode参数设置:" + mode); } } } // ------------------------------------------------- public TokenStream tokenStream(String fieldName, Reader reader) { if (knife == null) { throw new NullPointerException("knife should be set before token"); } // PaodingTokenizer是TokenStream实现,使用knife解析reader流入的文本 return new PaodingTokenizer(reader, knife, createTokenCollector()); } protected TokenCollector createTokenCollector() { if (modeClass != null) { try { return (TokenCollector) modeClass.newInstance(); } catch (InstantiationException e) { throw new IllegalArgumentException("wrong mode class:" + e.getMessage()); } catch (IllegalAccessException e) { throw new IllegalArgumentException("wrong mode class:" + e.getMessage()); } } switch (mode) { case MOST_WORDS_MODE: return new MostWordsTokenCollector(); case MAX_WORD_LENGTH_MODE: return new MaxWordLengthTokenCollector(); default: throw new Error("never happened"); } } }