//------------------------------------------------------------------------------ // Copyright (c) 2005, 2006 IBM Corporation and others. // All rights reserved. This program and the accompanying materials // are made available under the terms of the Eclipse Public License v1.0 // which accompanies this distribution, and is available at // http://www.eclipse.org/legal/epl-v10.html // // Contributors: // IBM Corporation - initial implementation //------------------------------------------------------------------------------ package org.eclipse.epf.search.analysis; import java.io.IOException; import java.util.ResourceBundle; import java.util.Set; import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.eclipse.epf.common.utils.StrUtil; /** * A text filter that handles Unicode 4.1 characters. * * @author Kelvin Low * @since 1.0 */ public final class TextFilter extends TokenFilter { private static Set stopWords = null; /** * Creates a new instance. */ public TextFilter(TokenStream in) { super(in); if (stopWords == null) { loadStopWords(); } } /** * @see org.apache.lucene.analysis.TokenStream#next() */ public final Token next() throws IOException { for (Token token = input.next(); token != null; token = input.next()) { String tokenText = token.termText(); if (!stopWords.contains(tokenText)) { return token; } } return null; } /** * Loads the stop words defined in the StopWords.properties file. */ private void loadStopWords() { String[] words = null; try { ResourceBundle bundle = ResourceBundle.getBundle(TextFilter.class .getPackage().getName() + ".StopWords"); //$NON-NLS-1$ String property = bundle.getString("Search.stopWords"); //$NON-NLS-1$ words = StrUtil.split(property, " ,"); //$NON-NLS-1$ } catch (Exception e) { words = StopAnalyzer.ENGLISH_STOP_WORDS; } stopWords = StopFilter.makeStopSet(words); } }