package kea.stopwords;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.Locale;

/**
 * Class that can test whether a given string is a stop word. The stop word
 * list is read from a UTF-8 text file, one entry per line. The string under
 * test is lowercased before the lookup; the entries read from the file are
 * stored exactly as they appear, so an entry containing uppercase characters
 * can never be matched.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version 1.0
 */
public class StopwordsEnglish extends Stopwords {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /**
     * The hashtable containing the list of stopwords. Only the keys are
     * meaningful; every key maps to the same placeholder value.
     */
    private Hashtable<String,Double> m_Stopwords = null;

    /**
     * Loads the stop word list from the given file, decoding it as UTF-8 and
     * treating every line as one stop word.
     *
     * If the file cannot be opened or read, the stack trace is printed to
     * standard error and the object keeps whatever entries were read up to
     * that point (possibly none); no exception is propagated for I/O failures.
     *
     * @param stopwordsPath path of the stop word file
     * @throws NullPointerException if {@code stopwordsPath} is {@code null}
     */
    public StopwordsEnglish(String stopwordsPath) {
        m_Stopwords = new Hashtable<String,Double>();

        // Shared placeholder value: the table is used purely as a set of keys.
        Double dummy = Double.valueOf(0.0);

        File txt = new File(stopwordsPath);
        FileInputStream in = null;
        try {
            in = new FileInputStream(txt);
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String sw;
            while ((sw = br.readLine()) != null) {
                m_Stopwords.put(sw, dummy);
            }
        } catch (Exception e) {
            // Best effort: report the problem and continue with the entries
            // loaded so far instead of failing construction.
            System.err.println("Could not read stopwords file: " + stopwordsPath);
            e.printStackTrace();
        } finally {
            // Always release the file handle, whether or not reading succeeded.
            if (in != null) {
                try {
                    in.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Returns true if the given string is a stop word.
     *
     * The string is lowercased with English casing rules before the lookup so
     * that the result does not depend on the default locale of the JVM.
     *
     * @param str the word to test; {@code null} is never a stop word
     * @return true if the lowercased word is contained in the stop word list
     */
    public boolean isStopword(String str) {
        if (str == null) {
            return false;
        }
        return m_Stopwords.containsKey(str.toLowerCase(Locale.ENGLISH));
    }
}