package edu.umd.rhsmith.diads.tools.tfidf;

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * {@link ITermExtractor} that turns a piece of text into a map of raw term
 * counts by running it through three pluggable stages: a {@link TermCleaner},
 * a {@link TermSplitter} and a {@link TermFilter}.
 *
 * <p>Each stage may be {@code null}. A {@code null} cleaner or filter is simply
 * skipped; a {@code null} splitter yields no terms, so {@link #process(String)}
 * returns an empty map.
 */
public class DefaultTermExtractor implements ITermExtractor {

    private TermCleaner textCleaner;
    private TermSplitter splitter;
    private TermFilter filter;

    /**
     * Creates an extractor that uses the supplied stages.
     *
     * @param cleaner  stage applied to the raw text first; may be {@code null}
     * @param splitter stage that breaks the cleaned text into terms; may be {@code null}
     * @param filter   stage applied to the list of terms; may be {@code null}
     */
    public DefaultTermExtractor(TermCleaner cleaner, TermSplitter splitter, TermFilter filter) {
        this.textCleaner = cleaner;
        this.splitter = splitter;
        this.filter = filter;
    }

    /**
     * Creates an extractor wired with {@link DefaultTermCleaner},
     * {@link DefaultTermSplitter} and {@link DefaultTermFilter}.
     */
    public DefaultTermExtractor() {
        this.textCleaner = new DefaultTermCleaner();
        this.splitter = new DefaultTermSplitter();
        this.filter = new DefaultTermFilter();
    }

    /** @return the cleaner currently in use, possibly {@code null} */
    public TermCleaner getTextCleaner() {
        return textCleaner;
    }

    /** @param cleaner the cleaner to use from now on; {@code null} disables cleaning */
    public void setTextCleaner(TermCleaner cleaner) {
        this.textCleaner = cleaner;
    }

    /** @return the splitter currently in use, possibly {@code null} */
    public TermSplitter getSplitter() {
        return splitter;
    }

    /** @param splitter the splitter to use from now on; with {@code null} no terms are produced */
    public void setSplitter(TermSplitter splitter) {
        this.splitter = splitter;
    }

    /** @return the filter currently in use, possibly {@code null} */
    public TermFilter getFilter() {
        return filter;
    }

    /** @param filter the filter to use from now on; {@code null} disables filtering */
    public void setFilter(TermFilter filter) {
        this.filter = filter;
    }

    /**
     * Counts how often each term occurs in the given text.
     *
     * <p>The text is cleaned, split into terms and filtered, in that order.
     * {@code null} entries in the resulting term list are ignored.
     *
     * @param analysisText the text to analyse
     * @return a new map from term to its number of occurrences (as a
     *         {@code Double}), with keys in their natural order; empty when the
     *         cleaned text is {@code null} or when no term list is available
     */
    @Override
    public Map<String, Double> process(String analysisText) {
        Map<String, Double> tf = new TreeMap<String, Double>();

        analysisText = cleanText(analysisText);
        if (analysisText == null) {
            return tf;
        }

        List<String> terms = getTerms(analysisText);
        if (terms == null) {
            // No splitter configured, or the splitter produced nothing: there is
            // nothing to filter or count, so do not hand null to the filter.
            return tf;
        }

        terms = filterTerms(terms);
        if (terms == null) {
            // The filter returned no list; iterating it would throw.
            return tf;
        }

        for (String term : terms) {
            if (term == null) {
                continue;
            }
            Double count = tf.get(term);
            if (count == null) {
                count = 0.0;
            }
            tf.put(term, count + 1.0);
        }
        return tf;
    }

    /**
     * Runs the text through the configured cleaner.
     *
     * @param analysisText the text to clean
     * @return the cleaner's result, or {@code analysisText} unchanged when no
     *         cleaner is configured
     */
    protected String cleanText(String analysisText) {
        if (this.textCleaner != null) {
            analysisText = textCleaner.clean(analysisText);
        }
        return analysisText;
    }

    /**
     * Splits the text into terms using the configured splitter.
     *
     * @param analysisText the text to split
     * @return the splitter's result, or {@code null} when no splitter is configured
     */
    protected List<String> getTerms(String analysisText) {
        List<String> terms = null;
        if (this.splitter != null) {
            terms = splitter.getTerms(analysisText);
        }
        return terms;
    }

    /**
     * Passes the terms through the configured filter.
     *
     * @param terms the terms to filter
     * @return the filter's result, or {@code terms} unchanged when no filter is
     *         configured
     */
    protected List<String> filterTerms(List<String> terms) {
        if (this.filter != null) {
            terms = filter.filterTerms(terms);
        }
        return terms;
    }
}