package nl.uva.sc.parser.subscriber;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import nl.uva.sc.datatypes.Frequency;
import nl.uva.sc.datatypes.StopWord;
import nl.uva.sc.datatypes.Word;
import nl.uva.sc.parser.Token;
/**
 * Parser subscriber that builds a per-word index of {@link Token}s and
 * records one occurrence on the matching token for every non-stop word it
 * receives.
 */
public class TermFrequency implements BookParserSubscriber {

    /** Index from each non-stop word seen so far to the token tracking it. */
    HashMap<Word, Token> mIndex = new HashMap<>();

    /**
     * Registers one occurrence of {@code word}.
     *
     * <p>Words reported as stop words by {@link StopWord#isStopWord} are
     * ignored. The first time a word is seen a new token is created for it;
     * every call (including the first) then invokes
     * {@code Token.addFrequency()} on that token.
     *
     * @param word the word delivered by the parser
     */
    @Override
    public void nextWord(final Word word) {
        if (StopWord.isStopWord(word)) {
            return;
        }
        Token token = mIndex.get(word);
        if (token == null) {
            token = new Token(word, new Frequency());
            mIndex.put(word, token);
        }
        token.addFrequency();
    }

    /**
     * Returns a sorted snapshot of all indexed tokens.
     *
     * <p>The list is a fresh copy, ordered by {@code Token}'s natural ordering
     * (presumably ascending frequency; confirm against
     * {@code Token.compareTo}). Modifying the returned list does not affect
     * the index.
     *
     * @return a new list holding every indexed token in natural order
     */
    public List<Token> getSortedTokens() {
        List<Token> tokens = new ArrayList<>(mIndex.values());
        Collections.sort(tokens);
        return tokens;
    }

    /**
     * Prints up to {@code topX} tokens to {@code System.out}, one per line,
     * walking the sorted token list from its last element backwards.
     *
     * <p>If fewer than {@code topX} tokens are indexed, all of them are
     * printed. A zero or negative {@code topX} prints nothing.
     *
     * @param topX the maximum number of tokens to print
     */
    public void printTop(final int topX) {
        List<Token> sortedTokenList = getSortedTokens();
        int sortedListSize = sortedTokenList.size();
        // Clamp to the list size so a request larger than the index does not
        // run off the front of the list with a negative index.
        int limit = Math.min(topX, sortedListSize);
        PrintStream print = System.out;
        for (int i = 1; i <= limit; ++i) {
            Token currentToken = sortedTokenList.get(sortedListSize - i);
            print.println(currentToken);
        }
    }
}