package uk.ac.shef.dcs.jate.feature;

import org.apache.solr.common.util.Pair;

import java.util.*;

/**
 * Specific feature to be used by CValue for efficient computation.
 * <p>
 * The index is a map from uni-grams to a list of pairs. Each pair contains a candidate term
 * containing that unigram and the length of the term in terms of number of tokens (uni-grams)
 * in that term.
 * <p>
 * Both {@link #add(String, String, int)} and {@link #getSorted(String)} access the underlying
 * map while holding this instance's monitor, so writers and readers do not interleave on the
 * non-thread-safe {@link HashMap} / {@link ArrayList} structures.
 */
public class TermComponentIndex extends AbstractFeature {

    /** Unigram -> (candidate term containing the unigram, number of tokens in that term). */
    private final Map<String, List<Pair<String, Integer>>> index = new HashMap<>();

    /**
     * Registers a candidate term under one of the unigrams it contains.
     *
     * @param unigram   the unigram used as the index key
     * @param term      the candidate term containing {@code unigram}
     * @param numTokens the number of tokens (unigrams) in {@code term}
     */
    public synchronized void add(String unigram, String term, int numTokens) {
        index.computeIfAbsent(unigram, key -> new ArrayList<>())
                .add(new Pair<>(term, numTokens));
    }

    /**
     * Returns the terms indexed under the given unigram, ordered by descending token count.
     * <p>
     * The returned list is a fresh copy; modifying it does not affect the index.
     *
     * @param unigram the unigram to look up
     * @return a new list of (term, token count) pairs, longest terms first; empty if the
     *         unigram has no entries
     */
    public List<Pair<String, Integer>> getSorted(String unigram) {
        List<Pair<String, Integer>> sorted = new ArrayList<>();
        // Snapshot under the same lock that add() uses: reading the HashMap or copying the
        // ArrayList while another thread is mutating them is a data race.
        synchronized (this) {
            List<Pair<String, Integer>> values = index.get(unigram);
            if (values != null) {
                sorted.addAll(values);
            }
        }
        // Sorting works on the private copy, so it does not need to hold the lock.
        sorted.sort((o1, o2) -> o2.getValue().compareTo(o1.getValue()));
        return sorted;
    }
}