package uk.ac.shef.dcs.jate.algorithm;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.feature.*;
import uk.ac.shef.dcs.jate.model.JATETerm;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import org.apache.log4j.Logger;
/**
 * An implementation of the Chi Square algorithm. See Matsuo, Y., Ishizuka, M.
 * <i>Keyword Extraction from a Single Document Using Word Co-Occurrence Statistical Information.</i>
 * Proc. 16th Intl. Florida AI Research Society, 2003, 392-396.
 */
public class ChiSquare extends Algorithm {
    private static final Logger LOG = Logger.getLogger(ChiSquare.class.getName());

    /**
     * Suffix appended to {@code FrequencyCtxBased.class.getName()} to build the key under which
     * {@link #execute(Collection)} looks up its {@code FrequencyCtxBased} feature.
     */
    public static final String SUFFIX_TERM = "_TERM";

    /** Creates an instance; required features are looked up when {@link #execute(Collection)} runs. */
    public ChiSquare() {
    }

    /**
     * Scores the candidate terms with a {@code ChiSquareWorker} run on a fork/join pool and
     * returns the resulting terms sorted by {@link Collections#sort(List)}.
     *
     * <p>Three features are looked up in {@code features} and validated before any work starts:
     * {@code Cooccurrence}, {@code FrequencyCtxBased} (keyed with {@link #SUFFIX_TERM}) and
     * {@code ChiSquareFrequentTerms}.
     *
     * @param candidates the candidate term strings to score; copied into a new list before being
     *                   handed to the worker, so the caller's collection is not modified here
     * @return the terms produced by the worker, sorted in their natural order
     * @throws JATEException declared by the overridden method; presumably raised by
     *                       {@code validateFeature} when a required feature is missing or of the
     *                       wrong type (helper not visible here - confirm)
     */
    @Override
    public List<JATETerm> execute(Collection<String> candidates) throws JATEException {
        AbstractFeature feature1 = features.get(Cooccurrence.class.getName());
        validateFeature(feature1, Cooccurrence.class);
        Cooccurrence fFeatureCoocurr = (Cooccurrence) feature1;

        AbstractFeature feature2 = features.get(FrequencyCtxBased.class.getName() + SUFFIX_TERM);
        validateFeature(feature2, FrequencyCtxBased.class);
        FrequencyCtxBased termFeatureCtxBased = (FrequencyCtxBased) feature2;

        AbstractFeature feature3 = features.get(ChiSquareFrequentTerms.class.getName());
        validateFeature(feature3, ChiSquareFrequentTerms.class);
        ChiSquareFrequentTerms refTermExpProb = (ChiSquareFrequentTerms) feature3;

        int cores = Runtime.getRuntime().availableProcessors();
        int totalTerms = candidates.size();
        // Integer division yields 0 when there are fewer candidates than cores. Clamp to 1 so the
        // worker never receives a zero per-worker limit.
        // NOTE(review): ChiSquareWorker is not visible here; presumably it splits its task list
        // while it is larger than this limit, in which case 0 could never be satisfied - confirm.
        int maxPerWorker = Math.max(1, totalTerms / cores);

        LOG.info("Beginning computing ChiSquare, cores=" + cores
                + ", total terms=" + totalTerms
                + ", max terms per worker thread=" + maxPerWorker);

        ForkJoinPool forkJoinPool = new ForkJoinPool(cores);
        List<JATETerm> result;
        try {
            ChiSquareWorker worker = new ChiSquareWorker(new ArrayList<>(candidates), maxPerWorker,
                    termFeatureCtxBased, fFeatureCoocurr, refTermExpProb);
            // invoke() blocks until the worker has completed and returns its result.
            result = forkJoinPool.invoke(worker);
        } finally {
            // The pool is private to this call; shut it down so its worker threads are released
            // even when the computation throws.
            forkJoinPool.shutdown();
        }
        LOG.info("Complete chisquare calculation.");

        Collections.sort(result);
        LOG.info("Complete");
        return result;
    }
}