package uk.ac.shef.dcs.jate.algorithm;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.feature.AbstractFeature;
import uk.ac.shef.dcs.jate.feature.Containment;
import uk.ac.shef.dcs.jate.feature.FrequencyTermBased;
import uk.ac.shef.dcs.jate.model.JATETerm;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import org.apache.log4j.Logger;
/**
* An implementation of the CValue term recognition algorithm. See Frantzi et. al 2000, <i>
* Automatic recognition of multi-word terms: the C-value/NC-value method</i>
*/
public class CValue extends Algorithm {
private static final Logger LOG = Logger.getLogger(CValue.class.getName());
public CValue() {
}
@Override
public List<JATETerm> execute(Collection<String> candidates) throws JATEException {
AbstractFeature feature = features.get(FrequencyTermBased.class.getName());
validateFeature(feature, FrequencyTermBased.class);
FrequencyTermBased fFeature = (FrequencyTermBased) feature;
AbstractFeature feature2 = features.get(Containment.class.getName());
validateFeature(feature2, Containment.class);
Containment cFeature = (Containment) feature2;
int cores = Runtime.getRuntime().availableProcessors();
int maxPerWorker=candidates.size()/cores;
StringBuilder msg = new StringBuilder("Beginning computing CValue, cores=");
msg.append(cores).append(", total terms=" + candidates.size()).append(",").
append(" max terms per worker thread=").append(maxPerWorker);
LOG.info(msg.toString());
ForkJoinPool forkJoinPool = new ForkJoinPool(cores);
CValueWorker worker = new CValueWorker(new ArrayList<>(candidates), maxPerWorker, fFeature,
cFeature
);
List<JATETerm> result = forkJoinPool.invoke(worker);
Collections.sort(result);
LOG.info("Complete");
return result;
}
}