package uk.ac.shef.dcs.jate.algorithm; import uk.ac.shef.dcs.jate.JATEException; import uk.ac.shef.dcs.jate.feature.AbstractFeature; import uk.ac.shef.dcs.jate.feature.Containment; import uk.ac.shef.dcs.jate.feature.FrequencyTermBased; import uk.ac.shef.dcs.jate.model.JATETerm; import java.util.*; import java.util.concurrent.ForkJoinPool; import org.apache.log4j.Logger; /** * An implementation of the CValue term recognition algorithm. See Frantzi et. al 2000, <i> * Automatic recognition of multi-word terms: the C-value/NC-value method</i> */ public class CValue extends Algorithm { private static final Logger LOG = Logger.getLogger(CValue.class.getName()); public CValue() { } @Override public List<JATETerm> execute(Collection<String> candidates) throws JATEException { AbstractFeature feature = features.get(FrequencyTermBased.class.getName()); validateFeature(feature, FrequencyTermBased.class); FrequencyTermBased fFeature = (FrequencyTermBased) feature; AbstractFeature feature2 = features.get(Containment.class.getName()); validateFeature(feature2, Containment.class); Containment cFeature = (Containment) feature2; int cores = Runtime.getRuntime().availableProcessors(); int maxPerWorker=candidates.size()/cores; StringBuilder msg = new StringBuilder("Beginning computing CValue, cores="); msg.append(cores).append(", total terms=" + candidates.size()).append(","). append(" max terms per worker thread=").append(maxPerWorker); LOG.info(msg.toString()); ForkJoinPool forkJoinPool = new ForkJoinPool(cores); CValueWorker worker = new CValueWorker(new ArrayList<>(candidates), maxPerWorker, fFeature, cFeature ); List<JATETerm> result = forkJoinPool.invoke(worker); Collections.sort(result); LOG.info("Complete"); return result; } }