package uk.ac.shef.dcs.jate.feature;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.solr.search.SolrIndexSearcher;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ForkJoinPool;
import org.apache.log4j.Logger;
/**
 * Feature builder that drives {@link FrequencyCtxSentenceBasedFBWorker} over every document id of
 * the index and collects the result into a {@link FrequencyCtxBased} feature.
 *
 * <p>The candidates handed to the worker are either the unique terms or the unique words,
 * selected by the {@code termOrWord} constructor argument.
 */
public class FrequencyCtxSentenceBasedFBMaster extends AbstractFeatureBuilder {
    private static final Logger LOG = Logger.getLogger(FrequencyCtxSentenceBasedFBMaster.class.getName());

    /** Candidate selector: 0 means term; any other value means word. */
    private final int termOrWord;

    /**
     * @param solrIndexSearcher searcher over the index whose documents are processed
     * @param properties        configuration; the maximum number of CPU cores is read from it
     * @param termOrWord        0 to build the feature for terms, any other value for words
     */
    public FrequencyCtxSentenceBasedFBMaster(SolrIndexSearcher solrIndexSearcher, JATEProperties properties,
                                             int termOrWord) {
        super(solrIndexSearcher, properties);
        this.termOrWord = termOrWord;
    }

    /**
     * Builds the feature by running a single root worker on a dedicated fork/join pool.
     *
     * @return the populated {@link FrequencyCtxBased} feature
     * @throws JATEException if an {@link IOException} occurs while building; the message carries
     *                       the full stack trace of the underlying failure
     */
    @Override
    public AbstractFeature build() throws JATEException {
        FrequencyCtxBased feature = new FrequencyCtxBased();

        // Every id in [0, maxDoc) is handed to the worker.
        int maxDoc = solrIndexSearcher.maxDoc();
        List<Integer> allDocs = new ArrayList<>(Math.max(0, maxDoc));
        for (int docId = 0; docId < maxDoc; docId++) {
            allDocs.add(docId);
        }

        try {
            Set<String> allCandidates;
            if (termOrWord == 0) {
                allCandidates = getUniqueTerms();
            } else {
                allCandidates = getUniqueWords();
            }

            // A non-positive core count would make ForkJoinPool's constructor throw
            // IllegalArgumentException, so fall back to a single thread.
            int cores = Math.max(1, properties.getMaxCPUCores());
            int maxPerThread = getMaxPerThread(allDocs.size() / cores);

            FrequencyCtxSentenceBasedFBWorker worker = new FrequencyCtxSentenceBasedFBWorker(
                    feature, properties, allDocs, allCandidates, solrIndexSearcher, maxPerThread);

            LOG.info("Building features using cpu cores=" + cores
                    + ", total docs=" + allDocs.size()
                    + ", max per worker=" + maxPerThread);

            ForkJoinPool forkJoinPool = new ForkJoinPool(cores);
            int total;
            try {
                total = forkJoinPool.invoke(worker);
            } finally {
                // The pool is private to this call; release its threads even if the worker fails.
                forkJoinPool.shutdown();
            }

            LOG.info("Complete building features. Total sentence ctx=" + feature.getMapCtx2TTF().size()
                    + ", from total processed docs=" + total);
        } catch (IOException ioe) {
            String message = "Failed to build features!" + "\n" + ExceptionUtils.getFullStackTrace(ioe);
            LOG.error(message);
            throw new JATEException(message);
        }
        return feature;
    }

    /**
     * Clamps the requested per-worker batch size into the range
     * [{@code MIN_SEQUENTIAL_THRESHOLD}, {@code MAX_SEQUENTIAL_THRESHOLD}].
     *
     * @param maxPerThread requested number of documents per worker
     * @return the value limited to the configured thresholds
     */
    private int getMaxPerThread(int maxPerThread) {
        if (maxPerThread < MIN_SEQUENTIAL_THRESHOLD) {
            return MIN_SEQUENTIAL_THRESHOLD;
        }
        if (maxPerThread > MAX_SEQUENTIAL_THRESHOLD) {
            return MAX_SEQUENTIAL_THRESHOLD;
        }
        return maxPerThread;
    }
}