package uk.ac.shef.dcs.jate.feature;

import org.apache.log4j.Logger;
import org.apache.solr.common.util.Pair;
import uk.ac.shef.dcs.jate.JATERecursiveTaskWorker;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Created by zqz on 17/09/2015.
 * <p>
 * Recursive task worker that, for every term in its task list, looks up candidate terms through
 * the {@link TermComponentIndex} (one lookup per space-separated token of the term) and registers
 * in the {@link Containment} feature every candidate in which the term occurs as a match that is
 * not directly preceded or followed by a word character.
 * <p>
 * The result of a worker is a two-element array: {@code [termsProcessed, termsAssigned]}.
 */
class ContainmentFBWorker extends JATERecursiveTaskWorker<String, int[]> {

    private static final long serialVersionUID = -1208424489000405913L;
    private static final Logger LOG = Logger.getLogger(ContainmentFBWorker.class.getName());

    private Containment feature;
    private TermComponentIndex featureTermCompIndex;

    /**
     * @param taskTerms            terms this worker is responsible for
     * @param maxTasksPerWorker    passed through to the superclass constructor
     * @param feature              feature object that receives (term, containing term) pairs
     * @param featureTermCompIndex index queried per token for candidate containing terms
     */
    ContainmentFBWorker(List<String> taskTerms, int maxTasksPerWorker,
                        Containment feature,
                        TermComponentIndex featureTermCompIndex) {
        super(taskTerms, maxTasksPerWorker);
        this.feature = feature;
        this.featureTermCompIndex = featureTermCompIndex;
    }

    /**
     * Builds a worker for a subset of the terms, sharing this worker's feature and index.
     *
     * @param termSplit the subset of terms the new worker should handle
     * @return a new {@code ContainmentFBWorker} over {@code termSplit}
     */
    @Override
    protected JATERecursiveTaskWorker<String, int[]> createInstance(List<String> termSplit) {
        return new ContainmentFBWorker(termSplit, maxTasksPerThread, feature, featureTermCompIndex);
    }

    /**
     * Joins every given worker and sums their two-element results position by position.
     *
     * @param jateRecursiveTaskWorkers workers whose results are to be combined
     * @return {@code [sum of element 0, sum of element 1]} over all joined results
     */
    @Override
    protected int[] mergeResult(List<JATERecursiveTaskWorker<String, int[]>> jateRecursiveTaskWorkers) {
        int[] totals = new int[2];
        for (JATERecursiveTaskWorker<String, int[]> subTask : jateRecursiveTaskWorkers) {
            int[] partial = subTask.join();
            totals[0] += partial[0];
            totals[1] += partial[1];
        }
        return totals;
    }

    /**
     * Processes the given terms one by one: gathers candidate terms for each, then records in the
     * feature every candidate that contains the term.
     *
     * @param taskTerms terms to process
     * @return {@code [number of terms processed, taskTerms.size()]}
     */
    @Override
    protected int[] computeSingleWorker(List<String> taskTerms) {
        final int totalTerms = taskTerms.size();
        LOG.info("Total terms to process=" + totalTerms);

        int processed = 0;
        for (String termString : taskTerms) {
            Set<String> candidateParents = collectCandidates(termString.split(" "));
            Pattern termPattern = wholeTermPattern(termString);

            for (String parent : candidateParents) {
                boolean containsTerm = termPattern.matcher(parent).find();
                if (containsTerm) {
                    // the candidate (reference term) contains the current term
                    feature.add(termString, parent);
                }
            }

            processed++;
            if (processed % 2000 == 0) {
                LOG.debug(processed + "/" + taskTerms.size());
            }
        }
        return new int[]{processed, totalTerms};
    }

    /**
     * Collects, over all tokens, the keys of index entries whose value exceeds the token count.
     * Scanning of a token's entries stops at the first entry whose value is not greater than
     * {@code tokens.length}.
     * NOTE(review): this presumes getSorted returns entries ordered by descending value, and that
     * the value is the candidate's token count - confirm against TermComponentIndex.
     */
    private Set<String> collectCandidates(String[] tokens) {
        Set<String> collected = new HashSet<>();
        for (String token : tokens) {
            for (Pair<String, Integer> entry : featureTermCompIndex.getSorted(token)) {
                if (entry.getValue() <= tokens.length) {
                    break;
                }
                collected.add(entry.getKey());
            }
        }
        return collected;
    }

    /**
     * Compiles a pattern matching the literal term when it is neither immediately preceded nor
     * immediately followed by a word character.
     */
    private static Pattern wholeTermPattern(String term) {
        return Pattern.compile("(?<!\\w)" + Pattern.quote(term) + "(?!\\w)");
    }
}