package edu.stanford.nlp.patterns.surface;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import edu.stanford.nlp.patterns.*;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.logging.Redwood;

public class CreatePatterns<E> {

  //String channelNameLogger = "createpatterns";

  ConstantsAndVariables constVars;

  public CreatePatterns(Properties props, ConstantsAndVariables constVars) throws IOException {
    this.constVars = constVars;
    ArgumentParser.fillOptions(ConstantsAndVariables.class, props);
    constVars.setUp(props);
    setUp(props);
  }

  void setUp(Properties props) {
    ArgumentParser.fillOptions(this, props);
  }

  /**
   * Creates all patterns and saves them through the appropriate {@link PatternsForEachToken} implementation.
   *
   * @param sents sentences to process, keyed by sentence id
   * @param props properties used to construct the per-token pattern store
   * @param storePatsForEachTokenWay how the patterns for each token are stored (e.g. in memory, DB-backed)
   */
  public void getAllPatterns(Map<String, DataInstance> sents, Properties props,
                             ConstantsAndVariables.PatternForEachTokenWay storePatsForEachTokenWay) {

    // this.patternsForEachToken = new HashMap<String, Map<Integer, Triple<Set<Integer>, Set<Integer>, Set<Integer>>>>();
    // this.patternsForEachToken = new HashMap<String, Map<Integer, Set<Integer>>>();

    Date startDate = new Date();
    List<String> keyset = new ArrayList<>(sents.keySet());

    // Partition the sentence ids into roughly equal chunks, one chunk per thread.
    int num;
    if (constVars.numThreads == 1)
      num = keyset.size();
    else
      num = keyset.size() / constVars.numThreads;

    ExecutorService executor = Executors.newFixedThreadPool(constVars.numThreads);
    Redwood.log(ConstantsAndVariables.extremedebug, "Computing all patterns. keyset size is "
        + keyset.size() + ". Assigning " + num + " values to each thread");

    List<Future<Boolean>> list = new ArrayList<>();
    for (int i = 0; i < constVars.numThreads; i++) {
      int from = i * num;
      int to;
      if (i == constVars.numThreads - 1)
        to = keyset.size();  // the last thread picks up the remainder
      else
        to = Math.min(keyset.size(), (i + 1) * num);
      // Redwood.log(ConstantsAndVariables.extremedebug,
      //     "assigning from " + i * num + " till " + Math.min(keyset.size(), (i + 1) * num));

      List<String> ids = keyset.subList(from, to);
      Callable<Boolean> task = new CreatePatternsThread(sents, ids, props, storePatsForEachTokenWay);
      Future<Boolean> submit = executor.submit(task);
      list.add(submit);
    }

    // Wait for all threads to finish; fail fast if any of them threw.
    for (Future<Boolean> future : list) {
      try {
        future.get();
        //patternsForEachToken.putAll(future.get());
      } catch (Exception e) {
        executor.shutdownNow();
        throw new RuntimeException(e);
      }
    }
    executor.shutdown();

    Date endDate = new Date();
    String timeTaken = GetPatternsFromDataMultiClass.elapsedTime(startDate, endDate);
    Redwood.log(Redwood.DBG, "Done computing all patterns [" + timeTaken + "]");
    //return patternsForEachToken;
  }

//  /**
//   * Returns null if using DB backed!!
//   * @return
//   */
//  public Map<String, Map<Integer, Set<Integer>>> getPatternsForEachToken() {
//    return patternsForEachToken;
//  }

  public class CreatePatternsThread implements Callable<Boolean> {

    //String label;
    // Class otherClass;

    Map<String, DataInstance> sents;
    List<String> sentIds;
    PatternsForEachToken<E> patsForEach;

    public CreatePatternsThread(Map<String, DataInstance> sents, List<String> sentIds, Properties props,
                                ConstantsAndVariables.PatternForEachTokenWay storePatsForEachToken) {
      //this.label = label;
      // this.otherClass = otherClass;
      this.sents = sents;
      this.sentIds = sentIds;
      this.patsForEach = PatternsForEachToken.getPatternsInstance(props, storePatsForEachToken);
    }

    @Override
    public Boolean call() throws Exception {
      Map<String, Map<Integer, Set<E>>> tempPatternsForTokens = new HashMap<>();
      int numSentencesInOneCommit = 0;

      for (String id : sentIds) {
        DataInstance sent = sents.get(id);

        if (!constVars.storePatsForEachToken.equals(ConstantsAndVariables.PatternForEachTokenWay.MEMORY))
          tempPatternsForTokens.put(id, new HashMap<>());

        Map<Integer, Set<E>> p = (Map) PatternFactory.getPatternsAroundTokens(constVars.patternType, sent, constVars.getStopWords());

        // Buffer the patterns and flush every 1000 sentences to reduce the number of commits to the database.
        if (!constVars.storePatsForEachToken.equals(ConstantsAndVariables.PatternForEachTokenWay.MEMORY)) {
          tempPatternsForTokens.put(id, p);
          numSentencesInOneCommit++;
          if (numSentencesInOneCommit % 1000 == 0) {
            patsForEach.addPatterns(tempPatternsForTokens);
            tempPatternsForTokens.clear();
            numSentencesInOneCommit = 0;
          }
          // patsForEach.addPatterns(id, p);
        } else
          patsForEach.addPatterns(id, p);
      }

      // Flush the patterns for the remaining sentences.
      if (!constVars.storePatsForEachToken.equals(ConstantsAndVariables.PatternForEachTokenWay.MEMORY))
        patsForEach.addPatterns(tempPatternsForTokens);

      return true;
    }
  }
}
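
// A minimal usage sketch, assuming `props`, `constVars`, and a sentence map `sents`
// (Map<String, DataInstance>) have already been set up elsewhere; the SurfacePattern type
// argument below is an illustrative assumption, not something this file fixes.
//
//   CreatePatterns<SurfacePattern> creator = new CreatePatterns<>(props, constVars);
//   creator.getAllPatterns(sents, props, ConstantsAndVariables.PatternForEachTokenWay.MEMORY);
//
// With MEMORY storage each sentence's patterns are added as soon as they are computed;
// with a DB-backed store they are committed in batches of 1000 sentences per thread.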