package resa.evaluation.topology.fp;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
* Created by ding on 14-6-5.
*/
/**
 * Bolt that turns each incoming sentence into the set of dictionary-word ids it
 * contains, enumerates every non-empty subset of those ids as a pattern, and
 * emits the patterns direct-grouped to the "detector" component, batched so
 * each detector task receives at most one tuple per input sentence.
 */
public class PatternGenerator extends BaseRichBolt implements Constant {

    private OutputCollector collector;
    /** Word -> integer id, loaded from the dictionary resource in prepare(). */
    private Map<String, Integer> dict;
    /** Task ids of the "detector" component, sorted ascending for stable partition indexing. */
    private List<Integer> targetTasks;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        dict = new HashMap<>();
        String dictFile = (String) stormConf.get(DICT_FILE_PROP);
        InputStream in = this.getClass().getResourceAsStream(dictFile);
        if (in == null) {
            // Fail fast with a clear message instead of an anonymous NPE from the reader ctor.
            throw new RuntimeException("dictionary resource not found: " + dictFile);
        }
        // Explicit charset: the platform default is environment-dependent and would make
        // dictionary lookups differ between deployments.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            int id = 0;
            String line;
            // One dictionary word per line; ids are assigned in file order.
            while ((line = reader.readLine()) != null) {
                dict.put(line, id++);
            }
        } catch (IOException e) {
            throw new RuntimeException("failed to load dictionary resource: " + dictFile, e);
        }
        // Defensive copy: the list returned by the context may be unmodifiable,
        // in which case Collections.sort on it would throw.
        targetTasks = new ArrayList<>(context.getComponentTasks("detector"));
        Collections.sort(targetTasks);
    }

    @Override
    public void execute(Tuple input) {
        String sentence = input.getStringByField(SENTENCE_FIELD);
        // Replace punctuation/symbol characters with spaces, then tokenize on whitespace.
        // A local set (rather than a reused instance field) keeps per-tuple state out of
        // the bolt instance and needs no manual clearing.
        Set<String> words = new HashSet<>();
        StringTokenizer tokenizer = new StringTokenizer(sentence.replaceAll("\\p{P}|\\p{S}", " "));
        while (tokenizer.hasMoreTokens()) {
            words.add(tokenizer.nextToken());
        }
        // Map known words to ids (words absent from the dictionary are dropped) and sort
        // so every pattern is built from a canonical ascending id order.
        int[] wordIds = words.stream()
                .map(this::word2Id)
                .filter(Objects::nonNull)
                .mapToInt(Integer::intValue)
                .sorted()
                .toArray();
        if (wordIds.length > 0) {
            emitSubPattern(wordIds, collector, input);
        }
        collector.ack(input);
    }

    /**
     * Emits every non-empty subset of {@code wordIds} as a {@link WordList} pattern,
     * batched per destination task and sent with direct grouping.
     * NOTE: subset enumeration is O(2^n) in the number of distinct dictionary words
     * in the sentence; n is assumed to stay small (and must be &lt; 31 for the bitmask).
     */
    private void emitSubPattern(int[] wordIds, OutputCollector collector, Tuple input) {
        int n = wordIds.length;
        int[] buffer = new int[n];
        // Patterns grouped by destination; index = position in the sorted targetTasks list.
        // Pre-sized List avoids the unchecked generic-array creation of ArrayList<...>[].
        List<List<WordList>> perTask = new ArrayList<>(Collections.nCopies(targetTasks.size(), null));
        for (int mask = 1; mask < (1 << n); mask++) {
            // Collect the ids selected by this bitmask into the front of the buffer.
            int k = 0;
            for (int j = 0; j < n; j++) {
                if ((mask & (1 << j)) != 0) {
                    buffer[k++] = wordIds[j];
                }
            }
            WordList wl = new WordList(Arrays.copyOf(buffer, k));
            int targetIndex = WordList.getPartition(targetTasks.size(), wl);
            if (perTask.get(targetIndex) == null) {
                perTask.set(targetIndex, new ArrayList<>());
            }
            perTask.get(targetIndex).add(wl);
        }
        // One direct emit per task that received at least one pattern; the whole batch
        // travels as a single tuple anchored on the input for reliable processing.
        Object isAdd = input.getValueByField(IS_ADD_FIELD);
        for (int i = 0; i < perTask.size(); i++) {
            List<WordList> batch = perTask.get(i);
            if (batch != null && !batch.isEmpty()) {
                collector.emitDirect(targetTasks.get(i), input, Arrays.asList(batch, isAdd));
            }
        }
    }

    /** Looks up the dictionary id for {@code word}; returns null for unknown words. */
    private Integer word2Id(String word) {
        return dict.get(word);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // "true" declares the stream as direct so emitDirect() may target specific tasks.
        declarer.declare(true, new Fields(PATTERN_FIELD, IS_ADD_FIELD));
    }
}