package edu.berkeley.cs.nlp.ocular.data.textreader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import edu.berkeley.cs.nlp.ocular.util.CollectionHelper;
import edu.berkeley.cs.nlp.ocular.util.Tuple2;
import static edu.berkeley.cs.nlp.ocular.util.Tuple2.Tuple2;
import tberg.murphy.fileio.f;
/**
 * A {@link TextReader} that post-processes the characters produced by a
 * delegate reader, replacing some occurrences of configured character
 * sequences.
 *
 * Each rule has the shape {@code <<input, output>, each>}: every
 * {@code each}-th occurrence of the sequence {@code input} is replaced by the
 * sequence {@code output}. Rules are applied in list order, each one to the
 * result of the previous one.
 *
 * The per-rule occurrence counters are instance state and are never reset, so
 * occurrences are counted across all calls to {@link #readCharacters(String)}
 * on the same instance. No synchronization is performed.
 *
 * @author Dan Garrette (dhgarrette@gmail.com)
 */
public class ReplaceSomeTextReader implements TextReader {
	private final List<Tuple2<Tuple2<List<String>, List<String>>, Integer>> rules;
	private final TextReader delegate;
	/** For rule {@code i}, the number of matches seen so far, kept in the range {@code [0, each)}. */
	private final int[] occurrences;

	/**
	 * @param rules    {@code <<input, output>, each>}: replace {@code input} by {@code output} every {@code each} occurrences
	 * @param delegate the reader whose output is post-processed
	 * @throws IllegalArgumentException if a rule has an empty {@code input} or an {@code each} that is null or less than 1
	 */
	public ReplaceSomeTextReader(List<Tuple2<Tuple2<List<String>, List<String>>, Integer>> rules, TextReader delegate) {
		// Copy so that later changes to the caller's list cannot get out of step with `occurrences`.
		this.rules = new ArrayList<Tuple2<Tuple2<List<String>, List<String>>, Integer>>(rules);
		for (Tuple2<Tuple2<List<String>, List<String>>, Integer> rule : this.rules) {
			if (rule._1._1.isEmpty()) {
				// An empty pattern would match at every position without consuming anything.
				throw new IllegalArgumentException("rule input must not be empty. found: input=" + rule._1._1 + ", output=" + rule._1._2);
			}
			if (rule._2 == null || rule._2 < 1) {
				// `each` is used as a modulus, so zero would fail at match time and a negative value would never fire.
				throw new IllegalArgumentException("rule `each` must be at least 1. found: " + rule._2 + " for input=" + rule._1._1);
			}
		}
		this.delegate = delegate;
		this.occurrences = new int[this.rules.size()];
	}

	/**
	 * Reads {@code line} with the delegate and then applies every rule in order.
	 *
	 * @param line the text passed to the delegate reader
	 * @return the delegate's characters with the selected occurrences replaced;
	 *         when there are no rules this is the delegate's own list
	 */
	public List<String> readCharacters(String line) {
		List<String> result = delegate.readCharacters(line);
		for (int i = 0; i < rules.size(); ++i) {
			result = applyRule(i, result);
		}
		return result;
	}

	/**
	 * Applies rule {@code ruleIndex} to {@code chars} without modifying {@code chars},
	 * updating that rule's occurrence counter for every match encountered.
	 */
	private List<String> applyRule(int ruleIndex, List<String> chars) {
		Tuple2<Tuple2<List<String>, List<String>>, Integer> rule = rules.get(ruleIndex);
		List<String> input = rule._1._1;
		List<String> output = rule._1._2;
		int each = rule._2;
		int width = input.size();

		List<String> rewritten = new ArrayList<String>(chars.size());
		int pos = 0;
		while (pos < chars.size()) {
			int end = pos + width;
			boolean matches = end <= chars.size() && chars.subList(pos, end).equals(input);
			if (matches) {
				boolean replace = occurrences[ruleIndex] == each - 1;
				// Wrap instead of counting up forever so the counter cannot overflow.
				occurrences[ruleIndex] = replace ? 0 : occurrences[ruleIndex] + 1;
				if (replace) {
					rewritten.addAll(output); // add `output` to the result (to replace `input`)
					pos = end; // skip the rest of `input`
					continue;
				}
			}
			// No match, or a match that is not due for replacement: keep this
			// character and keep scanning from the next position.
			rewritten.add(chars.get(pos));
			++pos;
		}
		return rewritten;
	}

	/**
	 * Loads rules from a file with one rule per non-blank line. A rule line has
	 * three tab-separated fields: input, output and the integer {@code each}.
	 * Input and output are converted to character lists with
	 * {@code Charset.readNormalizeCharacters}.
	 *
	 * @param path location of the rules file
	 * @return the rules in file order
	 * @throws RuntimeException if a non-blank line does not split into exactly 3 parts
	 * @throws NumberFormatException if the third field is not an integer
	 */
	public static List<Tuple2<Tuple2<List<String>, List<String>>, Integer>> loadRulesFromFile(String path) {
		List<Tuple2<Tuple2<List<String>, List<String>>, Integer>> result = new ArrayList<Tuple2<Tuple2<List<String>, List<String>>, Integer>>();
		for (String line : f.readLines(path)) {
			if (line.trim().isEmpty()) {
				continue;
			}
			String[] parts = line.split("\t");
			if (parts.length != 3) throw new RuntimeException("line does not contain 3 parts. found: " + Arrays.asList(parts));
			List<String> input = Charset.readNormalizeCharacters(parts[0]);
			List<String> output = Charset.readNormalizeCharacters(parts[1]);
			// Tolerate stray whitespace around the count (e.g. a trailing space).
			Integer each = Integer.valueOf(parts[2].trim());
			result.add(Tuple2(Tuple2(input, output), each));
		}
		return result;
	}

	@Override
	public String toString() {
		return "ReplaceSomeTextReader(rules=..., " + delegate + ")";
	}
}