package experimental.ising;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.javatuples.Pair;
/**
 * Reads a tab-separated lexicon file and fills the lookup tables inherited from
 * {@link DataReader}.
 *
 * <p>Every input line is expected to carry at least three tab-separated fields, in this
 * order: word form, lemma, tag. Any further fields on a line are ignored.
 *
 * <p>NOTE(review): judging by the class name the input is presumably the Morph-it! lexicon;
 * confirm against the callers. The file is decoded by {@link FileReader}, i.e. with the
 * platform default charset on JDKs before 18 -- confirm this matches the file's encoding.
 */
public class MorphItReader extends DataReader {

    /** Number of leading tab-separated fields (word, lemma, tag) consumed from each line. */
    private static final int FIELDS_PER_LINE = 3;

    /**
     * Loads the given file and populates {@code word2Tags}, {@code word2LemmaTag},
     * {@code tag2Integer}, {@code integer2Tag}, {@code tagNames}, {@code numVariables},
     * {@code data} and {@code pairsLst}.
     *
     * <p>Lines with fewer than three tab-separated fields are skipped; non-blank ones are
     * reported on standard error together with their line number. If the file cannot be
     * opened or read, the error is reported on standard error and the JVM is terminated
     * with exit status 1.
     *
     * @param fileName path of the tab-separated input file
     */
    public MorphItReader(String fileName) {
        super();
        System.out.println("...reading in:\t" + fileName);

        int lineNumber = 0;
        // try-with-resources closes the reader on every path, including before the catch runs.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                String[] fields = line.split("\t");
                if (fields.length < FIELDS_PER_LINE) {
                    // Blank lines are dropped quietly; anything else that is too short is
                    // reported so that corrupt input does not go unnoticed.
                    if (!line.trim().isEmpty()) {
                        System.err.println("Skipping malformed line " + lineNumber
                                + " in " + fileName + ": " + line);
                    }
                    continue;
                }
                indexMorphItEntry(fields[0], fields[1], fields[2]);
            }
        } catch (IOException ioe) {
            // Keep the original "terminate on I/O failure" contract, but say what went
            // wrong and signal the failure with a non-zero exit status.
            System.err.println("Failed to read " + fileName + " (after line " + lineNumber
                    + "): " + ioe);
            System.exit(1);
        }

        collectMorphItData();

        // Tag-pair collection is currently disabled in this reader, so nothing is added to
        // 'pairs' here; the list view is still built from whatever 'pairs' already holds.
        this.pairsLst = new ArrayList<>(this.pairs);
    }

    /** Records one (word, lemma, tag) analysis in the word and tag lookup tables. */
    private void indexMorphItEntry(String word, String lemma, String tag) {
        // First sighting of this word: create both of its per-word sets together.
        if (!this.word2Tags.containsKey(word)) {
            this.word2Tags.put(word, new HashSet<String>());
            this.word2LemmaTag.put(word, new HashSet<Pair<String, String>>());
        }
        this.word2Tags.get(word).add(tag);
        this.word2LemmaTag.get(word).add(new Pair<>(lemma, tag));

        // Each new tag gets the next free integer id; both directions are kept in sync.
        if (!this.tag2Integer.containsKey(tag)) {
            int index = this.tag2Integer.size();
            this.tag2Integer.put(tag, index);
            this.integer2Tag.put(index, tag);
        }
        this.numVariables = this.tag2Integer.size();

        if (!this.tagNames.contains(tag)) {
            this.tagNames.add(tag);
        }
    }

    /** Builds one {@code Datum} per word from its collected (lemma, tag) analyses. */
    private void collectMorphItData() {
        for (String word : this.word2LemmaTag.keySet()) {
            // lemmas.get(i) and tags.get(i) describe the same analysis of the word.
            ArrayList<String> lemmas = new ArrayList<String>();
            ArrayList<Integer> tags = new ArrayList<Integer>();
            for (Pair<String, String> analysis : this.word2LemmaTag.get(word)) {
                int tagIndex = this.tag2Integer.get(analysis.getValue1());
                lemmas.add(analysis.getValue0());
                tags.add(tagIndex);
            }
            data.add(new Datum(word, lemmas, tags));
        }
    }
}