package experimental.analyzer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import marmot.core.Sequence;
import marmot.core.Token;
import marmot.morph.Word;
import marmot.morph.io.SentenceReader;
import marmot.util.Mutable;
/**
 * A word form together with the collection of {@link AnalyzerReading}s
 * observed for it.
 *
 * <p>Instances are created either from a collection of tokens (one reading per
 * token, duplicates collapsed) or directly from a form and a ready-made
 * collection of readings. Two static factories build whole collections of
 * instances from a file read through {@link SentenceReader}.
 */
public class AnalyzerInstance implements Serializable {

    private static final long serialVersionUID = 1L;

    /** The word form; stays {@code null} when built from an empty token collection. */
    private String form_;

    /** The readings associated with {@link #form_}. */
    private Collection<AnalyzerReading> readings_;

    /**
     * Builds an instance from a collection of tokens.
     *
     * <p>Every token is cast to {@link Word}. The form is taken from the first
     * token that yields a non-null word form; an assertion (active only when
     * assertions are enabled) checks that each token has that same form. One
     * {@link AnalyzerReading} is created per token and the readings are
     * de-duplicated through a {@link HashSet} before being stored in a list.
     *
     * @param sequence the tokens to convert; each element must be a {@link Word}
     */
    public AnalyzerInstance(Collection<Token> sequence) {
        String sharedForm = null;
        // NOTE(review): de-duplication presumably relies on AnalyzerReading
        // defining equals/hashCode - confirm in that class.
        Set<AnalyzerReading> uniqueReadings = new HashSet<>();

        for (Token token : sequence) {
            Word word = (Word) token;
            String currentForm = word.getWordForm();

            if (sharedForm == null) {
                sharedForm = currentForm;
            }
            assert sharedForm.equals(currentForm);

            uniqueReadings.add(new AnalyzerReading(word));
        }

        form_ = sharedForm;
        readings_ = new ArrayList<>(uniqueReadings);
    }

    /**
     * Builds an instance from an explicit form and readings.
     *
     * <p>The given collection is stored as-is (no copy is made).
     *
     * @param form the word form
     * @param readings the readings to associate with {@code form}
     */
    public AnalyzerInstance(String form, Collection<AnalyzerReading> readings) {
        this.form_ = form;
        this.readings_ = readings;
    }

    /**
     * Reads the given file with a {@link SentenceReader} and turns every
     * sequence it yields into one {@code AnalyzerInstance}.
     *
     * @param filename passed unchanged to the {@link SentenceReader} constructor
     * @return the instances, in the order the reader produced the sequences
     */
    public static Collection<AnalyzerInstance> getInstances(String filename) {
        List<AnalyzerInstance> instances = new LinkedList<>();
        SentenceReader reader = new SentenceReader(filename);
        for (Sequence sequence : reader) {
            AnalyzerInstance instance = new AnalyzerInstance(sequence);
            instances.add(instance);
        }
        return instances;
    }

    /**
     * Reads the given file with a {@link SentenceReader} and groups all tokens
     * by word form, counting how often each reading occurs with each form.
     *
     * <p>The result contains one instance per distinct word form. Each reading
     * of an instance has had {@code setCount} called with the number of tokens
     * that produced an equal reading for that form.
     *
     * @param filename passed unchanged to the {@link SentenceReader} constructor
     * @return one instance per distinct word form, in the iteration order of
     *     the underlying {@link HashMap}
     */
    public static Collection<AnalyzerInstance> getTreebankInstances(String filename) {
        Map<String, Map<AnalyzerReading, Mutable<Integer>>> countsByForm = countReadings(filename);

        List<AnalyzerInstance> instances = new LinkedList<>();
        for (Map.Entry<String, Map<AnalyzerReading, Mutable<Integer>>> formEntry
                : countsByForm.entrySet()) {
            Collection<AnalyzerReading> readings = toCountedReadings(formEntry.getValue());
            instances.add(new AnalyzerInstance(formEntry.getKey(), readings));
        }
        return instances;
    }

    /**
     * Tallies, per word form, how many tokens in the file map to each reading.
     */
    private static Map<String, Map<AnalyzerReading, Mutable<Integer>>> countReadings(
            String filename) {
        Map<String, Map<AnalyzerReading, Mutable<Integer>>> countsByForm = new HashMap<>();

        for (Sequence sequence : new SentenceReader(filename)) {
            for (Token token : sequence) {
                Word word = (Word) token;
                String form = word.getWordForm();

                Map<AnalyzerReading, Mutable<Integer>> readingCounts = countsByForm.get(form);
                if (readingCounts == null) {
                    readingCounts = new HashMap<>();
                    countsByForm.put(form, readingCounts);
                }

                AnalyzerReading reading = new AnalyzerReading(word);
                Mutable<Integer> counter = readingCounts.get(reading);
                if (counter == null) {
                    // The first reading seen becomes the map key that is later
                    // handed to the resulting instance.
                    counter = new Mutable<Integer>(0);
                    readingCounts.put(reading, counter);
                }
                counter.set(counter.get() + 1);
            }
        }
        return countsByForm;
    }

    /**
     * Writes each tallied count into its reading and collects the readings.
     */
    private static Collection<AnalyzerReading> toCountedReadings(
            Map<AnalyzerReading, Mutable<Integer>> readingCounts) {
        Collection<AnalyzerReading> readings = new LinkedList<>();
        for (Map.Entry<AnalyzerReading, Mutable<Integer>> countEntry : readingCounts.entrySet()) {
            AnalyzerReading reading = countEntry.getKey();
            reading.setCount(countEntry.getValue().get());
            readings.add(reading);
        }
        return readings;
    }

    /**
     * @return the word form of this instance
     */
    public String getForm() {
        return this.form_;
    }

    /**
     * @return the readings held by this instance (the internal collection, not a copy)
     */
    public Collection<AnalyzerReading> getReadings() {
        return this.readings_;
    }
}