package clear.engine;
import clear.morph.MorphEnAnalyzer;
import clear.treebank.TBNode;
import clear.treebank.TBReader;
import clear.treebank.TBTree;
import clear.util.IOUtil;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
/**
 * Command-line tool that reads every tree from a parse file, runs {@link #extract} on each
 * tree's root node, and then writes the contents of three string sets, sorted, to
 * {@code <outputFile>.raising}, {@code <outputFile>.sbjControl} and
 * {@code <outputFile>.objControl}.
 *
 * <p>NOTE(review): in the code visible here nothing is ever added to the three sets, so the
 * output files will be empty. The extraction logic in {@link #extract} looks unfinished.
 */
public class ExtractVerbs {
    /** Number of trees between two progress updates on standard output. */
    private static final int PROGRESS_INTERVAL = 1000;

    /** Entries written to {@code <outputFile>.raising}. */
    HashSet<String> m_raising;
    /** Entries written to {@code <outputFile>.sbjControl}. */
    HashSet<String> m_sbjControl;
    /** Entries written to {@code <outputFile>.objControl}. */
    HashSet<String> m_objControl;

    /**
     * Runs the whole pipeline: reads all trees, processes each root node, prints progress to
     * standard output, and finally writes the three output files.
     *
     * @param parseFile  path handed to {@link TBReader} as the source of trees
     * @param outputFile prefix of the three output files written by {@link #print}
     * @param dictFile   path handed to {@link MorphEnAnalyzer}
     */
    public ExtractVerbs(String parseFile, String outputFile, String dictFile) {
        TBReader reader = new TBReader(parseFile);
        MorphEnAnalyzer morph = new MorphEnAnalyzer(dictFile);

        m_raising = new HashSet<>();
        m_sbjControl = new HashSet<>();
        m_objControl = new HashSet<>();

        int n = 0;
        TBTree tree;

        // nextTree() returning null marks the end of the input.
        while ((tree = reader.nextTree()) != null) {
            extract(tree.getRootNode(), morph);

            if (n % PROGRESS_INTERVAL == 0) {
                // Show the count in thousands; printing the raw count followed by "K"
                // overstated the progress by a factor of 1000.
                System.out.print("\r" + (n / PROGRESS_INTERVAL) + "K");
            }
            n++;
        }

        System.out.println("\r" + n);
        print(outputFile);
    }

    /**
     * Inspects the direct children of {@code curr} when it is a phrase tagged {@code VP}.
     *
     * <p>A child matching {@code VB.*} sets the current lemma; a following child matching
     * {@code PRP|RP} appends {@code "_" + pos} (lower-cased) to it.
     *
     * <p>NOTE(review): the resulting lemma is never stored, the {@code S} branch is empty, and
     * the method does not descend into child nodes, so only the node passed in is examined.
     * Confirm the intended behaviour before relying on the output files.
     *
     * @param curr  node to inspect
     * @param morph analyzer used to obtain the lemma of a verb child
     */
    final void extract(TBNode curr, MorphEnAnalyzer morph) {
        if (!curr.isPhrase() || !curr.isPos("VP")) {
            return;
        }

        String lemma = null;

        for (TBNode child : curr.getChildren()) {
            if (child.isPos("VB.*")) {
                lemma = morph.getLemma(child.form, child.pos);
            } else if (child.isPos("PRP|RP") && lemma != null) {
                lemma += "_" + child.pos.toLowerCase();
            } else if (child.isPos("S") && lemma != null) {
                // Intentionally left as in the original: no action is taken for a clause
                // that follows a verb. NOTE(review): presumably the classification into
                // the three sets was meant to happen here -- confirm.
            }
        }
    }

    /**
     * Writes each of the three sets, sorted in natural string order and one entry per line, to
     * its own file: {@code outputFile + ".raising"}, {@code ".sbjControl"} and
     * {@code ".objControl"}.
     *
     * @param outputFile common prefix of the three output paths
     */
    final void print(String outputFile) {
        printSorted(outputFile + ".raising", m_raising);
        printSorted(outputFile + ".sbjControl", m_sbjControl);
        printSorted(outputFile + ".objControl", m_objControl);
    }

    /** Writes {@code items} in sorted order to {@code path}, one per line, and closes the stream. */
    private static void printSorted(String path, HashSet<String> items) {
        ArrayList<String> sorted = new ArrayList<>(items);
        Collections.sort(sorted);

        // try-with-resources guarantees the stream is flushed and closed; previously the
        // streams were simply dropped, which could lose buffered output and leak handles.
        try (PrintStream fout = IOUtil.createPrintFileStream(path)) {
            for (String item : sorted) {
                fout.println(item);
            }
        }
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} parse file, {@code args[1]} output-file prefix,
     *             {@code args[2]} dictionary file
     */
    static public void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: ExtractVerbs <parseFile> <outputFile> <dictFile>");
            System.exit(1);
        }

        // All work happens in the constructor; the instance itself is not needed afterwards.
        new ExtractVerbs(args[0], args[1], args[2]);
    }
}