package ir.ac.iust.nlp.dependencyparser.converter;
/**
*
* @author Mojtaba Khallash
*/
public class ConverterGenerator {
public static String generateConverter(String type) {
int ind = type.lastIndexOf(".py");
if (ind != -1) {
type = type.substring(0, ind);
}
switch (type) {
case "conll2mst":
return generateCoNLL2MST();
case "mst2conll":
return generateMST2CoNLL();
}
return "";
}
// generate "conll2mst.py" file
private static String generateCoNLL2MST() {
StringBuilder content = new StringBuilder();
content.append("#! /usr/bin/python\n\n");
content.append("import re;\n");
content.append("import sys;\n\n");
content.append("# Open File\n");
content.append("f = open(sys.argv[1],'rt');\n\n");
content.append("wrds = \"\"; pos = \"\"; labs = \"\"; par = \"\";\n\n");
content.append("for line in f:\n\n");
content.append(" if len(line.strip()) > 0:\n");
content.append(" sent = re.split(\"\\t\", line);\n\n");
content.append(" wrds += sent[1] + \"\\t\";\n");
content.append(" pos += sent[4] + \"\\t\";\n");
content.append(" labs += sent[7] + \"\\t\";\n");
content.append(" par += sent[6] + \"\\t\";\n");
content.append(" else:\n");
content.append(" print wrds; wrds = \"\";\n");
content.append(" print pos; pos = \"\";\n");
content.append(" print labs; labs = \"\";\n");
content.append(" print par; par = \"\";\n");
content.append(" print \"\";\n\n");
content.append("f.close();\n\n");
return content.toString();
}
// generate "mst2conll.py" file
public static String generateMST2CoNLL() {
StringBuilder content = new StringBuilder();
content.append("#! /usr/bin/python\n\n");
content.append("import re;\n");
content.append("import sys;\n\n");
content.append("# Open File\n");
content.append("f = open(sys.argv[1],'rt');\n\n");
content.append("wrds = \"\";\n");
content.append("pos = \"\";\n");
content.append("labs = \"\";\n");
content.append("par = \"\";\n\n");
content.append("for line in f:\n\n");
content.append(" if len(line.strip()) == 0:\n");
content.append(" w = re.split(\"\\t\", wrds); p = re.split(\"\\t\", pos); l = re.split(\"\\t\", labs); pa = re.split(\"\\t\", par);\n");
content.append(" cnt = 1;\n");
content.append(" for t in w:\n");
content.append(" print str(cnt) + \"\\t\" + t + \"\\t\" + t + \"\\t\" + p[cnt-1] + \"\\t\" + p[cnt-1] + \"\\t_\\t\" + pa[cnt-1] + \"\\t\" + l[cnt-1];\n");
content.append(" cnt += 1;\n");
content.append(" print \"\";\n");
content.append(" wrds = \"\"; pos = \"\"; labs = \"\"; par = \"\";\n");
content.append(" elif len(wrds) == 0:\n");
content.append(" wrds = line.strip();\n");
content.append(" elif len(pos) == 0:\n");
content.append(" pos = line.strip();\n");
content.append(" elif len(labs) == 0:\n");
content.append(" labs = line.strip();\n");
content.append(" else:\n");
content.append(" par = line.strip();\n\n");
content.append("f.close();\n\n");
return content.toString();
}
}