// Copyright 2013 Thomas Müller // This file is part of MarMoT, which is licensed under GPLv3. package marmot.morph.mapper.spanish; import java.io.File; import java.io.IOException; import java.io.Writer; import marmot.morph.mapper.Node; import marmot.morph.mapper.SyntaxTree; import marmot.morph.mapper.SyntaxTreeIterator; import marmot.util.FileUtils; public class ConllConverter { public static void main(String[] args) throws IOException { for (String filename : args) { SyntaxTreeIterator iterator = new SyntaxTreeIterator(filename, 1, 2, 4, 6, 8, 10, false); File file = new File(filename); String outfile = file.getName() + ".converted"; file = new File(outfile); if (file.exists()) { System.err.println("Error: Outfile already exists: " + file.getAbsolutePath()); System.exit(1); } Writer writer = FileUtils.openFileWriter(outfile); ConllConverter c = new ConllConverter(); while (iterator.hasNext()) { SyntaxTree tree = iterator.next(); for (Node node : tree.getNodes()) { EaglesTag tag = c.convert(node.getPos(), node.getFeats()); node.setMorphTag(tag); } for (Node node : tree.getNodes()) { ((EaglesTag) node.getMorphTag()).normalize(node, false); } tree.write(writer); writer.write('\n'); } writer.close(); } } private EaglesTag convert(String pos, String features) { IulaConverter converter = new IulaConverter(); EaglesTag tag = new EaglesTag(); converter.setPos(pos, tag); for (String feature : features.split("\\|")) { if (feature.equals("_")) { continue; } String[] key_value = feature.split("="); String key = key_value[0].toLowerCase(); String value = key_value[1].toLowerCase(); setKeyValueFeature(key, value, tag, converter); } return tag; } private void setKeyValueFeature(String key, String value, EaglesTag tag, IulaConverter c) { switch (key) { case "postype": case "punct": setType(value, tag); break; case "gen": assert value.length() == 1; c.setGender(value.charAt(0), tag); break; case "num": assert value.length() == 1; // CoNLL 2009 bug if (value.equals("c")) { value = "n"; } c.setNumber(value.charAt(0), tag); break; case "person": assert value.length() == 1; c.setPerson(value.charAt(0), tag); break; case "mood": setMood(value, tag); break; case "tense": setTense(value, tag); break; case "punctenclose": setClosing(value, tag); break; case "case": setCase(value, tag); break; case "contracted": assert value.equals("yes"); tag.form_ = EaglesTag.Form.c; break; case "possessornum": assert value.length() == 1; // CoNLL bug if (!value.equals("c")) { c.setOwnerNumber(value.charAt(0), tag); } break; case "polite": assert value.equals("yes"); tag.politeness_ = EaglesTag.Politeness.p; break; case "posfunction": assert value.equals("participle"); tag.function_ = EaglesTag.Function.p; break; default: System.err.println("Unknown key: " + key); } } private void setCase(String value, EaglesTag tag) { switch (value) { case "accusative": tag.case_ = EaglesTag.Case.a; break; case "dative": tag.case_ = EaglesTag.Case.d; break; case "nominative": tag.case_ = EaglesTag.Case.n; break; case "oblique": tag.case_ = EaglesTag.Case.o; break; default: throw new RuntimeException("Unknown case value: " + value); } } private void setClosing(String value, EaglesTag tag) { switch (value) { case "close": tag.closing_ = EaglesTag.Closing.t; break; case "open": tag.closing_ = EaglesTag.Closing.a; break; default: throw new RuntimeException("Unknown closing value: " + value); } } private void setTense(String value, EaglesTag tag) { switch (value) { case "conditional": tag.tense_ = EaglesTag.Tense.c; break; case "future": tag.tense_ = EaglesTag.Tense.f; break; case "imperfect": tag.tense_ = EaglesTag.Tense.i; break; case "past": tag.tense_ = EaglesTag.Tense.s; break; case "present": tag.tense_ = EaglesTag.Tense.p; break; default: throw new RuntimeException("Unknown tense value: " + value); } } private void setMood(String value, EaglesTag tag) { switch (value) { case "gerund": tag.mood_ = EaglesTag.Mood.g; break; case "imperative": tag.mood_ = EaglesTag.Mood.m; break; case "subjunctive": tag.mood_ = EaglesTag.Mood.s; break; case "infinitive": tag.mood_ = EaglesTag.Mood.n; break; case "pastparticiple": tag.mood_ = EaglesTag.Mood.p; break; case "indicative": tag.mood_ = EaglesTag.Mood.i; break; default: throw new RuntimeException("Unknown mood value: " + value); } } private void setType(String value, EaglesTag tag) { switch (value) { case "article": case "auxiliary": case "exclamationmark": tag.type_ = EaglesTag.Type.a; break; case "common": case "coordinating": case "comma": tag.type_ = EaglesTag.Type.c; break; case "demonstrative": case "colon": tag.type_ = EaglesTag.Type.d; break; case "exclamative": case "quotation": tag.type_ = EaglesTag.Type.e; break; case "general": case "hyphen": tag.type_ = EaglesTag.Type.g; break; case "indefinite": case "questionmark": case "numeral": tag.type_ = EaglesTag.Type.i; break; case "interrogative": case "percentage": tag.type_ = EaglesTag.Type.t; break; case "main": case "principal": case "currency": tag.type_ = EaglesTag.Type.m; break; case "negative": tag.type_ = EaglesTag.Type.n; break; case "ordinal": tag.type_ = EaglesTag.Type.o; break; case "proper": case "personal": case "preposition": case "bracket": case "period": tag.type_ = EaglesTag.Type.p; break; case "qualificative": tag.type_ = EaglesTag.Type.q; break; case "relative": tag.type_ = EaglesTag.Type.r; break; case "semiauxiliary": case "subordinating": case "etc": tag.type_ = EaglesTag.Type.s; break; case "possessive": if (tag.pos_ == EaglesTag.Pos.d) { tag.type_ = EaglesTag.Type.p; } else if (tag.pos_ == EaglesTag.Pos.p) { tag.type_ = EaglesTag.Type.x; } else { assert false; } break; case "semicolon": tag.type_ = EaglesTag.Type.x; break; case "mathsign": tag.type_ = EaglesTag.Type.z; break; case "slash": tag.type_ = EaglesTag.Type.h; break; default: throw new RuntimeException("Unknown type/punct value: " + value); } } }