package edu.stanford.nlp.trees;

import edu.stanford.nlp.util.logging.Redwood;

import java.io.IOException;
import java.util.List;

import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
import edu.stanford.nlp.util.Pair;

/**
 * Helper class to perform a context-sensitive mapping of POS
 * tags in a tree to universal POS tags.
 * <p>
 * The mapping is described by a list of Tregex/Tsurgeon operation pairs that
 * is read from a Tsurgeon file. The list is held in static state and is read
 * on the first call to {@link #mapTree(Tree)} unless one of the {@code load}
 * methods has been called before. The static state is not synchronized.
 *
 * @author Sebastian Schuster
 */
public class UniversalPOSMapper {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(UniversalPOSMapper.class);

  /** Tsurgeon file that is read by {@link #load()}. */
  public static final String DEFAULT_TSURGEON_FILE = "edu/stanford/nlp/models/upos/ENUniversalPOS.tsurgeon";

  /** Whether a load attempt has run to completion (successfully or with an I/O failure). */
  private static boolean loaded = false;

  /** The operations applied by {@link #mapTree(Tree)}; {@code null} until a load succeeds. */
  private static List<Pair<TregexPattern, TsurgeonPattern>> operations = null;

  /** Static utility class; not instantiable. */
  private UniversalPOSMapper() {}

  // static methods

  /**
   * Loads the mapping operations from {@link #DEFAULT_TSURGEON_FILE}.
   */
  public static void load() {
    load(DEFAULT_TSURGEON_FILE);
  }

  /**
   * Loads the mapping operations from the given Tsurgeon file, read as UTF-8.
   * <p>
   * An {@link IOException} while reading is not propagated: a warning is
   * logged, the previously held operations (if any) are left untouched, and
   * the mapper is still marked as loaded so that {@link #mapTree(Tree)} does
   * not retry the read on every call.
   *
   * @param filename location of the Tsurgeon file, passed as-is to
   *                 {@link Tsurgeon#getOperationsFromFile}
   */
  public static void load(String filename) {
    try {
      operations = Tsurgeon.getOperationsFromFile(filename, "UTF-8", new TregexPatternCompiler());
    } catch (IOException e) {
      // Report through the class logger rather than writing to System.err directly.
      log.warn(String.format("%s: Warning - could not load Tsurgeon file from %s.%n",
          UniversalPOSMapper.class.getSimpleName(), filename));
    }
    // Set the flag only once the attempt is over. Setting it up front would
    // leave the class marked as loaded with no operations if the call above
    // fails with an unchecked exception, and would let another caller observe
    // loaded == true before the operations have been assigned.
    loaded = true;
  }

  /**
   * Applies the loaded mapping operations to a tree.
   * <p>
   * If nothing has been loaded yet, {@link #load()} is called first. The
   * operations are run on a deep copy of {@code t}. If no operations are
   * available (the load failed), {@code t} itself is returned, not a copy.
   *
   * @param t the tree to map
   * @return the result of running the operations on a deep copy of
   *         {@code t}, or {@code t} itself when no operations are available
   */
  public static Tree mapTree(Tree t) {
    if (!loaded) {
      load();
    }

    if (operations == null) {
      return t;
    }

    return Tsurgeon.processPatternsOnTree(operations, t.deepCopy());
  }

}