// This file is part of AceWiki. // Copyright 2008-2013, AceWiki developers. // // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU // Lesser General Public License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. // // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If // not, see http://www.gnu.org/licenses/. package ch.uzh.ifi.attempto.aceeditor; import ch.uzh.ifi.attempto.base.TextElement; import ch.uzh.ifi.attempto.chartparser.LexicalRule; import ch.uzh.ifi.attempto.chartparser.Preterminal; import ch.uzh.ifi.attempto.preditor.MenuEntry; /** * This class manages words for the lexicon of the ACE Editor. * * @author Tobias Kuhn */ class Word { private String wordForm, symbol, entry; private Preterminal category; private final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(this.getClass()); /** * Generates a new word on the basis of a lexicon entry according to the ACE lexicon * specification. * * @param lexiconEntry A lexicon entry in the ACE lexicon format. */ public Word(String lexiconEntry) { if (lexiconEntry.matches("[a-z_]+\\('?[A-Za-z0-9-_]+'?,\\s*'?[A-Za-z0-9-_]+'?.*\\)\\.\\s*")) { entry = lexiconEntry.replaceFirst("^(.*)\\.\\s*$", "$1"); category = new Preterminal(lexiconEntry.replaceFirst("^([a-z_]+)\\(.*$", "$1")); if (category.getName().equals("pn_sg")) category = new Preterminal("prop_sg"); if (category.getName().equals("pndef_sg")) category = new Preterminal("propdef_sg"); wordForm = lexiconEntry.replaceFirst("^[a-z_]+\\('?([A-Za-z0-9-_]+)'?,.*$", "$1"); symbol = lexiconEntry.replaceFirst("^[a-z_]+\\('?[A-Za-z0-9-_]+'?,\\s*'?([A-Za-z0-9-_]+)'?.*$", "$1"); readFeatures(lexiconEntry.replaceFirst("^[a-z_]+\\('?[A-Za-z0-9-_]+'?,\\s*'?[A-Za-z0-9-_]+'?(.*)\\)\\.\\s*", "$1")); category.setFeature("text", wordForm); } else if (!lexiconEntry.equals("") && !lexiconEntry.matches("\\s*%.*")) { log.warn("Invalid lexicon entry: {}", lexiconEntry); } } /** * Returns the word form how it appears in ACE texts. * * @return The word form. */ public String getWordForm() { return wordForm; } /** * Returns the text of a token representing this word. For proper names with definite articles, * the article "the" is part of the token but not part of the word form. Otherwise, the token * text is the same as the word form. * * @return The token text. */ public String getTokenText() { if (category.getName().equals("propdef_sg")) { return "the " + wordForm; } else { return wordForm; } } /** * Returns the symbol of this word how it appears in the logical representation. * * @return The symbol of this word. */ public String getSymbol() { return symbol; } /** * Returns this word as a lexicon entry accoring to the ACE lexicon format. * * @return The lexicon entry in the ACE lexicon format. */ public String getEntry() { return entry; } /** * Returns the pre-terminal category for this word form. * * @return The pre-terminal category. */ public Preterminal getCategory() { return category; } /** * Returns the lexical rule for this word form. * * @return The lexical rule. */ public LexicalRule getLexicalRule() { return new LexicalRule(category, wordForm); } /** * Creates a text element containing this word. * * @return A new text element. */ public TextElement getTextElement() { return new TextElement(getTokenText()); } /** * Creates a menu entry containing this word. * * @param menuGroup The menu group of the menu entry to be created. * @return A new menu entry. */ public MenuEntry getMenuEntry(String menuGroup) { return new MenuEntry(getTextElement(), menuGroup); } private void readFeatures(String featureString) { String[] s = featureString .replaceAll("\\s+", "") .replaceFirst("^,", "") .replaceAll("^'", "") .replaceAll("'$", "") .replaceAll(",'", "") .replaceAll("',", "") .split(","); String n = category.getName(); if (n.equals("adj_tr") || n.equals("adj_tr_comp")) { category.setFeature("prep", s[0]); } else if (n.equals("noun_sg") || n.equals("noun_pl") || n.equals("prop_sg") || n.equals("propdef_sg")) { if (s[0].equals("masc") || s[0].equals("fem")) { category.setFeature("human", "plus"); category.setFeature("gender", s[0]); } else if (s[0].equals("human")) { category.setFeature("human", "plus"); } else if (s[0].equals("neutr")) { category.setFeature("human", "minus"); } } } }