/* * (c) 2000-2009 Carlos G�mez Rodr�guez, todos los derechos reservados / all rights reserved. * Licencia en license/bsd.txt / License in license/bsd.txt */ package eu.irreality.age; import java.lang.*; import java.io.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import eu.irreality.age.filemanagement.Paths; import eu.irreality.age.i18n.UIMessages; import eu.irreality.age.language.Catalan; import eu.irreality.age.language.English; import eu.irreality.age.language.Esperanto; import eu.irreality.age.language.Galician; import eu.irreality.age.language.LanguageUtils; import eu.irreality.age.language.Mentions; import eu.irreality.age.language.Spanish; import eu.irreality.age.language.Translation; import eu.irreality.age.spell.Correction; import eu.irreality.age.spell.ReferenceNameCorrector; import eu.irreality.age.spell.SimpleReverseCorrector; import eu.irreality.age.spell.SpellingCorrector; public class NaturalLanguage { private static String defaultVerbPath = Paths.LANG_FILES_PATH + "/verbos.lan"; private static String defaultSynonymPath = Paths.LANG_FILES_PATH + "/sinon.lan"; private static String defaultAliasPath = Paths.LANG_FILES_PATH + "/alias.lan"; private static String defaultVerb32Path = Paths.LANG_FILES_PATH + "/verbos32.lan"; private static String defaultCommonWordPath = Paths.LANG_FILES_PATH + "/common.lan"; private Map imperativoAInfinitivo; private Map infinitivoAImperativo; private Map sinonimos; private Map terceraASegunda; //we keep these two kinds of aliases separate for efficiency when applying them: we could consider all aliases to be regex aliases, but it would be impractical to match all aliases against each string private Map simpleAliases; //aliases that are just words private Map regexAliases; //aliases that are regular expressions //common words that may appear in sentences even though they don't refer to world objects, and thus will not be subject to spelling correction private List commonWords; private SpellingCorrector verbCorrector; public static String DEFAULT_LANGUAGE_CODE = "es"; /** * The ISO code for this language. */ private String languageCode = null; /** * ISO language code. * @return */ public String getLanguageCode() { return languageCode; } /** * Obtain path to verb file. * @return */ String getVerbPath ( ) { if ( languageCode != null ) return Paths.LANG_FILES_PATH + "/" + languageCode + "/verbos.lan"; return defaultVerbPath; } /** * Obtain path to synonym file (unused). * @return */ String getSynonymPath ( ) { if ( languageCode != null ) return Paths.LANG_FILES_PATH + "/" + languageCode + "/sinon.lan"; return defaultSynonymPath; } /** * Obtain path to alias file. * @return */ String getAliasPath ( ) { if ( languageCode != null ) return Paths.LANG_FILES_PATH + "/" + languageCode + "/alias.lan"; return defaultAliasPath; } /** * Obtain path to verb 3rd to 2nd person file. * @return */ String getVerb32Path ( ) { if ( languageCode != null ) return Paths.LANG_FILES_PATH + "/" + languageCode + "/verbos32.lan"; return defaultVerb32Path; } /** * Obtain path to common word file. * @return */ String getCommonWordPath ( ) { if ( languageCode != null ) return Paths.LANG_FILES_PATH + "/" + languageCode + "/common.lan"; return defaultCommonWordPath; } /**Verbs that are considered guessable by second-chance mode * even if the guess policy is set to false * */ private Set guessable = new LinkedHashSet(); /** * Verbs that are considered not guessable in second-chance mode * even if the guess policy is set to true */ private Set unguessable = new LinkedHashSet(); /** * If true, all verbs are guessable unless in the unguessable set. * (default). * If false, all verbs are unguessable unless in the guessable set. */ private boolean defaultGuessPolicy = true; public void setUnguessable ( String verb ) { unguessable.add(verb); guessable.remove(verb); } public void setGuessable ( String verb ) { unguessable.remove(verb); guessable.add(verb); } public void setAllGuessable ( ) { unguessable.clear(); guessable.clear(); defaultGuessPolicy = true; } public void setAllUnguessable ( ) { unguessable.clear(); guessable.clear(); defaultGuessPolicy = false; } public boolean isGuessable ( String verb ) { if ( defaultGuessPolicy ) { return !unguessable.contains(verb); } else { return guessable.contains(verb); } } public static NaturalLanguage getInstance ( ) { return getInstance ( DEFAULT_LANGUAGE_CODE ); } public static NaturalLanguage getInstance ( String languageCode ) { if ( languageCode.equals("es") ) return new Spanish(); else if ( languageCode.equals("en") ) return new English(); else if ( languageCode.equals("eo") ) return new Esperanto(); else if ( languageCode.equals("gl") ) return new Galician(); else if ( languageCode.equals("ca") ) return new Catalan(); else return new NaturalLanguage(languageCode); } private NaturalLanguage ( ) { this( DEFAULT_LANGUAGE_CODE ); } protected NaturalLanguage ( String languageCode ) { this.languageCode = languageCode; //load the files needed for the natural language utils to work try { imperativoAInfinitivo = LanguageUtils.loadTableFromPath ( getVerbPath() , '=' ); infinitivoAImperativo = LanguageUtils.loadInvertedTableFromPath ( getVerbPath() , '=' , false ); //false: en este caso solo las primeras, i.e. a imperativo, no a 1� pers } catch ( Exception exc ) { //System.err.println("Aviso: no se ha encontrado fichero de verbos, la tabla de verbos estar� vac�a."); System.err.println(UIMessages.getInstance().getMessage("warning.no.verb.file")); exc.printStackTrace(); imperativoAInfinitivo = new Hashtable(1); } try { sinonimos = LanguageUtils.loadTableFromPath ( getSynonymPath() , '=' ); } catch ( Exception exc ) { //System.err.println("Aviso: no se ha encontrado fichero de sin�nimos, la tabla de sin�nimos estar� vac�a."); System.err.println(UIMessages.getInstance().getMessage("warning.no.syn.file")); sinonimos = new Hashtable(1); } try { simpleAliases = LanguageUtils.loadTableFromPath ( getAliasPath() , '=' ); } catch ( Exception exc ) { //System.err.println("Aviso: no se ha encontrado fichero de alias, la tabla de alias estar� vac�a."); System.err.println(UIMessages.getInstance().getMessage("warning.no.alias.file")); simpleAliases = new Hashtable(1); } try { terceraASegunda = LanguageUtils.loadTableFromPath ( getVerb32Path() , ' ' ); } catch ( Exception exc ) { //System.err.println("Aviso: no se ha encontrado fichero de conjugaci�n en 2� persona, la tabla estar� vac�a."); System.err.println(UIMessages.getInstance().getMessage("warning.no.2p.file")); terceraASegunda = new Hashtable(1); } try { commonWords = LanguageUtils.loadListFromPath( getCommonWordPath() ); } catch (Exception exc) { System.err.println(UIMessages.getInstance().getMessage("warning.no.common.file")); commonWords = new ArrayList(); } //split aliases into simple and regex aliases regexAliases = new LinkedHashMap(); for ( Iterator iter = simpleAliases.keySet().iterator() ; iter.hasNext() ; ) { String key = (String) iter.next(); if ( !isSimpleAliasString(key) ) { String value = (String) simpleAliases.get(key); iter.remove(); regexAliases.put(key,value); } } } /** * @deprecated Use {@link #toInfinitive(String)} instead */ public String imperativoAInfinitivo ( String presente ) { return toInfinitive(presente); } public String toInfinitive ( String presente ) { return (String) imperativoAInfinitivo.get ( presente.toLowerCase() ); } public String obtenerSinonimo ( String palabra ) { return (String) sinonimos.get ( palabra.toLowerCase() ); } /** * @deprecated Use {@link #getAlias(String)} instead */ public String obtenerAlias ( String palabra ) { return getAlias(palabra); } /** * Note that this only works on simple aliases, not on regex aliases. * @param palabra * @return */ public String getAlias ( String palabra ) { return (String) simpleAliases.get ( palabra.toLowerCase() ); } public String sustituirSinonimos ( String s ) { StringTokenizer st = new StringTokenizer ( s , " " , true ); String nueva = ""; while ( st.hasMoreTokens() ) { String tok = st.nextToken(); String sin = obtenerSinonimo(tok); if ( sin == null ) { nueva += tok; } else { nueva += sin; } } return nueva; } /** * @deprecated Use {@link #substituteVerbs(String)} instead */ public String sustituirVerbos ( String s ) { return substituteVerbs(s); } /** * Replaces every verb appearing in the sentence with its canonical form. * As of 2015-03-29, unused by the existing languages supported in AGE, but kept because it might be useful for future languages. */ public String substituteVerbs ( String s ) { StringTokenizer st = new StringTokenizer ( s , " " , true ); String nueva = ""; while ( st.hasMoreTokens() ) { String tok = st.nextToken(); String sin = toInfinitive(tok); if ( sin == null ) { nueva += tok; } else { nueva += sin; } } return nueva; } //sustituye verbo como en sustituirVerbos() pero s�lo si el verbo es la primera palabra /** * @deprecated Use {@link #substituteVerb(String)} instead */ public String sustituirVerbo ( String s ) { return substituteVerb(s); } //sustituye verbo como en substituteVerbs() pero s�lo si el verbo es la primera palabra public String substituteVerb ( String s ) { StringTokenizer st = new StringTokenizer ( s , " " , true ); String nueva = ""; int tokcnt = 0; while ( st.hasMoreTokens() ) { String tok = st.nextToken(); String sin = toInfinitive(tok); if ( sin == null || tokcnt > 0 ) { nueva += tok; } else { nueva += sin; } tokcnt++; } return nueva; } //devuelve true si la palabra dada es un posible comando. (ir, salir...) false si no. (mesa...) /** * @deprecated Use {@link #isVerb(String)} instead */ public boolean esVerboComando ( String s ) { return isVerb(s); } //devuelve true si la palabra dada es un posible comando. (ir, salir...) false si no. (mesa...) public boolean isVerb ( String s ) { if ( infinitivoAImperativo.get(s.toLowerCase().trim()) != null || imperativoAInfinitivo.get(s.toLowerCase().trim()) != null ) { return true; } else return false; } /** * Adds an entry to the verb table. * @param imperative Imperative or 1st person form of the verb. * @param infinitive Infinitive form of the verb. */ public void addVerbEntry ( String imperative , String infinitive ) { imperativoAInfinitivo.put(imperative,infinitive); infinitivoAImperativo.put(infinitive,imperative); } public void removeVerbEntry ( String imperative , String infinitive ) { imperativoAInfinitivo.remove(imperative); infinitivoAImperativo.remove(infinitive); } /** * Removes an verb in all of its forms, given the infinitive, if it exists. * @param source */ public void removeVerbEntry ( String infinitive ) { String imper = (String) infinitivoAImperativo.get(infinitive); infinitivoAImperativo.remove(infinitive); if ( imper != null ) { imperativoAInfinitivo.remove(imper); } } public Set getVerbForms() { return imperativoAInfinitivo.keySet(); } /** * Returns true if the string can be used as the source for a simple alias (i.e. it's a word). * If not, it is considered to be a regex alias. * @param s * @return */ private static boolean isSimpleAliasString ( String s ) { return s.matches("\\p{L}*"); //meaning a string of Unicode letters from any alphabet. } /** * Adds an entry to the aliases table. * @param source The source of the alias. * @param target The target of the alias. */ public void addAlias ( String source , String target ) { if ( isSimpleAliasString ( source ) ) simpleAliases.put(source,target); else regexAliases.put(source,target); } /** * Removes a (source,target) entry from the alias association if it exists. * @param source * @param target */ public void removeAlias ( String source , String target ) { if ( isSimpleAliasString ( source ) ) { if ( simpleAliases.get(source).equals(target) ) removeAlias(source); } else { if ( regexAliases.get(source).equals(target) ) removeAlias(source); } } /** * Removes an alias if it exists. * @param source */ public void removeAlias ( String source ) { if ( isSimpleAliasString ( source ) ) simpleAliases.remove(source); else regexAliases.remove(source); } /** * Devuelve Comprueba si una palabra dada es un verbo, incluyendo soporte de "le". * @param s Palabra a comprobar. * @param includeLe true si se quiere que se admitan como verbo formas con el sufijo le. * @return true si la palabra dada es un verbo reconocido (en imperativo, infinitivo o 1� persona). * Si el par�metro includeLe es true, entonces tambi�n devuelve true si es un verbo al que se ha * a�adido "le" (escupirle, dale, beberle) */ public boolean isVerb ( String s , boolean includeLe ) { if ( !includeLe ) return isVerb(s); else { if ( isVerb(s) ) return true; else { if ( s.endsWith("le") ) { String verbForm = s.substring(0,s.length()-2); return isVerb(verbForm); } else if ( s.endsWith("les") ) { String verbForm = s.substring(0,s.length()-3); return isVerb(verbForm); } else return false; } } } /**Holds the compiled pattern objects for regex aliases. The pattern for each regex alias is compiled the first time the alias is used.*/ private HashMap regexAliasPatterns = new HashMap(); /** * Applies the regex alias with the given source regex to the given string. * @param aliasSource * @param toApplyTo * @return */ private String applyRegexAlias ( String sourceRegex , String toApplyTo ) { String target = (String) regexAliases.get(sourceRegex); Pattern pattern = (Pattern) regexAliasPatterns.get(sourceRegex); if ( pattern == null ) { pattern = Pattern.compile(sourceRegex); regexAliasPatterns.put(sourceRegex, pattern); } Matcher matcher = pattern.matcher(toApplyTo); return matcher.replaceAll(target); } public String substituteAlias ( String s ) { //1. substitute with simple aliases StringTokenizer st = new StringTokenizer ( s , " " , true ); String nueva = ""; while ( st.hasMoreTokens() ) { String tok = st.nextToken(); String sin = getAlias(tok); if ( sin == null ) { nueva += tok; } else { nueva += sin; } } //2. substitute with regex aliases for ( Iterator iter = regexAliases.keySet().iterator() ; iter.hasNext() ; ) { String key = (String) iter.next(); nueva = applyRegexAlias(key,nueva); } return nueva; } public String terceraASegunda ( String verbo ) { return (String) terceraASegunda.get ( verbo.toLowerCase() ); } /** * By default, do nothing language-specific. Will be overridden by concrete languages. * @param s * @return * @deprecated Use {@link #correctMorphology(String)} instead */ public String gramaticalizar ( String s ) { return correctMorphology(s); } /** * By default, do nothing language-specific. Will be overridden by concrete languages. * @param s * @return */ public String correctMorphology ( String s ) { if ( s == null ) return null; String temp = s; temp = temp.trim(); if ( temp.length() > 0 ) temp = Character.toUpperCase(temp.charAt(0)) + temp.substring(1); if ( temp.length() > 0 ) if ( temp.charAt(temp.length()-1) != '.' ) temp += "."; return temp; } /** * By default, do nothing language-specific. Will be overridden by concrete languages. * @param s * @return * @deprecated Use {@link #correctMorphologyWithoutTrimming(String)} instead */ public String gramaticalizarSinTrimear ( String s ) { return correctMorphologyWithoutTrimming(s); } /** * By default, do nothing language-specific. Will be overridden by concrete languages. * @param s * @return */ public String correctMorphologyWithoutTrimming ( String s ) { if ( s == null ) return null; String temp = s; if ( temp.length() > 0 ) temp = Character.toUpperCase(temp.charAt(0)) + temp.substring(1); if ( temp.length() > 0 ) if ( temp.charAt(temp.length()-1) != '\n' && temp.charAt(temp.length()-1) != '.' ) temp += "."; return temp; } public String removeAccents ( String s ) { s = s.replaceAll("[����]","e"); s = s.replaceAll("[����]","u"); s = s.replaceAll("[���]","i"); s = s.replaceAll("[���]","a"); s = s.replaceAll("[���]","o"); s = s.replaceAll("[����]","E"); s = s.replaceAll("[����]","U"); s = s.replaceAll("[���]","I"); s = s.replaceAll("[���]","A"); s = s.replaceAll("[��]","O"); return s; } public void initVerbSpellingCorrector ( ) { verbCorrector = new SimpleReverseCorrector(); for ( Iterator iter = infinitivoAImperativo.keySet().iterator() ; iter.hasNext() ; ) { String nextWord = (String) iter.next(); verbCorrector.addDictionaryWord(nextWord); } for ( Iterator iter = imperativoAInfinitivo.keySet().iterator() ; iter.hasNext() ; ) { String nextWord = (String) iter.next(); verbCorrector.addDictionaryWord(nextWord); } for ( Iterator iter = simpleAliases.keySet().iterator() ; iter.hasNext() ; ) { String nextAlias = (String)iter.next(); StringTokenizer st = new StringTokenizer(nextAlias); //as of 2011-12-16 there are no multiword aliases, but there may be in the future if ( st.hasMoreTokens() ) { verbCorrector.addDictionaryWord(st.nextToken()); } } for ( Iterator iter = regexAliases.keySet().iterator() ; iter.hasNext() ; ) { String nextAlias = (String)iter.next(); nextAlias = nextAlias.replaceAll("\\p{P}", ""); //remove punctuation from the regex alias StringTokenizer st = new StringTokenizer(nextAlias); //tokenize to get words if ( st.hasMoreTokens() ) { verbCorrector.addDictionaryWord(st.nextToken()); } } //System.err.println(corrector); } /** * Gets a corrector for the reference names of the given world. * @param w * @return */ public ReferenceNameCorrector initNameCorrector ( World w ) { return new ReferenceNameCorrector ( w, new SimpleReverseCorrector() ); } private Correction getBestCorrection ( String tentativeVerb ) { if ( verbCorrector == null ) { if ( isVerb(tentativeVerb) ) return new Correction(tentativeVerb,0); else return null; } return verbCorrector.getBestCorrection(tentativeVerb); } /** * Changes a mistyped verb (1st word) in the given command string to a correct one. * @param commandString * @return */ public String correctVerb ( String commandString ) { if ( verbCorrector == null ) initVerbSpellingCorrector(); StringTokenizer st = new StringTokenizer ( commandString ); if ( !st.hasMoreTokens() ) return commandString; String verb = st.nextToken(); Correction c = getBestCorrection(verb); if ( verb.length() > 2 && c != null && c.getWord() != null ) //solo corregimos para length > 2 para evitar por ejemplo i->di { verb = c.getWord(); } if ( !st.hasMoreTokens() ) return verb; else return verb + " " + st.nextToken(""); } /** * Returns the language's verb spelling corrector (initialising it first if it was not * already initialised). */ public SpellingCorrector getVerbSpellingCorrector() { if ( verbCorrector == null ) initVerbSpellingCorrector(); return verbCorrector; } /** * By default, do nothing. Concrete languages will override this with their * pronoun handling. * @param p * @param command * @param mentions * @return */ public String substitutePronouns ( Mobile m , String command , Mentions mentions ) { return command; } /** * By default, do nothing. Concrete languages will override this with their * pronoun handling. * @param p * @param command * @param mentions * @return */ public String substitutePronounsIfVerb ( Mobile p , String command , Mentions mentions ) { return command; } /** * Translates a verb from this language into another, using the translation tables obtained from the corresponding files. * The nullIfNotFound parameter controls what happens when a translation is not available: if the parameter is true, then the method * returns null in that case, if it is false, then it returns the original verb. * @param verb * @param targetLanguage * @param nullIfNotFound * @return */ public String translateVerb ( String verb , String targetLanguage , boolean nullIfNotFound ) { if ( this.getLanguageCode().equals(targetLanguage) ) return verb; //translation from one language to itself String translation = Translation.translate( verb , this.getLanguageCode() , targetLanguage ); if ( translation == null ) translation = Translation.translate( verb.toLowerCase() , this.getLanguageCode() , targetLanguage ); if ( translation == null && !nullIfNotFound ) translation = verb; return translation; } /** * Translates a verb from this language into another, using the translation tables obtained from the corresponding files. * Returns the verb as it was if a translation is not found. * @param verb * @param targetLanguage * @return */ public String translateVerb ( String verb , String targetLanguage ) { return translateVerb ( verb , targetLanguage , false ); } /** * Extracts the verb (command) part from an imperative sentence. * The default behaviour assumes that the verb is the first word in the sentence. * @param sentence * @return */ public String extractVerb ( String sentence ) { StringTokenizer st = new StringTokenizer(sentence); if ( st.hasMoreTokens() ) return st.nextToken(); else return ""; } /** * Extracts everything except the verb (command) part from an imperative sentence. * The default behaviour assumes that the verb is the first word in the sentence, * and therefore returns the rest. * @param sentence * @return */ public String extractArguments ( String sentence ) { StringTokenizer st = new StringTokenizer(sentence); if ( !st.hasMoreTokens() ) return ""; else { st.nextToken(); if ( !st.hasMoreTokens() ) return ""; else return st.nextToken(""); } } /** * Returns the list of common words that are expected to appear in sentences even though they don't refer to * world objects. * @return */ public List getCommonWordsList () { return commonWords; } /** * Returns the default verb, i.e., verb that will be used by default if a reference name is typed * at the beginning of a game without specifying a verb. */ public String getDefaultVerb() { return "look"; } }