/*
 * Copyright 2011 Stefan Partusch
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.spartusch.nasfvi.server;

import java.io.File;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import jpl.Atom;
import jpl.Query;
import jpl.Term;
import jpl.Util;
import jpl.Variable;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.core.QueryNodeException;

import de.spartusch.StringMethods;

/**
 * Interface to the natural language grammar. This class provides access to
 * the natural language grammar implemented in Prolog. This implementation
 * uses SWI-Prolog's JPL. The system property <code>java.library.path</code>
 * must include a path to SWI-Prolog for this implementation to work properly.
 * <p>
 * Besides the Prolog-backed instance methods, the class offers the static
 * helpers {@link #toProlog(String)} and
 * {@link #toJsonString(String, boolean)}, which only operate on strings.
 * @author Stefan Partusch
 * @see <a href="http://www.swi-prolog.org/">www.swi-prolog.org</a>
 */
public class Grammar {
    /**
     * Grammatical tenses supported by the natural language grammar.
     * {@link Grammar#parse(String, Analyzer)} converts the term bound to
     * <code>Tempus</code> with <code>Tense.valueOf</code>, so the constant
     * names have to match the text of that term exactly.
     * NOTE(review): the names look like abbreviations of German tense names
     * (Plusquamperfekt, Perfekt, Praeteritum, Praesens, Futur I) - confirm
     * against the grammar.
     */
    public enum Tense { pqperf, perf, praet, praes, fut1 };

    /** Logger of this class, named after the class. */
    private static final Logger LOGGER =
        Logger.getLogger(Grammar.class.getName());

    /**
     * Loads the natural language grammar from a file. Consults a single
     * file to an instance of SWI-Prolog. <code>file</code> must reference
     * all required Prolog source files.
* @param file File to load the natural language grammar from */ public Grammar(final File file) { LOGGER.info("Using grammar " + file); Query consult = new Query("consult", new Term[] { new Atom(file.getAbsolutePath()) }); if (!consult.hasSolution()) { String msg = "Consulting " + file + " failed"; LOGGER.severe(msg); throw new RuntimeException(msg); } } /** * Solves a Prolog goal and logs this. Returns the bindings of the goal's * variables. Each binding maps a variable's name to its bound term. * @param goal Goal to solve * @return Bindings of the goal's variables */ @SuppressWarnings("unchecked") private Map<String, Term> solve(final Query goal) { Map<String, Term> bindings = (Map<String, Term>) goal.oneSolution(); if (LOGGER.isLoggable(Level.INFO)) { StringBuilder sb = new StringBuilder(); sb.append(goal); if (bindings != null) { for (Map.Entry<String, Term> entry : bindings.entrySet()) { String value = entry.getValue().toString(); if (value.length() < 100) { sb.append("\n\t").append(entry.getKey()); sb.append(" = ").append(value); } } } else { sb.append("\nNo solution"); } LOGGER.info(sb.toString()); } return bindings; } /** * Completes the input to sentences. This implementation calls * <code>suggest/4</code> in the natural language grammar. * @param input Input to suggest sentences for * @return Suggestions for the input */ public final Set<String> suggest(final String input) { Set<String> suggestions = new TreeSet<String>(); Term[] args = new Term[] { new Atom(input), new jpl.Integer(8), new Variable("Markiertheit"), new Variable("Vorschlaege") }; Map<String, Term> bindings = solve(new Query("suggest", args)); if (bindings == null) { return suggestions; } for (Term t : Util.listToTermArray(bindings.get("Vorschlaege"))) { String[] tokens = Util.atomListToStringArray(t); suggestions.add(fromProlog(tokens).toString()); } return suggestions; } /** * Analyzes a sentence and creates a {@link NQuery} accordingly. 
This * implementation calls <code>parse/5</code> in the natural language * grammar. * @param input The sentence to analyze * @param analyzer Analyzer to use when creating the <code>NQuery</code> * @return A <code>NQuery</code> according to the <code>input</code> * @throws QueryNodeException If creating the <code>NQuery</code> fails */ public final NQuery parse(final String input, final Analyzer analyzer) throws QueryNodeException { Term[] args = new Term[] { new Atom(input), new Variable("Tempus"), new Variable("Query"), new Variable("SimilQuery"), new Variable("Gesucht") }; Map<String, Term> bindings = solve(new Query("parse", args)); if (bindings == null) { return null; } String tempus = bindings.get("Tempus").toString(); String query = bindings.get("Query").toString(); String similQuery = bindings.get("SimilQuery").toString(); String[] fields = Util.atomListToStringArray(bindings.get("Gesucht")); return new NQuery(Tense.valueOf(tempus), query, similQuery, fields, analyzer); } /** * Generates an answer to a question. The input is analyzed to obtain a * skeleton to create the answer in natural language with by inserting * the <code>answerValues</code>. The <code>answerValues</code> are a * mapping from field names to the values of the fields. This * implementation calls <code>beantworte/5</code> in the natural language * grammar. 
* @param input The question in natural language to answer * @param answerValues Values to insert into the answer * @return Answer in natural language to <code>input</code> */ public final String generate(final String input, final Map<String, Set<String>> answerValues) { Term[] termValues = new Term[answerValues.size()]; int i = 0; for (Map.Entry<String, Set<String>> e : answerValues.entrySet()) { String key = e.getKey(); String[] arr = new String[e.getValue().size() + 1]; Iterator<String> iter = e.getValue().iterator(); arr[0] = key; for (int j = 1; iter.hasNext(); j++) { String value = iter.next(); if (StringMethods.equalsOneOf(key, new String[]{"semester", "tag"})) { arr[j] = value.toLowerCase(Locale.GERMAN); } else { arr[j] = "\"" + value + "\""; } } termValues[i] = Util.stringArrayToList(arr); i++; } Term[] args = new Term[] { new Atom(input), Util.termArrayToList(termValues), new Variable("AnalyseAnfrage"), new Variable("AnalyseAntwort"), new Variable("Antwort") }; Map<String, Term> bindings = solve(new Query("beantworte", args)); if (bindings == null) { throw new AssertionError("No bindings received"); } String analysisReq = prettyPrint(bindings.get("AnalyseAnfrage")); String analysisAns = prettyPrint(bindings.get("AnalyseAntwort")); String[] ansTokens = Util.atomListToStringArray(bindings.get("Antwort")); String answer = toJsonString(fromProlog(ansTokens), true); StringBuilder sb = new StringBuilder("{\n\"AnalysisReq\": "); sb.append(analysisReq).append(",\n\"AnalysisAns\": "); sb.append(analysisAns).append(",\n\"Answer\": "); sb.append(answer).append("\n}\n"); return sb.toString(); } /** * Pretty-prints a Prolog list to a string. This implementation calls * {@link #prettyPrint(Term)} on each item of <code>list</code> * recursively and ignores the final empty list. 
* @param list List to print * @return Pretty-printed Prolog list */ private String prettyPrintList(final Term list) { if (list.isCompound() && list.hasFunctor(".", 2)) { String head = prettyPrint(list.arg(1)); String tail = prettyPrintList(list.arg(2)); if (tail.equals("[]") || tail.isEmpty()) { return head; } return head + ", " + tail; } return ""; } /** * Pretty-prints a Prolog term to JSON. This implementation handles the * operators defined in the natural language grammar ('?', '-', '>', '*', * 'und', 'oder', 'lam', 'qu', 'ex') properly. * @param term Term to pretty-print * @return Pretty-printed term in JSON */ private String prettyPrint(final Term term) { String result = null; if (term.isCompound()) { if (term.hasFunctor(".", 2)) { String head = prettyPrint(term.arg(1)); String tail = prettyPrintList(term.arg(2)); if (tail.isEmpty()) { return "[" + head + "]"; } return "[" + head + ", " + tail + "]"; // no 'JSONification' because prolog lists are valid JSON arrays } else if (term.hasFunctor("?", 1)) { result = "?" + prettyPrint(term.arg(1)); } else if (term.hasFunctor("-", 2)) { result = prettyPrint(term.arg(1)) + "-" + prettyPrint(term.arg(2)); } else if (term.arity() == 2) { String op = term.name(); Term t1 = term.arg(1); Term t2 = term.arg(2); if (StringMethods.equalsOneOf(op, new String[]{"und", "oder", "*"})) { result = prettyPrint(t1) + " " + op + " " + prettyPrint(t2); } else if (StringMethods.equalsOneOf(op, new String[]{"lam", "qu", "ex"})) { result = op + "(" + t1.toString() + ", " + prettyPrint(t2) + ")"; } else if (op.equals(">")) { result = prettyPrint(t1) + op + prettyPrint(t2); } } } if (result == null) { result = term.toString(); } return toJsonString(result, false); } /** * Converts a string to a JSON string. 
* @param string String to convert * @param escape true to escape quotation marks, false to remove * quotation marks * @return A JSON string */ public static String toJsonString(final String string, final boolean escape) { return toJsonString(new StringBuilder(string), escape); } /** * Creates a JSON string from a StringBuilder. The data of the * StringBuilder is converted in-place to JSON. * @param builder StringBuilder to use * @param escape true to escape quotation marks, false to remove * quotation marks * @return A JSON string */ public static String toJsonString(final StringBuilder builder, final boolean escape) { int pos = -1; while ((pos = builder.indexOf("\"", pos + 1)) != -1) { if (escape && (pos == 0 || (pos > 1 && builder.charAt(pos - 1) != '\\'))) { builder.insert(pos, '\\'); } else if (!escape) { builder.deleteCharAt(pos); } } builder.insert(0, '"'); builder.append('"'); return builder.toString(); } /** * Processes a natural language sentence returned from Prolog. This method * concatenates each token, restores German umlauts, quotes blackboxes and * converts the first character to uppercase. 
* @param tokens Tokens of the sentence * @return Processed sentence * @see {@link #toProlog(String) toProlog} */ private static StringBuilder fromProlog(final String[] tokens) { StringBuilder sb = new StringBuilder(); for (String tok : tokens) { if (tok.charAt(0) != '"') { // Token is no black box tok = tok.replace("Ae", "\u00C4"); tok = tok.replace("Oe", "\u00D6"); tok = tok.replace("Ue", "\u00DC"); tok = tok.replace("ae", "\u00E4"); tok = tok.replace("oe", "\u00F6"); tok = tok.replaceAll("ue(?!n\\b)", "\u00FC"); tok = tok.replace("ss", "\u00DF"); sb.append(tok).append(" "); } else { // Token is black box boolean quote = tok.indexOf(' ') != -1; // test if multiple words int start = 1; if (tok.charAt(start) == '#') { start++; // skip # } Character upper = Character.toUpperCase(tok.charAt(start)); if (quote) { sb.append('"'); } sb.append(upper); sb.append(tok.substring(start + 1, tok.length() - 1)); if (quote) { sb.append('"'); } sb.append(" "); } } Character first = sb.charAt(0); sb.deleteCharAt(0); sb.insert(0, Character.toUpperCase(first)); return sb; } /** * Normalizes input for use with the natural language grammar in Prolog. * This method converts the input to lowercase, encodes German umlauts * with ASCII characters (ae, oe, ue, ...) and removes meta-characters not * suitable for processing by the natural language grammar. 
* Text between quotation marks (a black box) is copied as it is,
     * including the quotation marks. Outside of black boxes a backslash is
     * replaced with a slash; letters, digits, '/', '.' and '-' are kept and
     * every other character is replaced with a space. Leading and trailing
     * whitespace is removed from the result. The input is processed by code
     * point, so characters outside of the Basic Multilingual Plane are
     * handled as one character.
     * @param input Input to normalize
     * @return Normalized <code>input</code>
     * @see #fromProlog(String[])
     */
    public static String toProlog(final String input) {
        StringBuilder sb = new StringBuilder();
        boolean inBlackBox = false;

        int i = 0;
        while (i < input.length()) {
            final int ch = input.codePointAt(i);
            // Advance by the number of chars of the code point. Advancing
            // by one would process the low surrogate of a pair again.
            i += Character.charCount(ch);

            if (ch == '"') {
                inBlackBox = !inBlackBox;
                sb.append('"');
                continue;
            }

            if (inBlackBox) {
                sb.appendCodePoint(ch);
                continue;
            }

            final int lower = Character.toLowerCase(ch);
            switch (lower) {
            case '\u00E4':
                sb.append("ae");
                break;
            case '\u00F6':
                sb.append("oe");
                break;
            case '\u00FC':
                sb.append("ue");
                break;
            case '\u00DF':
                sb.append("ss");
                break;
            case '\\':
                sb.append("/");
                break;
            default:
                if (Character.isLetterOrDigit(lower) || lower == '/'
                        || lower == '.' || lower == '-') {
                    sb.appendCodePoint(lower);
                } else {
                    sb.append(" ");
                }
            }
        }

        return sb.toString().trim();
    }
}