/*
Copyright 2010-2012 by Bits and Pixels, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.jaivox.tools;
import java.io.*;
import java.util.*;
import com.jaivox.interpreter.Adjective;
import com.jaivox.util.Log;
/**
* Generates questions along with their semantic specifications. We generate
* questions along with specifications that tell us what to do with the
* question. When the user speaks, we try to match what is said with the
* questions in our system, and then we can look up these specifications to
* decide how to answer the question.
*/
public class Questgen {
Properties kv;
String grammarfile;
String redirectfile;
String tagsfile;
String infosfile;
String resultfile;
String srcdir;
String datadir;
Adjective Adj;
TreeMap <String, Infonode> infos;
String fields [];
Vector <String []> patterns;
Vector <String> patorig;
Vector <String []> redirpats;
Vector <String> redirorig;
Hashtable <String, String> redirspecs;
TreeMap <String, String []> gtags;
Vector <String> questions;
static String yesno [] = {"is", "are", "was", "were", "do", "does", "how",
"would", "will", "could", "can"};
Vector <String> yesnospecs;
/**
* Generate questions from a set of properties in the configuration file.
@param keyval
*/
public Questgen (Properties keyval) {
kv = keyval;
String specdir = kv.getProperty ("source");
srcdir = kv.getProperty ("common");
String useonedir = kv.getProperty ("onedirectory");
String dest = kv.getProperty ("destination");
if (useonedir.equals ("true")) {
datadir = dest;
}
else {
datadir = dest + kv.getProperty ("dir_interpreter") + "/";
}
String gram = kv.getProperty ("grammar_file");
grammarfile = specdir + gram;
Grammar g = new Grammar (grammarfile);
patterns = g.patterns;
patorig = g.patorig;
String redir = kv.getProperty ("redirects_file");
redirectfile = srcdir + redir;
Grammar r = new Grammar (redirectfile);
redirpats = r.patterns;
redirorig = r.patorig;
redirspecs = r.specs;
String penntags = kv.getProperty ("penn_tags");
tagsfile = srcdir + penntags;
Tags t = new Tags (tagsfile);
gtags = t.gtags;
yesnospecs = new Vector <String> ();
for (int i=0; i<yesno.length; i++) {
yesnospecs.add (yesno [i]);
}
Adj = new Adjective ();
infos = new TreeMap <String, Infonode> ();
String specs = kv.getProperty ("specs_file");
infosfile = specdir + specs;
loadinfos (specdir, infosfile);
Check c = new Check (this);
c.checkAll ();
questions = new Vector <String> ();
String qq = kv.getProperty ("questions_file");
resultfile = datadir + qq;
}
void loadinfos (String specdir, String filename) {
try {
BufferedReader in = new BufferedReader (new FileReader (filename));
String line;
Vector <String> hold;
int nfield = 0;
outer:
while ((line = in.readLine ()) != null) {
String rest = line.trim ();
if (rest.startsWith ("//")) continue;
if (rest.startsWith ("{")) {
hold = new Vector <String> ();
while ((line = in.readLine ()) != null) {
rest = line.trim ();
if (rest.startsWith ("//")) continue;
hold.add (rest);
if (rest.endsWith ("}")) {
Infonode node = new Infonode (specdir, hold);
node.buildAdjectives (Adj);
infos.put (node.name, node);
String [] types = node.tagval.get ("type");
if (types == null) {
Log.severe ("No type for node "+node.name);
}
else {
if (types [0].equals ("field")) nfield++;
}
continue outer;
}
}
}
}
in.close ();
Log.info ("Created "+infos.size ()+" infos");
fields = new String [nfield];
Set <String> keys = infos.keySet ();
int i = 0;
for (Iterator<String> it = keys.iterator (); it.hasNext (); ) {
String key = it.next ();
Infonode sp = infos.get (key);
String [] types = sp.tagval.get ("type");
if (types != null && types [0].equals ("field")) {
fields [i++] = key;
}
}
}
catch (Exception e) {
e.printStackTrace ();
}
}
void generate () {
questions = new Vector <String> ();
for (int i=0; i<fields.length; i++) {
String field = fields [i];
generatefield (field);
}
}
void generatefield (String field) {
Infonode finfo = infos.get (field);
String [] attrs = finfo.tagval.get ("attributes");
for (int i=0; i<attrs.length; i++) {
String attribute = attrs [i];
generatefieldattribute (field, attribute);
}
}
void generatefieldattribute (String field, String attribute) {
Infonode finfo = infos.get (field);
Infonode ainfo = infos.get (attribute);
for (int i=0; i<patterns.size (); i++) {
String pat [] = patterns.elementAt (i);
String pato = patorig.elementAt (i);
Log.fine (finfo.name+"."+ainfo.name+" "+pato);
generatepattern (finfo, ainfo, pat);
}
for (int i=0; i<redirpats.size (); i++) {
String pat [] = redirpats.elementAt (i);
String pato = redirorig.elementAt (i);
Log.fine (finfo.name+"."+ainfo.name+" "+pato);
generateredirect (finfo, ainfo, pat, pato);
}
}
void generatepattern (Infonode finfo, Infonode ainfo, String pat []) {
int n = pat.length;
String q [] = new String [n];
gt (finfo, ainfo, 0, pat, q);
}
void gt (Infonode finfo, Infonode ainfo, int stage, String pat [], String q []) {
int n = pat.length;
StringBuffer sb = new StringBuffer ();
for (int i=0; i<stage; i++) {
sb.append (q [i]);
// blanks are also tokens
// if (i < n-1) sb.append (' ');
}
String quest = new String (sb);
Log.finest ("gt:"+stage+" "+quest);
if (stage >= n) { // done
String selection = getselection (finfo, ainfo, pat, q);
String out = quest + "\t" + selection;
if (questions.indexOf (out) == -1) questions.add (out);
return;
}
// consider the current pat
String gtag = pat [stage];
// any lower case pattern is passed through
if (gtag.equals (gtag.toLowerCase ())) {
q [stage] = gtag;
gt (finfo, ainfo, stage+1, pat, q);
}
/*
else if (gtag.startsWith ("W")) {
String whquestions [] = finfo.tagval.get ("wh");
if (whquestions != null) {
for (int i=0; i<whquestions.length; i++) {
String whquestion = whquestions [i];
q [stage] = whquestion;
gt (finfo, ainfo, stage+1, pat, q);
}
}
else return;
}*/
else if (gtag.equals ("VBZ")) {
q [stage] = "is";
gt (finfo, ainfo, stage+1, pat, q);
}
else if (gtag.equals ("VBP")) {
q [stage] = "are";
gt (finfo, ainfo, stage+1, pat, q);
}
else if (finfo.tagval.get (gtag) != null || ainfo.tagval.get (gtag) != null) {
// else if (gtag.startsWith ("N") || gtag.startsWith ("J") || gtag.startsWith ("R")) {
String [] words = finfo.tagval.get (gtag);
if (words == null) words = ainfo.tagval.get (gtag);
if (words != null) {
for (int i=0; i<words.length; i++) {
String word = words [i];
q [stage] = word;
gt (finfo, ainfo, stage+1, pat, q);
}
}
else return;
}
else {
String options [] = gtags.get (gtag);
if (options == null) {
Log.severe ("No options for Grammar tag "+gtag);
return;
}
int m = options.length;
for (int j=0; j<m; j++) {
q [stage] = options [j];
gt (finfo, ainfo, stage+1, pat, q);
}
}
}
String getselection (Infonode finfo, Infonode ainfo, String pat [], String q []) {
String field = finfo.name;
String attr = ainfo.name;
// to see if it is a followup see if field or attribute is unspecified
String quant = "_";
// just find the adjective
int n = pat.length;
String action = "(find, ";
if (yesnospecs.indexOf (pat [0]) != -1)
action = "(ask, ";
boolean foundField = false;
for (int i=0; i<n; i++) {
String p = pat [i];
if (p.startsWith ("NN")) {
foundField = true;
break;
}
String word = q [i];
if (word.startsWith (field)) {
foundField = true;
}
}
if (!foundField) field = "_";
for (int i=0; i<n; i++) {
String p = pat [i];
if (p.startsWith ("JJ")) quant = p;
}
// adverbial terms
String adverb = "";
for (int i=0; i<n; i++) {
String p = pat [i];
if (p.startsWith ("RB")) adverb = adverb+", "+p;
}
// proper names
String nnp = "";
for (int i=0; i<n; i++) {
String p = pat [i];
if (p.startsWith ("NNP")) nnp = nnp+", NNP: "+q [i];
}
// else
String els = "";
for (int i=0; i<n; i++) {
String p = pat [i];
if (p.startsWith ("ELS")) els = els+", ELS: "+q[i];
}
if (quant.equals ("_")) attr = "_";
// note nnp will contain a space if it contains anything
String s = action+field+", "+attr+", "+quant+adverb+nnp+els+")";
return s;
}
void generateredirect (Infonode finfo, Infonode ainfo, String pat [], String orig) {
int n = pat.length;
String q [] = new String [n];
gtr (finfo, ainfo, 0, pat, q, orig);
}
void gtr (Infonode finfo, Infonode ainfo, int stage, String pat [], String q [], String orig) {
int n = pat.length;
StringBuffer sb = new StringBuffer ();
for (int i=0; i<stage; i++) {
sb.append (q [i]);
if (i < n-1) sb.append (' ');
}
String quest = new String (sb);
Log.finest ("gtr:"+stage+" "+quest);
if (stage >= n) { // done
String selection = redirspecs.get (orig);
if (selection != null) {
String out = quest + "\t" + selection;
if (questions.indexOf (out) == -1) questions.add (out);
}
return;
}
// consider the current pat
String gtag = pat [stage];
// any lower case pattern is passed through
if (gtag.equals (gtag.toLowerCase ())) {
q [stage] = gtag;
gtr (finfo, ainfo, stage+1, pat, q, orig);
}
else if (gtag.equals ("VBZ")) {
q [stage] = "is";
gtr (finfo, ainfo, stage+1, pat, q, orig);
}
else if (gtag.equals ("VBP")) {
q [stage] = "are";
gtr (finfo, ainfo, stage+1, pat, q, orig);
}
else if (finfo.tagval.get (gtag) != null || ainfo.tagval.get (gtag) != null) {
String [] words = finfo.tagval.get (gtag);
if (words == null) words = ainfo.tagval.get (gtag);
if (words != null) {
for (int i=0; i<words.length; i++) {
String word = words [i];
q [stage] = word;
gtr (finfo, ainfo, stage+1, pat, q, orig);
}
}
else return;
}
else {
String options [] = gtags.get (gtag);
if (options == null) {
Log.severe ("No options for Grammar tag "+gtag);
return;
}
int m = options.length;
for (int j=0; j<m; j++) {
q [stage] = options [j];
gtr (finfo, ainfo, stage+1, pat, q, orig);
}
}
}
void saveQuestions () {
try {
PrintWriter out = new PrintWriter (new FileWriter (resultfile));
for (int i=0; i<questions.size (); i++) {
out.println (questions.elementAt (i));
}
Log.info (""+questions.size ()+" questions saved in "+resultfile);
out.close ();
}
catch (Exception e) {
e.printStackTrace ();
}
}
};