package com.jaivox.tools;
import java.io.*;
import java.util.*;
import com.jaivox.interpreter.Adjective;
import com.jaivox.util.Log;
/**
 * Generates questions along with their semantic specifications. We generate
 * questions along with specifications that tell us what to do with the
 * question. When the user speaks, we try to match what is said with the
 * questions in our system, and then we can look up these specifications to
 * decide how to answer the question.
 *
 * <p>Each generated entry in {@link #questions} has the form
 * {@code question<TAB>specification}.
 */
public class Questgen {

    /** Configuration properties passed to the constructor. */
    Properties kv;

    /** Path of the grammar file: property "source" + property "grammar_file". */
    String grammarfile;
    /** Path of the redirects file: property "common" + property "redirects_file". */
    String redirectfile;
    /** Path of the tags file: property "common" + property "penn_tags". */
    String tagsfile;
    /** Path of the specifications file: property "source" + property "specs_file". */
    String infosfile;
    /** Path the questions are written to: {@link #datadir} + property "questions_file". */
    String resultfile;

    /** Value of the "common" property; prefix for the redirects and tags files. */
    String srcdir;
    /** Output directory, derived from "destination" (see the constructor). */
    String datadir;

    /** Adjective helper handed to every {@link Infonode} via buildAdjectives. */
    Adjective Adj;

    /** All loaded info nodes, keyed by node name. */
    TreeMap <String, Infonode> infos;
    /** Names of the info nodes whose first "type" value is "field". */
    String fields [];

    /** Token patterns taken from the grammar file's Grammar object. */
    Vector <String []> patterns;
    /** Original pattern text, parallel to {@link #patterns}. */
    Vector <String> patorig;

    /** Token patterns taken from the redirects file's Grammar object. */
    Vector <String []> redirpats;
    /** Original redirect pattern text, parallel to {@link #redirpats}. */
    Vector <String> redirorig;
    /** Specification for each redirect, looked up by original pattern text. */
    Hashtable <String, String> redirspecs;

    /** Word options for grammar tags, taken from the Tags object. */
    TreeMap <String, String []> gtags;

    /** Generated "question\tspecification" entries, without duplicates. */
    Vector <String> questions;

    /** Mirror of {@link #questions} so duplicate checks do not scan the vector. */
    private final Set <String> seen = new HashSet <String> ();
    /** The vector instance that {@link #seen} currently mirrors. */
    private Vector <String> seenfor;

    /**
     * Generate questions from a set of properties in the configuration file.
     * Reads the properties "source", "common", "onedirectory", "destination",
     * "dir_interpreter", "grammar_file", "redirects_file", "penn_tags",
     * "specs_file" and "questions_file", loads the grammar, redirects, tags
     * and info specifications, and runs {@link Check#checkAll}.
     *
     * @param keyval configuration properties
     */
    public Questgen (Properties keyval) {
        kv = keyval;
        String specdir = kv.getProperty ("source");
        srcdir = kv.getProperty ("common");
        String useonedir = kv.getProperty ("onedirectory");
        String dest = kv.getProperty ("destination");
        // constant-first comparison: a missing "onedirectory" property means "false"
        // instead of a NullPointerException
        if ("true".equals (useonedir)) {
            datadir = dest;
        }
        else {
            datadir = dest + kv.getProperty ("dir_interpreter") + "/";
        }

        grammarfile = specdir + kv.getProperty ("grammar_file");
        Grammar g = new Grammar (grammarfile);
        patterns = g.patterns;
        patorig = g.patorig;

        redirectfile = srcdir + kv.getProperty ("redirects_file");
        Grammar r = new Grammar (redirectfile);
        redirpats = r.patterns;
        redirorig = r.patorig;
        redirspecs = r.specs;

        tagsfile = srcdir + kv.getProperty ("penn_tags");
        Tags t = new Tags (tagsfile);
        gtags = t.gtags;

        Adj = new Adjective ();
        infos = new TreeMap <String, Infonode> ();
        infosfile = specdir + kv.getProperty ("specs_file");
        loadinfos (specdir, infosfile);

        Check c = new Check (this);
        c.checkAll ();

        questions = new Vector <String> ();
        resultfile = datadir + kv.getProperty ("questions_file");
    }

    /**
     * Reads info node definitions from a file into {@link #infos} and fills
     * {@link #fields} with the names of the nodes whose type is "field".
     *
     * <p>A definition starts at a line beginning with "{" and ends at the
     * first following line ending with "}". The opening line itself is not
     * passed to the Infonode; every later line up to and including the closing
     * one is. Lines beginning with "//" are skipped everywhere, and lines
     * outside a definition are ignored.
     *
     * <p>Failures are reported and swallowed; in that case {@link #fields}
     * keeps its previous value.
     *
     * @param specdir directory handed to each Infonode constructor
     * @param filename path of the specifications file
     */
    void loadinfos (String specdir, String filename) {
        BufferedReader in = null;
        try {
            in = new BufferedReader (new FileReader (filename));
            // lines of the definition being collected; null while outside one
            Vector <String> hold = null;
            String line;
            while ((line = in.readLine ()) != null) {
                String rest = line.trim ();
                if (rest.startsWith ("//")) continue;
                if (hold == null) {
                    if (rest.startsWith ("{")) hold = new Vector <String> ();
                    continue;
                }
                hold.add (rest);
                if (rest.endsWith ("}")) {
                    Infonode node = new Infonode (specdir, hold);
                    node.buildAdjectives (Adj);
                    infos.put (node.name, node);
                    if (node.tagval.get ("type") == null) {
                        Log.severe ("No type for node "+node.name);
                    }
                    hold = null;
                }
            }
            if (hold != null) {
                Log.severe ("Unterminated block at end of "+filename);
            }
            Log.info ("Created "+infos.size ()+" infos");

            // Derive the field list from what is actually in the map. Counting
            // while parsing over-counts when two nodes share a name (the later
            // put overwrites the earlier) and leaves null entries in the array.
            List <String> found = new ArrayList <String> ();
            for (Map.Entry <String, Infonode> entry : infos.entrySet ()) {
                String [] types = entry.getValue ().tagval.get ("type");
                if (types != null && types.length > 0 && types [0].equals ("field")) {
                    found.add (entry.getKey ());
                }
            }
            fields = found.toArray (new String [found.size ()]);
        }
        catch (Exception e) {
            Log.severe ("Could not load infos from "+filename+": "+e);
            e.printStackTrace ();
        }
        finally {
            // close on every path, including a failure in the middle of parsing
            if (in != null) {
                try {
                    in.close ();
                }
                catch (IOException e) {
                    Log.severe ("Could not close "+filename+": "+e);
                }
            }
        }
    }

    /**
     * Regenerates {@link #questions} from scratch by expanding every pattern
     * and redirect for every attribute of every field.
     */
    public void generate () {
        questions = new Vector <String> ();
        if (fields == null) {
            // loadinfos failed before it could build the field list
            Log.severe ("No fields loaded, no questions generated");
            return;
        }
        for (String field : fields) {
            generatefield (field);
        }
    }

    /**
     * Generates questions for each entry of the field's "attributes" tag.
     *
     * @param field name of a node in {@link #infos}
     */
    void generatefield (String field) {
        Infonode finfo = (field == null) ? null : infos.get (field);
        if (finfo == null) {
            Log.severe ("No info for field "+field);
            return;
        }
        String [] attrs = finfo.tagval.get ("attributes");
        if (attrs == null) {
            Log.severe ("No attributes for field "+field);
            return;
        }
        for (String attribute : attrs) {
            generatefieldattribute (field, attribute);
        }
    }

    /**
     * Expands every grammar pattern and every redirect pattern for one
     * field/attribute pair.
     *
     * @param field name of the field node
     * @param attribute name of the attribute node
     */
    void generatefieldattribute (String field, String attribute) {
        Infonode finfo = (field == null) ? null : infos.get (field);
        if (finfo == null) {
            Log.severe ("No info for field "+field);
            return;
        }
        Infonode ainfo = (attribute == null) ? null : infos.get (attribute);
        if (ainfo == null) {
            Log.severe ("No info for attribute "+attribute);
            return;
        }
        String pair = finfo.name+"."+ainfo.name+" ";
        for (int i=0; i<patterns.size (); i++) {
            String pat [] = patterns.elementAt (i);
            Log.fine (pair+patorig.elementAt (i));
            generatepattern (finfo, ainfo, pat);
        }
        for (int i=0; i<redirpats.size (); i++) {
            String pat [] = redirpats.elementAt (i);
            String pato = redirorig.elementAt (i);
            Log.fine (pair+pato);
            generateredirect (finfo, ainfo, pat, pato);
        }
    }

    /**
     * Expands one grammar pattern into questions, starting at the first token.
     *
     * @param finfo field node
     * @param ainfo attribute node
     * @param pat pattern tokens
     */
    void generatepattern (Infonode finfo, Infonode ainfo, String pat []) {
        gt (finfo, ainfo, 0, pat, new String [pat.length]);
    }

    /**
     * Recursive expansion of a grammar pattern. Positions before
     * {@code stage} are already filled in {@code q}; this call tries every
     * candidate word for position {@code stage}. Once all positions are
     * filled, the question and its specification from {@link #getselection}
     * are recorded.
     *
     * @param finfo field node
     * @param ainfo attribute node
     * @param stage index of the pattern position to fill next
     * @param pat pattern tokens
     * @param q words chosen so far, same length as {@code pat}
     */
    void gt (Infonode finfo, Infonode ainfo, int stage, String pat [], String q []) {
        int n = pat.length;
        StringBuilder sb = new StringBuilder ();
        for (int i=0; i<stage; i++) {
            // blanks are also tokens, so no separator is inserted here
            sb.append (q [i]);
        }
        String quest = sb.toString ();
        Log.finest ("gt:"+stage+" "+quest);
        if (stage >= n) { // done
            addquestion (quest + "\t" + getselection (finfo, ainfo, pat, q));
            return;
        }
        String [] words = choices (finfo, ainfo, pat [stage]);
        if (words == null) return;
        for (int i=0; i<words.length; i++) {
            q [stage] = words [i];
            gt (finfo, ainfo, stage+1, pat, q);
        }
    }

    /**
     * Candidate words for one pattern token, shared by {@link #gt} and
     * {@link #gtr}. In order of precedence: a token equal to its own lower
     * case stands for itself; "VBZ" gives "is"; "VBP" gives "are"; otherwise
     * the field node's values for the tag, then the attribute node's, then
     * the entry in {@link #gtags}.
     *
     * @return the candidates, or null (after logging) when the tag is unknown
     */
    private String [] choices (Infonode finfo, Infonode ainfo, String gtag) {
        if (gtag.equals (gtag.toLowerCase ())) return new String [] {gtag};
        if (gtag.equals ("VBZ")) return new String [] {"is"};
        if (gtag.equals ("VBP")) return new String [] {"are"};
        String [] words = finfo.tagval.get (gtag);
        if (words == null) words = ainfo.tagval.get (gtag);
        if (words == null) {
            words = gtags.get (gtag);
            if (words == null) Log.severe ("No options for Grammar tag "+gtag);
        }
        return words;
    }

    /**
     * Appends an entry to {@link #questions} unless it is already present.
     */
    private void addquestion (String out) {
        // questions is package-visible and is replaced by generate (), so
        // rebuild the mirror whenever it no longer matches the current vector
        if (seenfor != questions || seen.size () != questions.size ()) {
            seen.clear ();
            seen.addAll (questions);
            seenfor = questions;
        }
        if (seen.add (out)) questions.add (out);
    }

    /**
     * Builds the specification string for a fully expanded pattern:
     * {@code (action, field, attr, quant} followed by one {@code ", TAG"} per
     * pattern token starting with "RB", one {@code ", NNP: word"} per token
     * starting with "NNP", one {@code ", ELS: word"} per token starting with
     * "ELS", and a closing parenthesis.
     *
     * <ul>
     * <li>action is "find" when the first token starts with "W" or some token
     * starts with "ELS", otherwise "ask";</li>
     * <li>field is the field name when some token starts with "NN" or a chosen
     * word starts with the field name, otherwise "_";</li>
     * <li>quant is the last token starting with "JJ", otherwise "_";</li>
     * <li>attr is the attribute name, or "_" when quant is "_".</li>
     * </ul>
     *
     * @param finfo field node
     * @param ainfo attribute node
     * @param pat pattern tokens
     * @param q word chosen for each pattern token
     * @return the specification string
     */
    String getselection (Infonode finfo, Infonode ainfo, String pat [], String q []) {
        String field = finfo.name;
        String attr = ainfo.name;
        int n = pat.length;

        // one pass collects everything that depends on a single token
        String quant = "_";
        StringBuilder adverb = new StringBuilder ();
        StringBuilder nnp = new StringBuilder ();
        StringBuilder els = new StringBuilder ();
        for (int i=0; i<n; i++) {
            String p = pat [i];
            if (p.startsWith ("JJ")) quant = p;
            if (p.startsWith ("RB")) adverb.append (", ").append (p);
            if (p.startsWith ("NNP")) nnp.append (", NNP: ").append (q [i]);
            if (p.startsWith ("ELS")) els.append (", ELS: ").append (q [i]);
        }

        // els is non-empty exactly when some token starts with "ELS"
        String action = "(find, ";
        if (!pat [0].startsWith ("W") && els.length () == 0) action = "(ask, ";

        // to see if it is a followup see if field or attribute is unspecified
        boolean foundField = false;
        for (int i=0; i<n; i++) {
            if (pat [i].startsWith ("NN")) {
                foundField = true;
                break;
            }
            if (q [i].startsWith (field)) foundField = true;
        }
        if (!foundField) field = "_";
        if (quant.equals ("_")) attr = "_";

        // note nnp will contain a space if it contains anything
        return action+field+", "+attr+", "+quant+adverb+nnp+els+")";
    }

    /**
     * Expands one redirect pattern into questions, starting at the first token.
     *
     * @param finfo field node
     * @param ainfo attribute node
     * @param pat pattern tokens
     * @param orig original pattern text, the key into {@link #redirspecs}
     */
    void generateredirect (Infonode finfo, Infonode ainfo, String pat [], String orig) {
        gtr (finfo, ainfo, 0, pat, new String [pat.length], orig);
    }

    /**
     * Recursive expansion of a redirect pattern. Works like {@link #gt},
     * except that the chosen words are joined with single spaces and the
     * specification is looked up in {@link #redirspecs}; nothing is recorded
     * when there is no specification for {@code orig}.
     *
     * @param finfo field node
     * @param ainfo attribute node
     * @param stage index of the pattern position to fill next
     * @param pat pattern tokens
     * @param q words chosen so far, same length as {@code pat}
     * @param orig original pattern text, the key into {@link #redirspecs}
     */
    void gtr (Infonode finfo, Infonode ainfo, int stage, String pat [], String q [], String orig) {
        int n = pat.length;
        StringBuilder sb = new StringBuilder ();
        for (int i=0; i<stage; i++) {
            sb.append (q [i]);
            if (i < n-1) sb.append (' ');
        }
        String quest = sb.toString ();
        Log.finest ("gtr:"+stage+" "+quest);
        if (stage >= n) { // done
            String selection = redirspecs.get (orig);
            if (selection != null) addquestion (quest + "\t" + selection);
            return;
        }
        String [] words = choices (finfo, ainfo, pat [stage]);
        if (words == null) return;
        for (int i=0; i<words.length; i++) {
            q [stage] = words [i];
            gtr (finfo, ainfo, stage+1, pat, q, orig);
        }
    }

    /**
     * Writes every entry of {@link #questions} to {@link #resultfile}, one
     * per line. Failures are reported and swallowed.
     */
    public void saveQuestions () {
        PrintWriter out = null;
        try {
            out = new PrintWriter (new FileWriter (resultfile));
            for (String question : questions) {
                out.println (question);
            }
            // PrintWriter never throws on write errors; checkError flushes and
            // reports them, so success is only logged when the data got out
            if (out.checkError ()) {
                Log.severe ("Error writing questions to "+resultfile);
            }
            else {
                Log.info (""+questions.size ()+" questions saved in "+resultfile);
            }
        }
        catch (Exception e) {
            Log.severe ("Could not save questions to "+resultfile+": "+e);
            e.printStackTrace ();
        }
        finally {
            if (out != null) out.close ();
        }
    }
}