/**
 *
 * @author greg (at) myrobotlab.org
 *
 * This file is part of MyRobotLab (http://myrobotlab.org).
 *
 * MyRobotLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version (subject to the "Classpath" exception
 * as provided in the LICENSE.txt file that accompanied this code).
 *
 * MyRobotLab is distributed in the hope that it will be useful or fun,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * All libraries in thirdParty bundle are subject to their own license
 * requirements - please refer to http://myrobotlab.org/libraries for
 * details.
 *
 * Enjoy !
 *
 * Dependencies:
 * sphinx4-1.0beta6
 * google recognition - a network connection is required
 *
 * References:
 * Swapping Grammars - http://cmusphinx.sourceforge.net/wiki/sphinx4:swappinggrammars
 *
 * http://cmusphinx.sourceforge.net/sphinx4/javadoc/edu/cmu/sphinx/jsgf/JSGFGrammar.html#loadJSGF(java.lang.String)
 * TODO - loadJSGF - The JSGF grammar specified by grammarName will be loaded
 * from the base url (tossing out any previously loaded grammars)
 *
 */

package org.myrobotlab.service;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;

import org.apache.commons.lang.StringUtils;
import org.myrobotlab.framework.Service;
import org.myrobotlab.framework.ServiceType;
import org.myrobotlab.io.FileIO;
import org.myrobotlab.logging.Level;
import org.myrobotlab.logging.LoggerFactory;
import org.myrobotlab.logging.Logging;
import org.myrobotlab.logging.LoggingFactory;
import org.myrobotlab.service.interfaces.SpeechRecognizer;
import org.myrobotlab.service.interfaces.SpeechSynthesis;
import org.myrobotlab.service.interfaces.TextListener;
import org.myrobotlab.service.interfaces.TextPublisher;
import org.slf4j.Logger;

import edu.cmu.sphinx.frontend.util.Microphone;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
import edu.cmu.sphinx.util.props.PropertyException;

/**
 * Sphinx - Speech recognition based on CMU Sphinx. This service must be told
 * what it's listening for. It does not do free-form speech recognition.
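 * <p>
 * A minimal usage sketch (the service and target names here are illustrative;
 * see the main() example further down this class for a fuller demo):
 *
 * <pre>{@code
 * Sphinx ear = (Sphinx) Runtime.createAndStart("ear", "Sphinx");
 * ear.addCommand("start clock", "clock", "startClock");
 * ear.addComfirmations("yes", "correct");
 * ear.addNegations("no", "wrong");
 * ear.startListening();
 * }</pre>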
 */
public class Sphinx extends Service implements SpeechRecognizer, TextPublisher {

  /**
   * Commands must be created before startListening is called - startListening
   * will create a grammar file from the command data.
   */
  public class Command {
    public String name;
    public String method;
    public Object[] params;

    Command(String name, String method, Object[] params) {
      this.name = name;
      this.method = method;
      this.params = params;
    }
  }

  class SpeechProcessor extends Thread {
    Sphinx myService = null;
    public boolean isRunning = false;

    public SpeechProcessor(Sphinx myService) {
      super(myService.getName() + "_ear");
      this.myService = myService;
    }

    @Override
    public void run() {
      try {
        isRunning = true;
        info(String.format("starting speech processor thread %s_ear", myService.getName()));

        String newPath = cfgDir + File.separator + myService.getName() + ".xml";
        File localGramFile = new File(newPath);

        info("loading grammar file");
        if (localGramFile.exists()) {
          info(String.format("grammar config %s", newPath));
          cm = new ConfigurationManager(newPath);
        } else {
          // resource in jar default
          info(String.format("grammar /resource/Sphinx/simple.xml"));
          cm = new ConfigurationManager(this.getClass().getResource("/resource/Sphinx/simple.xml"));
        }

        info("starting recognizer");
        // start the word recognizer
        recognizer = (Recognizer) cm.lookup("recognizer");
        recognizer.allocate();

        info("starting microphone");
        microphone = (Microphone) cm.lookup("microphone");
        if (!microphone.startRecording()) {
          log.error("Cannot start microphone.");
          recognizer.deallocate();
        }

        // loop the recognition until the program exits.
        isListening = true;
        while (isRunning) {
          info("listening: %b", isListening);
          invoke("listeningEvent");

          Result result = recognizer.recognize();

          if (!isListening) {
            // we could have stopped listening
            Thread.sleep(250);
            continue;
          }

          log.info("Recognized Loop: {} Listening: {}", result, isListening);
          // log.error(result.getBestPronunciationResult());
          if (result != null) {
            String resultText = result.getBestFinalResultNoFiller();
            if (StringUtils.isEmpty(resultText)) {
              // nothing heard?
              continue;
            }
            log.info("recognized: " + resultText + '\n');
            if (resultText.length() > 0 && isListening) {
              if (lockPhrases.size() > 0 && !lockPhrases.contains(resultText) && !confirmations.containsKey(resultText)) {
                log.info(String.format("but locked on %s", resultText));
                continue;
              }

              // command system being used
              if (commands != null) {
                if (currentCommand != null && (confirmations == null || confirmations.containsKey(resultText))) {
                  // i have a command and a confirmation
                  // command sent
                  send(currentCommand.name, currentCommand.method, currentCommand.params);
                  // command finished
                  currentCommand = null;
                  invoke("publishText", "ok");
                  continue;
                } else if (currentCommand != null && negations.containsKey(resultText)) {
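                  // note: a negation only cancels the pending command - the
                  // target service is never called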
                  // negation has happened... recognized the wrong command
                  // reset command
                  currentCommand = null;
                  // apologize
                  invoke("publishText", "sorry");
                  continue;
                } else if (commands.containsKey(resultText) && (confirmations != null || negations != null)) {
                  if (bypass != null && bypass.containsKey(resultText)) {
                    // we have confirmation and/or negations
                    // - but we also have a bypass
                    send(currentCommand.name, currentCommand.method, currentCommand.params);
                  } else {
                    // setting new potential command - using
                    // either confirmations or negations
                    Command cmd = commands.get(resultText);
                    currentCommand = cmd;
                    invoke("publishRequestConfirmation", resultText);
                    // continue in the loop, we should stop listening, and we
                    // shouldn't publish the text because we just asked for
                    // confirmation.
                    continue;
                  }
                } else if (commands.containsKey(resultText)) {
                  // no confirmations or negations are being
                  // used - just send command
                  Command cmd = commands.get(resultText);
                  send(cmd.name, cmd.method, cmd.params);
                } else {
                  error(String.format("unknown use case for Sphinx commands - word is %s", resultText));
                  // we don't know what this command was.. just continue.. we
                  // shouldn't publish text or recognized.
                  // we recognized it, but we don't publish text..
                  invoke("recognized", resultText);
                  continue;
                }
              }

              // publishRecognized(resultText);
              // Only publish the text if there was a known command?
              invoke("publishText", resultText);
              invoke("recognized", resultText);
            }
          } else {
            try {
              Thread.sleep(250);
            } catch (InterruptedException e) {
              // TODO Auto-generated catch block
              logException(e);
            }
            // invoke("unrecognizedSpeech");
            log.error("I can't hear what you said.\n");
          }
        }
      } catch (Exception e) {
        error(e);
      }
    }
  }

  private static final long serialVersionUID = 1L;

  public final static Logger log = LoggerFactory.getLogger(Sphinx.class.getCanonicalName());

  transient Microphone microphone = null;
  transient ConfigurationManager cm = null;
  transient Recognizer recognizer = null;
  transient SpeechProcessor speechProcessor = null;

  private boolean isListening = false;

  // private String lockPhrase = null;
  HashSet<String> lockPhrases = new HashSet<String>();

  HashMap<String, Command> commands = null;
  HashMap<String, Command> confirmations = null;
  HashMap<String, Command> negations = null;
  HashMap<String, Command> bypass = null;

  Command currentCommand = null;

  public static void main(String[] args) {
    LoggingFactory.init(Level.DEBUG);

    try {
      Sphinx ear = (Sphinx) Runtime.createAndStart("ear", "Sphinx");
      SpeechSynthesis speech = new MarySpeech("speech");
      ((MarySpeech) speech).startService();
      // attach speech to ear -
      // auto subscribes to "request confirmation"
      // so that speech asks for confirmation
      // TODO - put this in gui so state will be updated with text
      // question
      ear.addMouth(speech);

      Log log = (Log) Runtime.createAndStart("log", "Log");
      Clock clock = (Clock) Runtime.createAndStart("clock", "Clock");
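      // the commands below map spoken phrases to methods on the log and clock
      // services; with confirmations and negations registered further down,
      // Sphinx asks for a yes/no confirmation before sending each command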
      // TODO - got to do this - it will be KICKASS !
      // log.subscribe(outMethod, publisherName, inMethod, parameterType)
      // new MRLListener("pulse", log.getName(), "log");
      ear.addCommand("log", log.getName(), "log");
      ear.addCommand("log subscribe to clock", log.getName(), "subscribe", new Object[] { "pulse", });
      ear.addCommand("start clock", clock.getName(), "startClock");
      ear.addCommand("stop clock", clock.getName(), "stopClock");
      ear.addCommand("set clock interval to five seconds", clock.getName(), "setInterval", 5000);
      ear.addCommand("set clock interval to ten seconds", clock.getName(), "setInterval", 10000);

      ear.addComfirmations("yes", "correct", "right", "yeah", "ya");
      ear.addNegations("no", "incorrect", "wrong", "nope", "nah");

      ear.startListening();

      // ear.startListening("camera on | camera off | arm left | arm right |
      // hand left | hand right ");
      // ear.startListening("yes | no");

      // Sphinx ear = new Sphinx("ear");
      // ear.createGrammar("hello | up | down | yes | no");
      // ear.startService();
    } catch (Exception e) {
      Logging.logError(e);
    }
  }

  public Sphinx(String n) {
    super(n);
  }

  public void addBypass(String... txt) {
    if (bypass == null) {
      bypass = new HashMap<String, Command>();
    }
    Command bypassCommand = new Command(this.getName(), "bypass", null);
    for (int i = 0; i < txt.length; ++i) {
      bypass.put(txt[i], bypassCommand);
    }
  }

  public void addComfirmations(String... txt) {
    if (confirmations == null) {
      confirmations = new HashMap<String, Command>();
    }
    Command confirmCommand = new Command(this.getName(), "confirmation", null);
    for (int i = 0; i < txt.length; ++i) {
      confirmations.put(txt[i], confirmCommand);
    }
  }

  // TODO - should this be in Service ?????
  public void addCommand(String actionPhrase, String name, String method, Object... params) {
    if (commands == null) {
      commands = new HashMap<String, Command>();
    }
    commands.put(actionPhrase, new Command(name, method, params));
  }

  public void addNegations(String... txt) {
    if (negations == null) {
      negations = new HashMap<String, Command>();
    }
    Command negationCommand = new Command(this.getName(), "negation", null);
    for (int i = 0; i < txt.length; ++i) {
      negations.put(txt[i], negationCommand);
    }
  }

  public void addTextListener(TextListener service) {
    addListener("publishText", service.getName(), "onText");
  }

  public void addVoiceRecognitionListener(Service s) {
    // TODO - reflect on a public heard method - if doesn't exist error ?
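    // recognized text is routed to a public heard(String) method on the
    // listening service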
    this.addListener("recognized", s.getName(), "heard");
  }

  // TODO - make "Speech" interface if desired
  // public boolean attach(SpeechSynthesis mouth) {
  // if (mouth == null) {
  // warn("can not attach mouth is null");
  // return false;
  // }
  // // if I'm speaking - I shouldn't be listening
  // mouth.addEar(this);
  // this.addListener("publishText", mouth.getName(), "onText");
  // this.addListener("publishRequestConfirmation", mouth.getName(),
  // "onRequestConfirmation");
  // log.info(String.format("attached Speech service %s to Sphinx service %s
  // with default message routes", mouth.getName(), getName()));
  // return true;
  // }

  public void buildGrammar(StringBuffer sb, HashMap<String, Command> cmds) {
    if (cmds != null) {
      if (sb.length() > 0) {
        sb.append("|");
      }
      int cnt = 0;
      for (String key : cmds.keySet()) {
        ++cnt;
        sb.append(key);
        if (cnt < cmds.size()) {
          sb.append("|");
        }
      }
    }
  }

  /*
   * public void publishRecognized(String recognizedText) {
   * invoke("recognized", recognizedText); }
   */

  public void clearLock() {
    lockPhrases.clear();
  }

  /**
   * createGrammar must be called before the Service starts if a new grammar is
   * needed
   *
   * example: ear = (Sphinx) Runtime.create("ear", "Sphinx");
   * ear.createGrammar("stop | go | left | right | back");
   *
   * @param grammar
   *          - pipe-delimited grammar content, e.g. "stop | go | left | right"
   * @return true if the grammar files were written, false on failure
   */
  public boolean createGrammar(String grammar) {
    log.info("creating grammar [{}]", grammar);

    // FIXME - probably broken
    // get base simple.xml file - and modify it to
    // point to the correct .gram file
    String simplexml = getServiceResourceFile("simple.xml");
    // String grammarLocation = "file://" + cfgDir.replaceAll("\\\\", "/") +
    // "/";
    // simplexml = simplexml.replaceAll("resource:/resource/",
    // cfgDir.replaceAll("\\\\", "/"));
    simplexml = simplexml.replaceAll("resource:/resource/", ".myrobotlab");

    // a filename like i01.ear.gram (without the gram extension of course,
    // because it sucks this out of the xml)
    // and re-processes it to be as fragile as possible :P
    String grammarFileName = getName();
    grammarFileName = grammarFileName.replaceAll("\\.", "_");
    if (grammarFileName.contains(".")) {
      grammarFileName = grammarFileName.substring(0, grammarFileName.indexOf("."));
    }

    simplexml = simplexml.replaceAll("name=\"grammarName\" value=\"simple\"", "name=\"grammarName\" value=\"" + grammarFileName + "\"");

    try {
      FileIO.toFile(String.format("%s%s%s.%s", cfgDir, File.separator, grammarFileName, "xml"), simplexml);
      save("xml", simplexml);

      String gramdef = "#JSGF V1.0;\n" + "grammar " + grammarFileName + ";\n" + "public <greet> = (" + grammar + ");";
      FileIO.toFile(String.format("%s%s%s.%s", cfgDir, File.separator, grammarFileName, "gram"), gramdef);
    } catch (Exception e) {
      Logging.logError(e);
      return false;
    }
    // save("gram", gramdef);
    return true;
  }
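  // For illustration (a sketch - the exact output location depends on the
  // runtime's config directory): createGrammar("stop | go | left | right") on
  // a service named "ear" writes an ear.gram JSGF file along these lines:
  //
  //   #JSGF V1.0;
  //   grammar ear;
  //   public <greet> = (stop | go | left | right);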
  public boolean isRecording() {
    return microphone.isRecording();
  }

  /**
   * an inbound port for Speaking Services (TTS) - which suppresses listening
   * such that a system will not listen when it's talking, otherwise a feedback
   * loop can occur
   *
   * @param talking
   *          - true while the TTS service is speaking
   * @return the value that was passed in
   */
  public synchronized boolean onIsSpeaking(Boolean talking) {
    if (talking) {
      isListening = false;
      log.info("I'm talking so I'm not listening"); // Gawd, ain't that
                                                    // the truth !
    } else {
      isListening = true;
      log.info("I'm not talking so I'm listening"); // mebbe
    }
    return talking;
  }

  /**
   * Event is sent when the listening Service is actually listening. There is
   * some delay when it initially loads.
   */
  @Override
  public void listeningEvent() {
    return;
  }

  /**
   * FIXME - the trunk is broken - the configuration is horrible; find a way to
   * make this work, despite Sphinx's chaos !
   *
   * function to swap grammars to allow sphinx a little more capability
   * regarding "new words"
   *
   * check http://cmusphinx.sourceforge.net/wiki/sphinx4:swappinggrammars
   *
   * @param newGrammarName
   * @throws PropertyException
   * @throws InstantiationException
   * @throws IOException
   */
  /*
   * FIXME SPHINX IS A MESS, IT CAN'T DO THIS ALTHOUGH DOCUMENTATION SAYS IT CAN
   * void swapGrammar(String newGrammarName) throws PropertyException,
   * InstantiationException, IOException { log.debug("Swapping to grammar " +
   * newGrammarName); Linguist linguist = (Linguist) cm.lookup("flatLinguist");
   * linguist.deallocate(); // TODO - bundle sphinx4-1.0beta6 //
   * cm.setProperty("jsgfGrammar", "grammarName", newGrammarName);
   *
   * linguist.allocate(); }
   */

  public void lockOutAllGrammarExcept(String lockPhrase) {
    this.lockPhrases.add(lockPhrase);
  }

  /*
   * deprecated public void onCommand(String command, String targetName, String
   * targetMethod, Object... data) { Message msg = new Message(); msg.name =
   * targetName; msg.method = targetMethod; msg.data = data;
   *
   * commandMap.put(command, msg); }
   */

  /**
   * method to suppress recognition listening events. This is important when
   * Sphinx is listening --> then speaking; typically you don't want Sphinx to
   * listen to its own speech, as it causes a feedback loop and, with Sphinx
   * not really very accurate, it leads to weirdness -- additionally it does
   * not recreate the speech processor, so it's not as heavy handed
   */
  @Override
  public synchronized void pauseListening() {
    log.info("Pausing Listening");
    isListening = false;
    if (microphone != null && recognizer != null) {
      // TODO: what does reset monitors do? maybe clear the microphone?
      // maybe neither of these do anything useful
      microphone.stopRecording();
      // microphone.clear();
      // recognizer.resetMonitors();
    }
  }

  @Override
  public String publishText(String recognizedText) {
    return recognizedText;
  }

  /**
   * The main output for this service.
   *
   * @param word
   * @return the word
   */
  @Override
  public String recognized(String word) {
    return word;
  }

  public String publishRequestConfirmation(String txt) {
    // TODO: rename this to publishRequestConfirmation
    return txt;
  }

  @Override
  public void resumeListening() {
    log.info("resuming listening");
    isListening = true;
    if (microphone != null) {
      // TODO: no idea if this does anything useful.
      microphone.clear();
      microphone.startRecording();
    }
  }

  // FYI - grammar must be created BEFORE we start to listen
  @Override
  public void startListening() {
    startListening(null); // use existing grammar
  }

  // FIXME - re-entrant - make it create a new speechProcessor
  // assume it's a new grammar
  public void startListening(String grammar) {
    if (speechProcessor != null) {
      log.warn("already listening");
      return;
    }
    StringBuffer newGrammar = new StringBuffer();
    buildGrammar(newGrammar, commands);
    buildGrammar(newGrammar, confirmations);
    buildGrammar(newGrammar, negations);
    buildGrammar(newGrammar, bypass);

    if (grammar != null) {
      if (newGrammar.length() > 0) {
        newGrammar.append("|");
      }
      newGrammar.append(cleanGrammar(grammar));
    }

    createGrammar(newGrammar.toString());
    speechProcessor = new SpeechProcessor(this);
    speechProcessor.start();
  }
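  // For example, with the registrations from main() above, the combined
  // grammar handed to createGrammar() ends up roughly like
  // "log|start clock|stop clock|...|yes|correct|...|no|wrong|..."
  // (the exact ordering depends on HashMap iteration order)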
  private String cleanGrammar(String grammar) {
    // sphinx doesn't like punctuation in its grammar - commas and periods give
    // it a hard time.
    String clean = grammar.replaceAll("[\\.\\,]", " ");
    return clean;
  }

  @Override
  public void startRecording() {
    microphone.clear();
    microphone.startRecording();
  }

  @Override
  public void stopListening() {
    isListening = false;
    if (speechProcessor != null) {
      speechProcessor.isRunning = false;
    }
    speechProcessor = null;
  }

  /**
   * stopRecording - it does "work", however, the speech recognition part seems
   * to degrade when startRecording is called. I have worked around this by not
   * stopping the recording, but by not processing what was recognized.
   */
  @Override
  public void stopMsgRecording() {
    microphone.stopRecording();
    microphone.clear();
  }

  @Override
  public void stopService() {
    super.stopService();
    stopListening();
    if (recognizer != null) {
      recognizer.deallocate();
      recognizer = null;
    }
    if (microphone != null) {
      microphone.stopRecording();
      microphone = null;
    }
  }

  @Override
  public void addMouth(SpeechSynthesis mouth) {
    if (mouth == null) {
      warn("can not attach - mouth is null");
      return;
    }
    // if I'm speaking - I shouldn't be listening
    mouth.addEar(this);
    this.addListener("publishText", mouth.getName(), "onText");
    this.addListener("publishRequestConfirmation", mouth.getName(), "onRequestConfirmation");
    addListener("requestConfirmation", mouth.getName(), "onRequestConfirmation");
    log.info("attached Speech service {} to Sphinx service {} with default message routes", mouth.getName(), getName());
  }

  @Override
  public void onStartSpeaking(String utterance) {
    pauseListening();
  }

  @Override
  public void onEndSpeaking(String utterance) {
    resumeListening();
  }

  /**
   * This static method returns all the details of the class without it having
   * to be constructed. It has description, categories, dependencies, and peer
   * definitions.
   *
   * @return ServiceType - returns all the data
   */
  static public ServiceType getMetaData() {

    ServiceType meta = new ServiceType(Sphinx.class.getCanonicalName());
    meta.addDescription("open source pure Java speech recognition");
    meta.addCategory("speech recognition", "control");
    meta.addDependency("javax.speech.recognition", "1.0");
    meta.addDependency("edu.cmu.sphinx", "4-1.0beta6");
    return meta;
  }

}