/**
*
* @author greg (at) myrobotlab.org
*
* This file is part of MyRobotLab (http://myrobotlab.org).
*
* MyRobotLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version (subject to the "Classpath" exception
* as provided in the LICENSE.txt file that accompanied this code).
*
* MyRobotLab is distributed in the hope that it will be useful or fun,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* All libraries in thirdParty bundle are subject to their own license
* requirements - please refer to http://myrobotlab.org/libraries for
* details.
*
* Enjoy !
*
* Dependencies:
* sphinx4-1.0beta6
* google recognition - a network connection is required
*
* References:
* Swapping Grammars - http://cmusphinx.sourceforge.net/wiki/sphinx4:swappinggrammars
*
* http://cmusphinx.sourceforge.net/sphinx4/javadoc/edu/cmu/sphinx/jsgf/JSGFGrammar.html#loadJSGF(java.lang.String)
* TODO - loadJSGF - The JSGF grammar specified by grammarName will be loaded from the base url (tossing out any previously loaded grammars)
*
*
* */
package org.myrobotlab.service;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.commons.lang.StringUtils;
import org.myrobotlab.framework.Service;
import org.myrobotlab.framework.ServiceType;
import org.myrobotlab.io.FileIO;
import org.myrobotlab.logging.Level;
import org.myrobotlab.logging.LoggerFactory;
import org.myrobotlab.logging.Logging;
import org.myrobotlab.logging.LoggingFactory;
import org.myrobotlab.service.interfaces.SpeechRecognizer;
import org.myrobotlab.service.interfaces.SpeechSynthesis;
import org.myrobotlab.service.interfaces.TextListener;
import org.myrobotlab.service.interfaces.TextPublisher;
import org.slf4j.Logger;
import edu.cmu.sphinx.frontend.util.Microphone;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
import edu.cmu.sphinx.util.props.PropertyException;
/**
*
* Sphinx - Speech recognition based on CMU Sphinx. This service must be told
* what it's listening for. It does not do free-form speech recognition.
*
*/
public class Sphinx extends Service implements SpeechRecognizer, TextPublisher {
/**
* A voice command: the message that is sent (via send(name, method, params))
* when its action phrase is recognized.
*
* Commands must be registered before startListening is called, because
* startListening builds the grammar file from the registered phrases.
*/
public class Command {
// first argument passed to send(); callers in this file pass a service name
public String name;
// second argument passed to send(); the method to call on that service
public String method;
// third argument passed to send(); may be null (see addBypass and friends)
public Object[] params;
// package-private: instances are created by the enclosing Sphinx service
Command(String name, String method, Object[] params) {
this.name = name;
this.method = method;
this.params = params;
}
}
class SpeechProcessor extends Thread {
Sphinx myService = null;
public boolean isRunning = false;
public SpeechProcessor(Sphinx myService) {
super(myService.getName() + "_ear");
this.myService = myService;
}
@Override
public void run() {
try {
isRunning = true;
info(String.format("starting speech processor thread %s_ear", myService.getName()));
String newPath = cfgDir + File.separator + myService.getName() + ".xml";
File localGramFile = new File(newPath);
info("loading grammar file");
if (localGramFile.exists()) {
info(String.format("grammar config %s", newPath));
cm = new ConfigurationManager(newPath);
} else {
// resource in jar default
info(String.format("grammar /resource/Sphinx/simple.xml"));
cm = new ConfigurationManager(this.getClass().getResource("/resource/Sphinx/simple.xml"));
}
info("starting recognizer");
// start the word recognizer
recognizer = (Recognizer) cm.lookup("recognizer");
recognizer.allocate();
info("starting microphone");
microphone = (Microphone) cm.lookup("microphone");
if (!microphone.startRecording()) {
log.error("Cannot start microphone.");
recognizer.deallocate();
}
// loop the recognition until the program exits.
isListening = true;
while (isRunning) {
info("listening: %b", isListening);
invoke("listeningEvent");
Result result = recognizer.recognize();
if (!isListening) {
// we could have stopped listening
Thread.sleep(250);
continue;
}
log.info("Recognized Loop: {} Listening: {}", result, isListening);
// log.error(result.getBestPronunciationResult());
if (result != null) {
String resultText = result.getBestFinalResultNoFiller();
if (StringUtils.isEmpty(resultText)) {
// nothing heard?
continue;
}
log.info("recognized: " + resultText + '\n');
if (resultText.length() > 0 && isListening) {
if (lockPhrases.size() > 0 && !lockPhrases.contains(resultText) && !confirmations.containsKey(resultText)) {
log.info(String.format("but locked on %s", resultText));
continue;
}
// command system being used
if (commands != null) {
if (currentCommand != null && (confirmations == null || confirmations.containsKey(resultText))) {
// i have a command and a confirmation
// command sent
send(currentCommand.name, currentCommand.method, currentCommand.params);
// command finished
currentCommand = null;
invoke("publishText", "ok");
continue;
} else if (currentCommand != null && negations.containsKey(resultText)) {
// negation has happened... recognized the
// wrong command
// reset command
currentCommand = null;
// apologee
invoke("publishText", "sorry");
continue;
} else if (commands.containsKey(resultText) && (confirmations != null || negations != null)) {
if (bypass != null && bypass.containsKey(resultText)) {
// we have confirmation and/or negations
// - but we also have a bypass
send(currentCommand.name, currentCommand.method, currentCommand.params);
} else {
// setting new potential command - using
// either confirmations or negations
Command cmd = commands.get(resultText);
currentCommand = cmd;
invoke("publishRequestConfirmation", resultText);
// continue in the loop, we should stop listening, and we
// shouldn't publish the text becuase we just asked for
// confirmation.
continue;
}
} else if (commands.containsKey(resultText)) {
// no confirmations or negations are being
// used - just send command
Command cmd = commands.get(resultText);
send(cmd.name, cmd.method, cmd.params);
} else {
error(String.format("unknown use case for Sphinx commands - word is %s", resultText));
// we don't know what this command was.. just continue.. we
// shouldn't publish text or recognized.
// we recognized it. but we don't publish text..
invoke("recognized", resultText);
continue;
}
}
// publishRecognized(resultText);
// Only publish the text if there was a known command?
invoke("publishText", resultText);
invoke("recognized", resultText);
}
} else {
try {
Thread.sleep(250);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
logException(e);
}
// invoke("unrecognizedSpeech");
log.error("I can't hear what you said.\n");
}
}
} catch (Exception e) {
error(e);
}
}
}
private static final long serialVersionUID = 1L;
public final static Logger log = LoggerFactory.getLogger(Sphinx.class.getCanonicalName());
// Sphinx runtime objects; assigned by the SpeechProcessor thread in run()
transient Microphone microphone = null;
transient ConfigurationManager cm = null;
transient Recognizer recognizer = null;
transient SpeechProcessor speechProcessor = null;
// Written by onIsSpeaking / pauseListening / resumeListening / stopListening
// and read inside the SpeechProcessor loop, i.e. shared between threads -
// volatile so the recognition thread sees the change.
private volatile boolean isListening = false;
// private String lockPhrase = null;
// when non-empty, only these phrases (and confirmations) are accepted
HashSet<String> lockPhrases = new HashSet<String>();
// phrase -> command maps; each stays null until its first add* call
HashMap<String, Command> commands = null;
HashMap<String, Command> confirmations = null;
HashMap<String, Command> negations = null;
HashMap<String, Command> bypass = null;
// command waiting for a confirmation or negation, null when none is pending
Command currentCommand = null;
/**
 * Manual smoke test: wires a Sphinx "ear" to a MarySpeech mouth, a Log and a
 * Clock service, registers a few voice commands plus confirmation and
 * negation words, and starts listening.
 */
public static void main(String[] args) {
  LoggingFactory.init(Level.DEBUG);
  try {
    Sphinx sphinx = (Sphinx) Runtime.createAndStart("ear", "Sphinx");

    MarySpeech mouth = new MarySpeech("speech");
    mouth.startService();
    // attaching the mouth subscribes it to the confirmation requests, so
    // that speech asks for confirmation
    // TODO - put this in gui so state will be updated with text question
    sphinx.addMouth(mouth);

    Log logService = (Log) Runtime.createAndStart("log", "Log");
    Clock clockService = (Clock) Runtime.createAndStart("clock", "Clock");
    String logName = logService.getName();
    String clockName = clockService.getName();

    // TODO - log.subscribe(outMethod, publisherName, inMethod, parameterType)
    sphinx.addCommand("log", logName, "log");
    sphinx.addCommand("log subscribe to clock", logName, "subscribe", "pulse");
    sphinx.addCommand("start clock", clockName, "startClock");
    sphinx.addCommand("stop clock", clockName, "stopClock");
    sphinx.addCommand("set clock interval to five seconds", clockName, "setInterval", 5000);
    sphinx.addCommand("set clock interval to ten seconds", clockName, "setInterval", 10000);

    sphinx.addComfirmations("yes", "correct", "right", "yeah", "ya");
    sphinx.addNegations("no", "incorrect", "wrong", "nope", "nah");

    // alternative: pass extra phrases, e.g. startListening("yes | no")
    sphinx.startListening();
  } catch (Exception e) {
    Logging.logError(e);
  }
}
/**
* Creates the service; the name is handed unchanged to the Service
* constructor.
*
* @param n
*          name of this service instance
*/
public Sphinx(String n) {
super(n);
}
/**
 * Registers phrases that skip the confirmation / negation step. All phrases
 * share one marker Command whose method is "bypass".
 *
 * @param txt
 *          the phrases to register
 */
public void addBypass(String... txt) {
  if (bypass == null) {
    bypass = new HashMap<String, Command>();
  }
  final Command marker = new Command(this.getName(), "bypass", null);
  for (String phrase : txt) {
    bypass.put(phrase, marker);
  }
}
/**
 * Registers phrases that confirm a pending command. All phrases share one
 * marker Command whose method is "confirmation".
 *
 * @param txt
 *          the confirmation phrases, e.g. "yes", "correct"
 */
public void addComfirmations(String... txt) {
  if (confirmations == null) {
    confirmations = new HashMap<String, Command>();
  }
  final Command marker = new Command(this.getName(), "confirmation", null);
  for (String phrase : txt) {
    confirmations.put(phrase, marker);
  }
}
// TODO - should this be in Service ?????
/**
 * Maps a spoken phrase to a message. A phrase registered twice keeps only the
 * latest mapping.
 *
 * @param actionPhrase
 *          the phrase to listen for
 * @param name
 *          stored as Command.name (callers in this file pass a service name)
 * @param method
 *          stored as Command.method
 * @param params
 *          stored as Command.params
 */
public void addCommand(String actionPhrase, String name, String method, Object... params) {
  HashMap<String, Command> registry = commands;
  if (registry == null) {
    // the map is created lazily on the first registration
    registry = new HashMap<String, Command>();
    commands = registry;
  }
  registry.put(actionPhrase, new Command(name, method, params));
}
/**
 * Registers phrases that reject a pending command. All phrases share one
 * marker Command whose method is "negation".
 *
 * @param txt
 *          the negation phrases, e.g. "no", "wrong"
 */
public void addNegations(String... txt) {
  if (negations == null) {
    negations = new HashMap<String, Command>();
  }
  final Command marker = new Command(this.getName(), "negation", null);
  for (String phrase : txt) {
    negations.put(phrase, marker);
  }
}
/**
* Registers the given service's "onText" method as a listener of this
* service's "publishText" output (via addListener).
*
* @param service
*          the listener; only its name is used
*/
public void addTextListener(TextListener service) {
addListener("publishText", service.getName(), "onText");
}
/**
* Registers the given service's "heard" method as a listener of this
* service's "recognized" output (via addListener).
*
* @param s
*          the listening service; only its name is used, nothing checks here
*          that it actually has a "heard" method
*/
public void addVoiceRecognitionListener(Service s) {
// TODO - reflect on a public heard method - if doesn't exist error ?
this.addListener("recognized", s.getName(), "heard");
}
// TODO - make "Speech" interface if desired
// public boolean attach(SpeechSynthesis mouth) {
// if (mouth == null) {
// warn("can not attach mouth is null");
// return false;
// }
// // if I'm speaking - I shouldn't be listening
// mouth.addEar(this);
// this.addListener("publishText", mouth.getName(), "onText");
// this.addListener("publishRequestConfirmation", mouth.getName(),
// "onRequestConfirmation");
// log.info(String.format("attached Speech service %s to Sphinx service %s
// with default message routes", mouth.getName(), getName()));
// return true;
// }
/**
 * Appends the phrases (keys) of a command map to a grammar under
 * construction, separating alternatives with "|".
 *
 * A null or empty map leaves the buffer untouched, so no dangling or doubled
 * "|" separator ends up in the grammar.
 *
 * @param sb
 *          the grammar built so far; modified in place
 * @param cmds
 *          phrase map whose keys are appended, may be null
 */
public void buildGrammar(StringBuffer sb, HashMap<String, Command> cmds) {
  if (cmds == null || cmds.isEmpty()) {
    return;
  }
  // a separator is needed before the first phrase only when the buffer
  // already holds alternatives from an earlier call
  boolean needSeparator = sb.length() > 0;
  for (String phrase : cmds.keySet()) {
    if (needSeparator) {
      sb.append("|");
    }
    sb.append(phrase);
    needSeparator = true;
  }
}
/*
* public void publishRecognized(String recognizedText) { invoke("recognized",
* recognizedText); }
*/
/**
* Removes every lock phrase, so recognized phrases are no longer filtered
* against the lock set.
*/
public void clearLock() {
lockPhrases.clear();
}
/**
 * Writes the Sphinx configuration (.xml) and the JSGF grammar (.gram) for
 * this service into cfgDir. The files are named after the service, with
 * every '.' in the name replaced by '_'.
 *
 * Must be called before the speech processor starts if a new grammar is
 * needed.
 *
 * example: ear.createGrammar("stop | go | left | right | back");
 *
 * @param grammar
 *          grammar content - the alternatives of the single public rule
 * @return true if the files were written, false if writing them threw an
 *         exception
 */
public boolean createGrammar(String grammar) {
  log.info("creating grammar [{}]", grammar);
  // FIXME - probably broken
  // get base simple.xml file - and modify it to point to the correct .gram
  // file
  String simplexml = getServiceResourceFile("simple.xml");
  // literal (non-regex) replacement: nothing here is a pattern
  simplexml = simplexml.replace("resource:/resource/", ".myrobotlab");
  // a service name like i01.ear becomes i01_ear - Sphinx derives the .gram
  // file from the grammar name in the xml, which must not contain dots
  String grammarFileName = getName().replace('.', '_');
  // replace() rather than replaceAll(): a '$' or '\' in the service name
  // must not be interpreted as a regex group reference or escape
  simplexml = simplexml.replace("name=\"grammarName\" value=\"simple\"", "name=\"grammarName\" value=\"" + grammarFileName + "\"");
  try {
    FileIO.toFile(String.format("%s%s%s.%s", cfgDir, File.separator, grammarFileName, "xml"), simplexml);
    save("xml", simplexml);
    String gramdef = "#JSGF V1.0;\n" + "grammar " + grammarFileName + ";\n" + "public <greet> = (" + grammar + ");";
    FileIO.toFile(String.format("%s%s%s.%s", cfgDir, File.separator, grammarFileName, "gram"), gramdef);
  } catch (Exception e) {
    Logging.logError(e);
    return false;
  }
  // save("gram", gramdef);
  return true;
}
/**
 * Reports whether the microphone is currently recording.
 *
 * @return false while no microphone exists yet (it is created by the speech
 *         processor thread), otherwise the microphone's own recording state
 */
public boolean isRecording() {
  return microphone != null && microphone.isRecording();
}
/**
 * An inbound port for speaking services (TTS) which suppresses listening
 * while the system is talking, otherwise a feedback loop can occur.
 *
 * @param talking
 *          true when speech output is in progress, false when it has ended
 * @return the value that was passed in
 */
public synchronized boolean onIsSpeaking(Boolean talking) {
  // listening is simply the inverse of talking
  isListening = !talking;
  log.info(talking ? "I'm talking so I'm not listening" : "I'm not talking so I'm listening");
  return talking;
}
/**
 * Event hook invoked by the speech processor at the top of every recognition
 * cycle, i.e. when the service is actually listening. There is some delay
 * before the first one because the recognizer has to load.
 */
@Override
public void listeningEvent() {
  // intentionally empty - this method only exists to be invoked as an event
}
/**
* FIXME - the trunk is broke - the configuration is horrible find a way to
* make this work, despite Sphinx's chaos !
*
* function to swap grammars to allow sphinx a little more capability
* regarding "new words"
*
* check http://cmusphinx.sourceforge.net/wiki/sphinx4:swappinggrammars
*
* @param newGrammarName
* @throws PropertyException
* @throws InstantiationException
* @throws IOException
*/
/*
* FIXME SPHINX IS A MESS IT CAN"T DO THIS ALTHOUGH DOCUMENTATION SAYS IT CAN
* void swapGrammar(String newGrammarName) throws PropertyException,
* InstantiationException, IOException { log.debug("Swapping to grammar " +
* newGrammarName); Linguist linguist = (Linguist) cm.lookup("flatLinguist");
* linguist.deallocate(); // TODO - bundle sphinx4-1.0beta6 //
* cm.setProperty("jsgfGrammar", "grammarName", newGrammarName);
*
* linguist.allocate(); }
*/
/**
* Adds a phrase to the lock set. While the set is non-empty the speech
* processor ignores recognized phrases that are neither a lock phrase nor a
* confirmation. Use clearLock() to remove the lock again.
*
* @param lockPhrase
*          phrase that stays accepted while locked
*/
public void lockOutAllGrammarExcept(String lockPhrase) {
this.lockPhrases.add(lockPhrase);
}
/*
* deprecated public void onCommand(String command, String targetName, String
* targetMethod, Object... data) { Message msg = new Message(); msg.name =
* targetName; msg.method = targetMethod; msg.data = data;
*
* commandMap.put(command, msg); }
*/
/**
 * Suppresses recognition without tearing down the speech processor. This
 * matters when Sphinx listens and then the system speaks: hearing its own
 * speech causes a feedback loop. Because the processor thread is kept, this
 * is lighter than stopListening().
 */
@Override
public synchronized void pauseListening() {
  log.info("Pausing Listening");
  isListening = false;
  if (microphone == null || recognizer == null) {
    // speech processor has not finished setting up - nothing to stop
    return;
  }
  // TODO: unclear whether clearing the microphone or resetting the
  // recognizer monitors would help here; only recording is stopped
  microphone.stopRecording();
}
/**
* Publishing point for text; invoked by the speech processor with recognized
* phrases as well as with the "ok" / "sorry" command feedback.
*
* @param recognizedText
*          the text to publish
* @return the same text, unchanged
*/
@Override
public String publishText(String recognizedText) {
return recognizedText;
}
/**
* The main output for this service; invoked by the speech processor with
* each recognized phrase.
*
* @param word
*          the recognized phrase
* @return the same phrase, unchanged
*/
@Override
public String recognized(String word) {
return word;
}
/**
* Publishing point invoked by the speech processor when a recognized command
* needs a confirmation or negation before it is sent.
*
* @param txt
*          the phrase awaiting confirmation
* @return the same phrase, unchanged
*/
public String publishRequestConfirmation(String txt) {
return txt;
}
/**
 * Counterpart of pauseListening(): re-enables processing of recognized
 * phrases and, if a microphone exists, clears it and restarts recording.
 */
@Override
public void resumeListening() {
  log.info("resuming listening");
  isListening = true;
  if (microphone == null) {
    return;
  }
  // TODO: no idea if this does anything useful.
  microphone.clear();
  microphone.startRecording();
}
/**
* Starts listening with a grammar built only from the registered commands,
* confirmations, negations and bypass phrases.
*
* FYI - phrases must be registered BEFORE we start to listen.
*/
@Override
public void startListening() {
startListening(null); // null: no extra phrases are appended to the grammar
}
// FIXME - re-entrant - make it create new speechProcessor
// assume its a new grammar
/**
 * Builds the grammar from all registered phrase maps plus the optional extra
 * phrases, writes it with createGrammar and starts the speech processor
 * thread. Does nothing but warn when a processor already exists.
 *
 * @param grammar
 *          extra "|"-separated phrases, or null for none
 */
public void startListening(String grammar) {
  if (speechProcessor != null) {
    log.warn("already listening");
    return;
  }

  StringBuffer combined = new StringBuffer();
  buildGrammar(combined, commands);
  buildGrammar(combined, confirmations);
  buildGrammar(combined, negations);
  buildGrammar(combined, bypass);

  if (grammar != null) {
    if (combined.length() > 0) {
      combined.append("|");
    }
    combined.append(cleanGrammar(grammar));
  }

  createGrammar(combined.toString());

  speechProcessor = new SpeechProcessor(this);
  speechProcessor.start();
}
/**
 * Replaces every period and comma with a space: sphinx has a hard time with
 * punctuation in its grammar.
 */
private String cleanGrammar(String grammar) {
  return grammar.replaceAll("[\\.\\,]", " ");
}
/**
 * Clears the microphone and starts recording. Does nothing but warn when the
 * microphone does not exist yet - it is created by the speech processor
 * thread, so this can be called too early.
 */
@Override
public void startRecording() {
  if (microphone == null) {
    log.warn("can not start recording - microphone is not initialized");
    return;
  }
  microphone.clear();
  microphone.startRecording();
}
/**
 * Stops processing recognized phrases and asks the speech processor thread,
 * if there is one, to leave its loop. The reference is dropped so a later
 * startListening creates a fresh processor.
 */
@Override
public void stopListening() {
  isListening = false;
  final SpeechProcessor active = speechProcessor;
  if (active != null) {
    active.isRunning = false;
  }
  speechProcessor = null;
}
/**
 * Stops recording and clears the microphone. It does "work", however the
 * speech recognition seems to degrade when startRecording is called again.
 * The workaround used elsewhere is to keep recording and simply not process
 * what was recognized.
 *
 * Does nothing but warn when the microphone does not exist yet.
 */
@Override
public void stopMsgRecording() {
  if (microphone == null) {
    log.warn("can not stop recording - microphone is not initialized");
    return;
  }
  microphone.stopRecording();
  microphone.clear();
}
/**
 * Stops the service, then the speech processor, and finally releases the
 * recognizer and the microphone if they were created.
 */
@Override
public void stopService() {
  super.stopService();
  stopListening();

  final Recognizer activeRecognizer = recognizer;
  if (activeRecognizer != null) {
    activeRecognizer.deallocate();
    recognizer = null;
  }

  final Microphone activeMicrophone = microphone;
  if (activeMicrophone != null) {
    activeMicrophone.stopRecording();
    microphone = null;
  }
}
/**
 * Attaches a speech synthesis service: registers this ear with the mouth and
 * routes published text and confirmation requests to it.
 *
 * @param mouth
 *          the speech service; a null value is only warned about
 */
@Override
public void addMouth(SpeechSynthesis mouth) {
  if (mouth == null) {
    warn("can not attach mouth is null");
    return;
  }
  // if I'm speaking - I shouldn't be listening
  mouth.addEar(this);

  final String mouthName = mouth.getName();
  addListener("publishText", mouthName, "onText");
  // both the current and the older topic name are routed to the mouth
  for (String topic : new String[] { "publishRequestConfirmation", "requestConfirmation" }) {
    addListener(topic, mouthName, "onRequestConfirmation");
  }
  log.info("attached Speech service {} to Sphinx service {} with default message routes", mouthName, getName());
}
/**
* Pauses listening when speech output starts - presumably invoked by the
* speech service attached via addMouth.
*
* @param utterance
*          the text being spoken; not used here
*/
@Override
public void onStartSpeaking(String utterance) {
pauseListening();
}
/**
* Resumes listening when speech output has ended - presumably invoked by the
* speech service attached via addMouth.
*
* @param utterance
*          the text that was spoken; not used here
*/
@Override
public void onEndSpeaking(String utterance) {
resumeListening();
}
/**
 * Describes this service type without constructing an instance: its
 * description, categories and dependencies.
 *
 * @return ServiceType - the meta data of the Sphinx service
 */
static public ServiceType getMetaData() {
  final String typeName = Sphinx.class.getCanonicalName();
  ServiceType serviceType = new ServiceType(typeName);

  serviceType.addDescription("open source pure Java speech recognition");
  serviceType.addCategory("speech recognition", "control");

  serviceType.addDependency("javax.speech.recognition", "1.0");
  serviceType.addDependency("edu.cmu.sphinx", "4-1.0beta6");

  return serviceType;
}
}