package com.rayo.server.verb;
import java.util.ArrayList;
import java.util.List;
import javax.validation.ConstraintValidatorContext;
import com.rayo.core.verb.Choices;
import com.rayo.core.verb.Input;
import com.rayo.core.verb.InputCompleteEvent;
import com.rayo.core.verb.InputCompleteEvent.Reason;
import com.rayo.core.verb.InputMode;
import com.rayo.core.verb.SignalEvent;
import com.rayo.core.verb.VerbCompleteEvent;
import com.rayo.server.CallActor;
import com.rayo.server.exception.ExceptionMapper;
import com.voxeo.logging.Loggerf;
import com.voxeo.moho.Call;
import com.voxeo.moho.Participant;
import com.voxeo.moho.State;
import com.voxeo.moho.common.event.MohoCPAEvent;
import com.voxeo.moho.event.CPAEvent;
import com.voxeo.moho.event.CPAEvent.Type;
import com.voxeo.moho.event.InputCompleteEvent.Cause;
import com.voxeo.moho.event.InputDetectedEvent;
import com.voxeo.moho.media.input.EnergyGrammar;
import com.voxeo.moho.media.input.Grammar;
import com.voxeo.moho.media.input.InputCommand;
import com.voxeo.moho.media.input.SignalGrammar;
import com.voxeo.moho.media.input.SignalGrammar.Signal;
import com.voxeo.servlet.xmpp.StanzaError;
public class InputHandler extends AbstractLocalVerbHandler<Input, Participant> {
/**
 * The running Moho input operation. Assigned by {@link #start()}, stopped by
 * {@link #stop(boolean)} and used to filter completion events that belong to
 * other media operations. Stays {@code null} until an input has been started.
 */
private com.voxeo.moho.media.Input<Participant> input;
/**
 * Names of the CPA signals requested by the model. Populated while the
 * grammars are built and consulted before any signal is reported.
 */
private List<String> signals = new ArrayList<String>();
/** Class logger. */
private static final Loggerf logger = Loggerf.getLogger(InputHandler.class);
/**
 * <p>
 * The 'cpa-maxtime' parameter is the "measuring stick" used to determine
 * 'human' or 'machine' events. If the duration of voice activity is less
 * than the value of 'cpa-maxtime', the called party is considered to be
 * 'human.' If voice activity reaches or exceeds the 'cpa-maxtime' value, your
 * application has likely called a 'machine'.
 * <p>
 * The recommended value for this parameter is between 4000 and 6000ms.
 * Defaults to 4000 and is overridden by the CPA data's max time when present.
 */
protected long voxeo_cpa_max_time = 4000;
/**
 * <p>
 * The 'cpa-maxsilence' parameter is used to identify the end of voice
 * activity. When activity begins, CPA will measure the duration until a
 * period of silence greater than the value of 'cpa-maxsilence' is detected.
 * Armed with start and end timestamps, CPA can then calculate the total
 * duration of voice activity.
 * <p>
 * A value of 800 to 1200ms is suggested for this parameter. Defaults to 1000
 * and is overridden by the CPA data's final silence when present.
 */
protected long voxeo_cpa_final_silence = 1000;
/**
 * <p>
 * The 'cpa-min-speech-duration' parameter is used to identify the minimum
 * duration of energy.
 * <p>
 * No suggested range has been documented for this parameter yet; the default
 * is 80 (presumably milliseconds, like the other durations - TODO confirm).
 */
protected long voxeo_cpa_min_speech_duration = 80;
/**
 * <p>
 * The 'cpa-min-volume' parameter is used to identify the threshold of what
 * is considered to be energy vs silence.
 * <p>
 * No suggested range has been documented for this parameter yet; the default
 * is -24 (db).
 */
protected int voxeo_cpa_min_volume = -24;
/**
 * Copied from the CPA data's terminate flag when the verb starts. When set, a
 * detected DTMF signal completes the verb instead of only firing a signal
 * event.
 */
protected boolean terminate = false;
/** Wall-clock time (ms) of the last start-of-speech; reset to 0 after each end-of-speech. */
private long _lastStartOfSpeech;
/** Wall-clock time (ms) recorded the last time a speech duration was calculated. */
private long _lastEndOfSpeech;
/**
 * Incremented every time a speech duration is calculated and passed along
 * with the CPA events built from end-of-speech notifications.
 */
private int _retries;
/**
 * Starts the input verb on the participant's media service.
 * <p>
 * Builds the grammars for the model, translates the model's settings into a
 * Moho {@link InputCommand} and launches it, keeping the resulting input
 * operation so it can be stopped or matched against completion events later.
 * When the model carries CPA data, its tunables override this handler's
 * defaults and are pushed to the command as energy parameters. No input is
 * started unless the actor is a {@link CallActor}.
 */
@Override
public void start() {
    Grammar[] grammars = buildGrammars(model);
    if (!(getActor() instanceof CallActor)) {
        return;
    }
    InputCommand inputCommand = new InputCommand(grammars);

    // Is this a CPA request?
    if (model.getCpaData() != null) {
        // The original message had no placeholder, so the CPA data argument
        // was never rendered.
        // NOTE(review): assumes Loggerf takes printf-style formats - confirm.
        logger.debug("Starting CPA request with data [%s]",
                model.getCpaData());
        if (model.getCpaData().getFinalSilence() != null) {
            voxeo_cpa_final_silence = model.getCpaData().getFinalSilence();
        }
        if (model.getCpaData().getMaxTime() != null) {
            voxeo_cpa_max_time = model.getCpaData().getMaxTime();
        }
        if (model.getCpaData().getMinSpeechDuration() != null) {
            voxeo_cpa_min_speech_duration = model.getCpaData()
                    .getMinSpeechDuration();
        }
        if (model.getCpaData().getMinVolume() != null) {
            voxeo_cpa_min_volume = model.getCpaData().getMinVolume()
                    .intValue();
        }
        terminate = model.getCpaData().isTerminate();
        inputCommand.setAutoRest(false);
        inputCommand.setEnergyParameters(voxeo_cpa_final_silence, null,
                null, voxeo_cpa_min_speech_duration,
                voxeo_cpa_min_volume);
    }

    // ANY is the fallback mode when the model does not specify one.
    inputCommand.setInputMode(com.voxeo.moho.media.InputMode.ANY);
    if (model.getMode() != null) {
        inputCommand.setInputMode(getMohoMode(model.getMode()));
    }
    // NOTE(review): a null model mode falls back to ANY above but does not
    // enable supervision here - confirm this is intended.
    if (model.getMode() == InputMode.DTMF || model.getMode() == InputMode.ANY) {
        inputCommand.setSupervised(true);
    }

    // Optional settings are only forwarded when the model provides them.
    if (model.getInitialTimeout() != null) {
        inputCommand.setInitialTimeout(model.getInitialTimeout().getMillis());
    }
    if (model.getInterDigitTimeout() != null) {
        inputCommand.setInterDigitsTimeout(model.getInterDigitTimeout()
                .getMillis());
    }
    if (model.getMaxSilence() != null) {
        inputCommand.setSpeechIncompleteTimeout(model.getMaxSilence()
                .getMillis());
    }
    if (model.getRecognizer() != null) {
        inputCommand.setRecognizer(model.getRecognizer());
    }
    if (model.getMinConfidence() != null) {
        inputCommand.setMinConfidence(model.getMinConfidence());
    }
    if (model.getSensitivity() != null) {
        inputCommand.setSensitivity(model.getSensitivity());
    }
    if (model.getTerminator() != null) {
        inputCommand.setTerminator(model.getTerminator());
    }

    input = getMediaService().input(inputCommand);
}
/**
 * Builds the Moho grammars for an input request.
 * <p>
 * For CPA requests on a {@link CallActor} this adds two energy grammars plus
 * one {@link SignalGrammar} per requested signal other than "dtmf" and
 * "speech", records every requested signal name, and copies the CPA tunables
 * into this handler. The grammars declared on the model itself are appended
 * afterwards, by URI when one is given and by content type and content
 * otherwise.
 *
 * @param model the input request being started
 * @return the grammars to hand to the input command, possibly empty
 */
private Grammar[] buildGrammars(Input model) {
    List<Grammar> grammars = new ArrayList<Grammar>();

    // Is this a CPA request?
    if (getActor() instanceof CallActor && model.getCpaData() != null) {
        // The original message had no placeholder, so the CPA data argument
        // was never rendered.
        // NOTE(review): assumes Loggerf takes printf-style formats - confirm.
        logger.debug("Starting CPA request with data [%s]",
                model.getCpaData());
        boolean terminateOnSignal = model.getCpaData().isTerminate();
        grammars.add(new EnergyGrammar(true, false, false));
        grammars.add(new EnergyGrammar(false, true, terminateOnSignal));
        for (String signal : model.getCpaData().getSignals()) {
            // We ignore DTMF signal at this layer. Moho will broadcast
            // those events as InputDetectedEvent. However the rayo protocol
            // exposes the events through the CPA API for consistency
            // reasons, so DTMF events will go through a different path.
            if (!signal.equalsIgnoreCase("dtmf")
                    && !signal.equalsIgnoreCase("speech")) {
                grammars.add(new SignalGrammar(Signal.parse(signal),
                        terminateOnSignal));
            }
            // Every later lookup uses lower-case names ("dtmf", "speech",
            // signal.toString().toLowerCase()), so store them lower-cased
            // too; otherwise a request for e.g. "DTMF" is never reported.
            this.signals.add(signal.toLowerCase());
        }
        if (model.getCpaData().getFinalSilence() != null) {
            voxeo_cpa_final_silence = model.getCpaData().getFinalSilence();
        }
        if (model.getCpaData().getMaxTime() != null) {
            voxeo_cpa_max_time = model.getCpaData().getMaxTime();
        }
        if (model.getCpaData().getMinSpeechDuration() != null) {
            voxeo_cpa_min_speech_duration = model.getCpaData()
                    .getMinSpeechDuration();
        }
        if (model.getCpaData().getMinVolume() != null) {
            voxeo_cpa_min_volume = model.getCpaData().getMinVolume()
                    .intValue();
        }
    }

    for (Choices choices : model.getGrammars()) {
        if (choices == null) {
            continue;
        }
        if (choices.getUri() != null) {
            grammars.add(new Grammar(choices.getUri()));
        } else {
            grammars.add(new Grammar(choices.getContentType(),
                    choices.getContent()));
        }
    }
    return grammars.toArray(new Grammar[0]);
}
/**
 * Stops this verb.
 *
 * @param hangup when {@code true} the verb is completed right away with a
 *               NOMATCH reason; otherwise the running input operation, if
 *               there is one, is asked to stop
 */
@Override
public void stop(boolean hangup) {
    if (hangup) {
        complete(new InputCompleteEvent(model, Reason.NOMATCH));
        return;
    }
    if (input == null) {
        return;
    }
    input.stop();
}
/**
 * Checks that the participant is ready and that media can be manipulated.
 * The first failing check registers a RESOURCE_CONSTRAINT violation on the
 * given context.
 *
 * @param context validator context that receives the violation, if any
 * @return {@code true} when both checks pass, {@code false} otherwise
 */
@Override
public boolean isStateValid(ConstraintValidatorContext context) {
    String violation = null;
    if (!isReady(participant)) {
        violation = "Call is not ready yet.";
    } else if (!canManipulateMedia()) {
        violation = "Media operations are not allowed in the current call status.";
    }
    if (violation == null) {
        return true;
    }
    context.buildConstraintViolationWithTemplate(violation)
            .addNode(ExceptionMapper.toString(StanzaError.Condition.RESOURCE_CONSTRAINT))
            .addConstraintViolation();
    return false;
}
/**
 * Handles a CPA event. Events from any source other than this handler's
 * participant are ignored. An event carrying a signal is dispatched as a
 * signal event when that signal name was requested; an event without a
 * signal is translated into a "speech" signal event when speech detection
 * was requested and the event type maps to one.
 *
 * @param event the CPA event to process
 */
@State
public void onCPAEvent(CPAEvent<Call> event) {
    logger.debug("Received CPA Event: " + event);
    if (!event.getSource().equals(participant)) {
        return;
    }

    if (event.getSignal() != null) {
        String signalName = event.getSignal().toString().toLowerCase();
        if (signals != null && signals.contains(signalName)) {
            getEventDispatcher().fire(
                    new com.rayo.core.verb.SignalEvent(
                            (Input) getModel(),
                            signalName,
                            event.getDuration(),
                            null));
        }
        return;
    }

    if (event.getType() == null) {
        return;
    }
    if (signals == null || !signals.contains("speech")) {
        return;
    }
    SignalEvent speechSignal = buildSignalFromCPAEvent(event);
    if (speechSignal != null) {
        getEventDispatcher().fire(speechSignal);
    }
}
/**
 * Translates a human/machine CPA event into a "speech" signal event.
 *
 * @param event the CPA event to translate
 * @return a signal event whose source is "machine" or "human", or
 *         {@code null} when the event has no type, speech was not requested,
 *         or the type is neither MACHINE_DETECTED nor HUMAN_DETECTED
 */
private SignalEvent buildSignalFromCPAEvent(CPAEvent<Call> event) {
    Type detected = event.getType();
    if (detected == null) {
        return null;
    }
    if (signals == null || !signals.contains("speech")) {
        return null;
    }

    // human vs machine scenario
    if (detected == Type.MACHINE_DETECTED) {
        return new com.rayo.core.verb.SignalEvent(
                (Input) getModel(), "speech", event.getDuration(), "machine");
    }
    if (detected == Type.HUMAN_DETECTED) {
        return new com.rayo.core.verb.SignalEvent(
                (Input) getModel(), "speech", event.getDuration(), "human");
    }
    return null;
}
/**
 * Handles an input-detected notification.
 * <p>
 * An event carrying input is reported as a "dtmf" signal when DTMF was
 * requested; with the terminate flag set it completes the verb and stops the
 * input instead of just firing the signal. An event without input is treated
 * as speech tracking: start-of-speech records a timestamp, end-of-speech is
 * classified as human or machine by comparing the measured duration with
 * {@code voxeo_cpa_max_time}, and any other event carrying a signal is
 * forwarded as a machine-detected CPA event.
 *
 * @param event the detected input event
 * @throws Exception declared by the handler signature
 */
@com.voxeo.moho.State
public void onInputDetected(InputDetectedEvent<Call> event) throws Exception {
    logger.debug(event.toString());

    if (event.getInput() != null) {
        if (signals == null || !signals.contains("dtmf")) {
            return;
        }
        SignalEvent dtmfSignal = new SignalEvent(
                (Input) getModel(), "dtmf", event.getInput());
        if (!terminate) {
            fire(dtmfSignal);
            return;
        }
        // This is for compatibility with CPA's terminate tag. Probably not
        // much sense for DTMF detection as the same can be achieved via a
        // [1 DIGITS] grammar, but we should support it anyways.
        InputCompleteEvent finished = new InputCompleteEvent(model, Reason.MATCH);
        finished.setSignalEvent(dtmfSignal);
        try {
            complete(finished);
        } finally {
            stop(false);
        }
        return;
    }

    if (event.isStartOfSpeech()) {
        _lastStartOfSpeech = System.currentTimeMillis();
        return;
    }

    if (event.isEndOfSpeech()) {
        long duration = calculateDuration();
        Type type = duration < voxeo_cpa_max_time
                ? Type.HUMAN_DETECTED
                : Type.MACHINE_DETECTED;
        onCPAEvent(new MohoCPAEvent<Call>(
                event.getSource(), type, duration, _retries));
        // Reset the speech window once it has been reported.
        _lastStartOfSpeech = 0;
        _lastEndOfSpeech = 0;
        return;
    }

    if (event.getSignal() != null) {
        onCPAEvent(new MohoCPAEvent<Call>(event.getSource(),
                Type.MACHINE_DETECTED, event.getSignal()));
    }
}
/**
 * Marks the current time as the end of speech, increments the retry counter
 * and measures the time elapsed since the last recorded start of speech.
 *
 * @return elapsed milliseconds between the last start of speech and now
 */
private long calculateDuration() {
    long now = System.currentTimeMillis();
    _lastEndOfSpeech = now;
    _retries++;
    return now - _lastStartOfSpeech;
}
/**
 * Handles completion of a Moho input operation. Events that belong to a
 * different media operation are ignored; otherwise the Moho cause is mapped
 * to a Rayo complete event and the verb is completed with it.
 *
 * @param event the Moho input completion event
 */
@State
public void onInputComplete(
        com.voxeo.moho.event.InputCompleteEvent<Participant> event) {
    if (!event.getMediaOperation().equals(input)) {
        logger.debug("Ignoring complete event as it is targeted to a different media operation");
        return;
    }
    complete(toRayoCompleteEvent(event));
}

/** Maps the cause of a Moho completion event to the matching Rayo complete event. */
private InputCompleteEvent toRayoCompleteEvent(
        com.voxeo.moho.event.InputCompleteEvent<Participant> event) {
    switch (event.getCause()) {
    case MATCH:
    case END_OF_SPEECH:
        return buildMatchEvent(event);
    case INI_TIMEOUT:
        return new InputCompleteEvent(model, Reason.NOINPUT);
    case IS_TIMEOUT:
    case MAX_TIMEOUT:
        return new InputCompleteEvent(model, Reason.TIMEOUT);
    case NO_MATCH:
        return new InputCompleteEvent(model, Reason.NOMATCH);
    case CANCEL:
        return new InputCompleteEvent(model, VerbCompleteEvent.Reason.STOP);
    case DISCONNECT:
        return new InputCompleteEvent(model, VerbCompleteEvent.Reason.HANGUP);
    case ERROR: {
        String errorText = event.getErrorText() == null
                ? "Internal Server Error"
                : event.getErrorText();
        return new InputCompleteEvent(model, errorText);
    }
    case UNKNOWN:
    default:
        // An unrecognised cause on an already disconnected call is reported
        // as a hangup rather than as an error.
        if (participant instanceof Call
                && ((Call) participant).getCallState() == com.voxeo.moho.Call.State.DISCONNECTED) {
            return new InputCompleteEvent(model, VerbCompleteEvent.Reason.HANGUP);
        }
        return new InputCompleteEvent(model, "Internal Server Error");
    }
}

/** Builds the MATCH complete event, copying the recognition results from the Moho event. */
private InputCompleteEvent buildMatchEvent(
        com.voxeo.moho.event.InputCompleteEvent<Participant> event) {
    InputCompleteEvent matched = new InputCompleteEvent(model, Reason.MATCH);
    matched.setConcept(event.getConcept());
    matched.setInterpretation(event.getInterpretation());
    matched.setConfidence(event.getConfidence());
    matched.setUtterance(event.getUtterance());
    matched.setNlsml(event.getNlsml());
    matched.setTag(event.getTag());
    processSignalIfAny(event, matched);
    if (event.getInputMode() != null) {
        matched.setMode(getInputMode(event.getInputMode()));
    }
    return matched;
}
/**
 * Attaches a signal event to the complete event when the Moho event reports
 * a requested signal. A signal carried by the event is used as is, with a
 * duration of -1 and no source. An END_OF_SPEECH cause without a signal is
 * reported as "speech", with the measured duration and a guessed source.
 *
 * @param event         the Moho input completion event
 * @param completeEvent the Rayo complete event to enrich
 */
private void processSignalIfAny(
        com.voxeo.moho.event.InputCompleteEvent<Participant> event,
        InputCompleteEvent completeEvent) {
    String signalName;
    String origin = null;
    long elapsed = -1L;

    if (event.getSignal() != null) {
        signalName = event.getSignal().toString().toLowerCase();
    } else if (event.getCause() == Cause.END_OF_SPEECH) {
        signalName = "speech";
        elapsed = calculateDuration();
        origin = guessSource(elapsed);
    } else {
        return;
    }

    if (signals == null || !signals.contains(signalName)) {
        return;
    }
    completeEvent.setSignalEvent(
            new com.rayo.core.verb.SignalEvent(
                    (Input) getModel(),
                    signalName,
                    elapsed,
                    origin));
}
/**
 * Classifies a speech duration as human or machine.
 *
 * @param duration speech duration in milliseconds, or -1 when unknown
 * @return {@code null} for -1, "human" when the duration is below
 *         {@code voxeo_cpa_max_time}, "machine" otherwise
 */
private String guessSource(long duration) {
    if (duration == -1L) {
        return null;
    }
    return duration < voxeo_cpa_max_time ? "human" : "machine";
}
}