package com.rayo.server.verb;

import java.util.ArrayList;
import java.util.List;

import javax.validation.ConstraintValidatorContext;

import com.rayo.core.verb.Choices;
import com.rayo.core.verb.Input;
import com.rayo.core.verb.InputCompleteEvent;
import com.rayo.core.verb.InputCompleteEvent.Reason;
import com.rayo.core.verb.InputMode;
import com.rayo.core.verb.SignalEvent;
import com.rayo.core.verb.VerbCompleteEvent;
import com.rayo.server.CallActor;
import com.rayo.server.exception.ExceptionMapper;
import com.voxeo.logging.Loggerf;
import com.voxeo.moho.Call;
import com.voxeo.moho.Participant;
import com.voxeo.moho.State;
import com.voxeo.moho.common.event.MohoCPAEvent;
import com.voxeo.moho.event.CPAEvent;
import com.voxeo.moho.event.CPAEvent.Type;
import com.voxeo.moho.event.InputCompleteEvent.Cause;
import com.voxeo.moho.event.InputDetectedEvent;
import com.voxeo.moho.media.input.EnergyGrammar;
import com.voxeo.moho.media.input.Grammar;
import com.voxeo.moho.media.input.InputCommand;
import com.voxeo.moho.media.input.SignalGrammar;
import com.voxeo.moho.media.input.SignalGrammar.Signal;
import com.voxeo.servlet.xmpp.StanzaError;

/**
 * Handles the Rayo {@code <input>} verb by driving a Moho media input
 * operation on the underlying {@link Participant}.
 *
 * <p>Besides plain grammar-based DTMF/speech recognition, this handler
 * supports CPA (Call Progress Analysis) when the verb carries CPA data:
 * it registers energy/signal grammars, receives Moho
 * {@link CPAEvent}/{@link InputDetectedEvent} callbacks, and translates
 * them into Rayo {@link SignalEvent}s ("human"/"machine"/beep-style
 * signals) or {@link InputCompleteEvent}s.
 *
 * <p>Threading/ordering note: this class keeps mutable per-input state
 * ({@code _lastStartOfSpeech}, {@code _lastEndOfSpeech}, {@code _retries})
 * that is updated across separate framework callbacks; the callback
 * delivery order is assumed to be start-of-speech before end-of-speech.
 */
public class InputHandler extends AbstractLocalVerbHandler<Input, Participant> {

    // Handle to the running Moho input operation; null until start() runs
    // (and it stays null when the actor is not a CallActor — see start()).
    private com.voxeo.moho.media.Input<Participant> input;

    // Lower-cased CPA signal names requested by the verb (e.g. "speech",
    // "dtmf", "beep"); populated in buildGrammars() and consulted before
    // firing any SignalEvent so only requested signals are surfaced.
    private List<String> signals = new ArrayList<String>();

    private static final Loggerf logger = Loggerf.getLogger(InputHandler.class);

    /**
     * <p>
     * The 'cpa-maxtime' parameter is the "measuring stick" used to determine
     * 'human' or 'machine' events. If the duration of voice activity is less
     * than the value of 'cpa-maxtime', the called party is considered to be
     * 'human.' If voice activity exceeds the 'cpa-maxtime' value, your
     * application has likely called a 'machine'.
     * <p>
     * The recommended value for this parameter is between 4000 and 6000ms.
     */
    protected long voxeo_cpa_max_time = 4000;

    /**
     * <p>
     * The 'cpa-maxsilence' parameter is used to identify the end of voice
     * activity. When activity begins, CPA will measure the duration until a
     * period of silence greater than the value of 'cpa-maxsilence' is
     * detected. Armed with start and end timestamps, CPA can then calculate
     * the total duration of voice activity.
     * <p>
     * A value of 800 to 1200ms is suggested for this parameter.
     */
    protected long voxeo_cpa_final_silence = 1000;

    /**
     * <p>
     * The 'cpa-min-speech-duration' parameter is used to identify the minimum
     * duration of energy.
     * <p>
     * A value of (x)ms to (y)ms is suggested for this parameter.
     */
    protected long voxeo_cpa_min_speech_duration = 80;

    /**
     * <p>
     * The 'cpa-min-volume' parameter is used to identify the threshold of what
     * is considered to be energy vs silence.
     * <p>
     * A value of (x)db to (y)db is suggested for this parameter.
     */
    protected int voxeo_cpa_min_volume = -24;

    // When true (CPA 'terminate' flag), a detected DTMF signal completes the
    // whole input verb instead of just firing a SignalEvent.
    protected boolean terminate = false;

    // Timestamp (ms) of the last start-of-speech callback; 0 when idle.
    private long _lastStartOfSpeech;
    // Timestamp (ms) of the last end-of-speech; set inside calculateDuration().
    private long _lastEndOfSpeech;
    // Number of speech bursts measured so far; incremented by calculateDuration().
    private int _retries;

    /**
     * Builds the grammar set and the Moho {@link InputCommand} from the verb
     * model and starts the media input operation.
     *
     * <p>NOTE(review): the CPA parameter fields (final silence, max time,
     * min speech duration, min volume) are assigned here AND again inside
     * buildGrammars() — duplicated logic; consider extracting a single
     * helper. Also note that when the actor is not a CallActor, no input
     * operation is started at all — confirm that is intentional.
     */
    @Override
    public void start() {

        Grammar[] grammars = buildGrammars(model);

        // Is this a CPA request?
        if (getActor() instanceof CallActor) {
            InputCommand inputCommand = new InputCommand(grammars);
            if (model.getCpaData() != null) {
                // NOTE(review): message has no format placeholder for the
                // cpaData argument — verify Loggerf appends extra args.
                logger.debug("Starting CPA request with data", model.getCpaData());
                if (model.getCpaData().getFinalSilence() != null) {
                    voxeo_cpa_final_silence = model.getCpaData()
                            .getFinalSilence();
                }
                if (model.getCpaData().getMaxTime() != null) {
                    voxeo_cpa_max_time = model.getCpaData().getMaxTime();
                }
                if (model.getCpaData().getMinSpeechDuration() != null) {
                    voxeo_cpa_min_speech_duration = model.getCpaData()
                            .getMinSpeechDuration();
                }
                if (model.getCpaData().getMinVolume() != null) {
                    voxeo_cpa_min_volume = model.getCpaData().getMinVolume()
                            .intValue();
                }
                terminate = model.getCpaData().isTerminate();

                // Keep the input session alive across detections so CPA can
                // report multiple signals rather than auto-resting.
                inputCommand.setAutoRest(false);
                inputCommand.setEnergyParameters(voxeo_cpa_final_silence, null,
                        null, voxeo_cpa_min_speech_duration,
                        voxeo_cpa_min_volume);
            }

            // Default to accepting both DTMF and speech unless the verb
            // specifies a mode explicitly.
            inputCommand.setInputMode(com.voxeo.moho.media.InputMode.ANY);
            if (model.getMode() != null) {
                inputCommand.setInputMode(getMohoMode(model.getMode()));
            }
            if (model.getMode() == InputMode.DTMF
                    || model.getMode() == InputMode.ANY) {
                // Supervised mode so individual DTMF detections are delivered
                // (see onInputDetected) — TODO confirm against Moho docs.
                inputCommand.setSupervised(true);
            }
            if (model.getInitialTimeout() != null) {
                inputCommand.setInitialTimeout(model.getInitialTimeout()
                        .getMillis());
            }
            if (model.getInterDigitTimeout() != null) {
                inputCommand.setInterDigitsTimeout(model.getInterDigitTimeout()
                        .getMillis());
            }
            if (model.getMaxSilence() != null) {
                inputCommand.setSpeechIncompleteTimeout(model.getMaxSilence()
                        .getMillis());
            }
            if (model.getRecognizer() != null) {
                inputCommand.setRecognizer(model.getRecognizer());
            }
            if (model.getMinConfidence() != null) {
                inputCommand.setMinConfidence(model.getMinConfidence());
            }
            if (model.getSensitivity() != null) {
                inputCommand.setSensitivity(model.getSensitivity());
            }
            if (model.getTerminator() != null) {
                inputCommand.setTerminator(model.getTerminator());
            }
            input = getMediaService().input(inputCommand);
        }
    }

    /**
     * Translates the verb model into Moho grammars.
     *
     * <p>For CPA requests this adds energy grammars (start/end of speech
     * detection) plus one {@link SignalGrammar} per requested non-dtmf,
     * non-speech signal, and records every requested signal name in
     * {@link #signals}. It then appends one {@link Grammar} per
     * {@link Choices} entry (by URI, or inline by content type/content).
     *
     * <p>Side effects: also (re)assigns the voxeo_cpa_* parameter fields —
     * NOTE(review): duplicated with the same assignments in start().
     *
     * @param model the input verb being started
     * @return the grammars to pass to the Moho {@link InputCommand}
     */
    private Grammar[] buildGrammars(Input model) {

        List<Grammar> grammars = new ArrayList<Grammar>();

        // Is this a CPA request?
        if (getActor() instanceof CallActor) {
            if (model.getCpaData() != null) {
                // NOTE(review): same placeholder-less debug call as start().
                logger.debug("Starting CPA request with data",
                        model.getCpaData());

                // First grammar: detect start of speech; second: detect end
                // of speech, optionally terminating the input when the CPA
                // 'terminate' flag is set.
                grammars.add(new EnergyGrammar(true, false, false));
                grammars.add(new EnergyGrammar(false, true,
                        model.getCpaData().isTerminate()));

                for (String it : model.getCpaData().getSignals()) {
                    // We ignore DTMF signal at this layer. Moho will broadcast
                    // those events as InputDetectedEvent
                    // However the rayo protocol exposes the events through the
                    // CPA API for consistency reasons
                    // So, DTMF events will go through a different path
                    if (!it.equalsIgnoreCase("dtmf")
                            && !it.equalsIgnoreCase("speech")) {
                        grammars.add(new SignalGrammar(Signal.parse(it),
                                model.getCpaData().isTerminate()));
                    }
                    // Remember every requested signal (including dtmf/speech)
                    // so event handlers can filter on it later.
                    this.signals.add(it);
                }
                if (model.getCpaData().getFinalSilence() != null) {
                    voxeo_cpa_final_silence = model.getCpaData()
                            .getFinalSilence();
                }
                if (model.getCpaData().getMaxTime() != null) {
                    voxeo_cpa_max_time = model.getCpaData().getMaxTime();
                }
                if (model.getCpaData().getMinSpeechDuration() != null) {
                    voxeo_cpa_min_speech_duration = model.getCpaData()
                            .getMinSpeechDuration();
                }
                if (model.getCpaData().getMinVolume() != null) {
                    voxeo_cpa_min_volume = model.getCpaData().getMinVolume()
                            .intValue();
                }
            }
        }

        // Regular recognition grammars from the verb's <grammar> elements.
        for (int i = 0; i < model.getGrammars().size(); i++) {
            Choices choices = model.getGrammars().get(i);
            if (choices != null) {
                Grammar grammar = null;
                if (choices.getUri() != null) {
                    grammar = new Grammar(choices.getUri());
                } else {
                    grammar = new Grammar(choices.getContentType(),
                            choices.getContent());
                }
                grammars.add(grammar);
            }
        }
        return grammars.toArray(new Grammar[] {});
    }

    /**
     * Stops this verb.
     *
     * @param hangup true when the stop is caused by the call hanging up, in
     *               which case the verb completes immediately with NOMATCH;
     *               otherwise the underlying media input (if any) is stopped
     *               and completion arrives via onInputComplete().
     */
    @Override
    public void stop(boolean hangup) {
        if (hangup) {
            complete(new InputCompleteEvent(model, Reason.NOMATCH));
        } else {
            if (input != null) {
                input.stop();
            }
        }
    }

    /**
     * Validates that the verb may run: the participant must be ready and
     * media manipulation must be allowed in the current call state. On
     * failure a constraint violation carrying the XMPP resource-constraint
     * condition is recorded on the context.
     *
     * @param context Bean Validation context used to report violations
     * @return true when the verb may proceed
     */
    @Override
    public boolean isStateValid(ConstraintValidatorContext context) {

        if (!isReady(participant)) {
            context.buildConstraintViolationWithTemplate(
                    "Call is not ready yet.")
                    .addNode(
                            ExceptionMapper
                                    .toString(StanzaError.Condition.RESOURCE_CONSTRAINT))
                    .addConstraintViolation();
            return false;
        }
        if (!canManipulateMedia()) {
            context.buildConstraintViolationWithTemplate(
                    "Media operations are not allowed in the current call status.")
                    .addNode(
                            ExceptionMapper
                                    .toString(StanzaError.Condition.RESOURCE_CONSTRAINT))
                    .addConstraintViolation();
            return false;
        }
        return true;
    }

    /**
     * Moho CPA callback. Translates a CPA event on this handler's
     * participant into a Rayo {@link SignalEvent}, but only when the
     * corresponding signal name was requested by the verb (see
     * {@link #signals}).
     *
     * <p>Also invoked directly from onInputDetected() with synthesized
     * {@link MohoCPAEvent}s for speech start/end handling.
     */
    @State
    public void onCPAEvent(CPAEvent<Call> event) {

        logger.debug("Received CPA Event: " + event);
        // Only react to events for our own participant.
        if (event.getSource().equals(participant)) {
            if (event.getSignal() != null) {
                // Named signal (e.g. beep/fax tone): fire it through if the
                // verb asked for it.
                if (signals != null
                        && signals.contains(event.getSignal().toString()
                                .toLowerCase())) {
                    getEventDispatcher().fire(
                            new com.rayo.core.verb.SignalEvent(
                                    (Input) getModel(),
                                    event.getSignal().toString().toLowerCase(),
                                    event.getDuration(), null));
                }
            } else {
                // No signal name: human/machine classification — only
                // relevant when "speech" was requested.
                if (event.getType() != null
                        && (signals != null && signals.contains("speech"))) {
                    SignalEvent signalEvent = buildSignalFromCPAEvent(event);
                    if (signalEvent != null) {
                        getEventDispatcher().fire(signalEvent);
                    }
                }
            }
        }
    }

    /**
     * Maps a HUMAN_DETECTED/MACHINE_DETECTED CPA event to a "speech"
     * {@link SignalEvent} whose source is "human" or "machine".
     *
     * @return the signal event, or null when the CPA type is neither of the
     *         two handled values or "speech" was not requested
     */
    private SignalEvent buildSignalFromCPAEvent(CPAEvent<Call> event) {

        SignalEvent signalEvent = null;
        if (event.getType() != null
                && (signals != null && signals.contains("speech"))) {
            // human vs machine scenario
            switch (event.getType()) {
            case MACHINE_DETECTED:
                signalEvent = new com.rayo.core.verb.SignalEvent(
                        (Input) getModel(), "speech", event.getDuration(),
                        "machine");
                break;
            case HUMAN_DETECTED:
                signalEvent = new com.rayo.core.verb.SignalEvent(
                        (Input) getModel(), "speech", event.getDuration(),
                        "human");
                break;
            }
        }
        return signalEvent;
    }

    /**
     * Moho callback for low-level input detections.
     *
     * <p>DTMF detections (event.getInput() != null) become "dtmf"
     * {@link SignalEvent}s when requested; with the CPA 'terminate' flag set
     * they additionally complete the verb with MATCH and stop the media
     * input. Speech start/end detections are timestamped locally and turned
     * into synthesized CPA events: a burst shorter than
     * {@link #voxeo_cpa_max_time} classifies as HUMAN_DETECTED, longer as
     * MACHINE_DETECTED. Other named signals are forwarded as
     * MACHINE_DETECTED CPA events.
     */
    @com.voxeo.moho.State
    public void onInputDetected(InputDetectedEvent<Call> event)
            throws Exception {

        logger.debug(event.toString());
        if (event.getInput() != null) {
            if (signals != null && signals.contains("dtmf")) {
                SignalEvent signalEvent = new SignalEvent(
                        (Input) getModel(), "dtmf", event.getInput());
                if (terminate) {
                    // This is for compatibility with CPA's terminate tag.
                    // Probably not much sense for DTMF detection as the same
                    // can be achieved via a [1 DIGITS] grammar, but we should
                    // support it anyways
                    InputCompleteEvent completeEvent = new InputCompleteEvent(
                            model, Reason.MATCH);
                    completeEvent.setSignalEvent(signalEvent);
                    try {
                        complete(completeEvent);
                    } finally {
                        // Always stop the media input even if complete() throws.
                        stop(false);
                    }
                } else {
                    fire(signalEvent);
                }
            }
        } else {
            if (event.isStartOfSpeech()) {
                _lastStartOfSpeech = System.currentTimeMillis();
            } else if (event.isEndOfSpeech()) {
                // calculateDuration() also stamps _lastEndOfSpeech and bumps
                // _retries (side effects).
                long duration = calculateDuration();
                Type type;
                if (duration < voxeo_cpa_max_time) {
                    type = Type.HUMAN_DETECTED;
                } else {
                    type = Type.MACHINE_DETECTED;
                }
                CPAEvent<Call> cpaEvent = new MohoCPAEvent<Call>(
                        event.getSource(), type, duration, _retries);
                onCPAEvent(cpaEvent);
                // Reset for the next speech burst.
                _lastStartOfSpeech = 0;
                _lastEndOfSpeech = 0;
            } else if (event.getSignal() != null) {
                // Any other detected signal is treated as machine-originated.
                onCPAEvent(new MohoCPAEvent<Call>(event.getSource(),
                        Type.MACHINE_DETECTED, event.getSignal()));
            }
        }
    }

    /**
     * Computes the duration of the current speech burst as "now" minus the
     * last start-of-speech timestamp.
     *
     * <p>Side effects: records "now" as {@code _lastEndOfSpeech} and
     * increments {@code _retries}. NOTE(review): when called from
     * processSignalIfAny() the end-of-speech time is the moment the complete
     * event is processed, not the actual end of speech — confirm acceptable.
     *
     * @return the measured duration in milliseconds
     */
    private long calculateDuration() {

        _lastEndOfSpeech = System.currentTimeMillis();
        ++_retries;
        long duration = _lastEndOfSpeech - _lastStartOfSpeech;
        return duration;
    }

    /**
     * Moho callback fired when the media input operation finishes. Maps the
     * Moho completion cause onto a Rayo {@link InputCompleteEvent} and
     * completes the verb.
     *
     * <p>Events belonging to a different media operation are ignored. For
     * UNKNOWN/unhandled causes, a disconnected call maps to HANGUP;
     * everything else becomes an "Internal Server Error" completion.
     */
    @State
    public void onInputComplete(
            com.voxeo.moho.event.InputCompleteEvent<Participant> event) {

        // Guard against completions of other concurrent media operations.
        if (!event.getMediaOperation().equals(input)) {
            logger.debug("Ignoring complete event as it is targeted to a different media operation");
            return;
        }

        InputCompleteEvent completeEvent = null;

        switch (event.getCause()) {
        case MATCH:
        case END_OF_SPEECH:
            completeEvent = new InputCompleteEvent(model, Reason.MATCH);
            completeEvent.setConcept(event.getConcept());
            completeEvent.setInterpretation(event.getInterpretation());
            completeEvent.setConfidence(event.getConfidence());
            completeEvent.setUtterance(event.getUtterance());
            completeEvent.setNlsml(event.getNlsml());
            completeEvent.setTag(event.getTag());
            // Attach a CPA SignalEvent when the completion carries one.
            processSignalIfAny(event, completeEvent);
            if (event.getInputMode() != null) {
                completeEvent.setMode(getInputMode(event.getInputMode()));
            }
            break;
        case INI_TIMEOUT:
            completeEvent = new InputCompleteEvent(model, Reason.NOINPUT);
            break;
        case IS_TIMEOUT:
        case MAX_TIMEOUT:
            completeEvent = new InputCompleteEvent(model, Reason.TIMEOUT);
            break;
        case NO_MATCH:
            completeEvent = new InputCompleteEvent(model, Reason.NOMATCH);
            break;
        case CANCEL:
            completeEvent = new InputCompleteEvent(model,
                    VerbCompleteEvent.Reason.STOP);
            break;
        case DISCONNECT:
            completeEvent = new InputCompleteEvent(model,
                    VerbCompleteEvent.Reason.HANGUP);
            break;
        case ERROR:
            String cause = event.getErrorText() == null ? "Internal Server Error"
                    : event.getErrorText();
            completeEvent = new InputCompleteEvent(model, cause);
            break;
        case UNKNOWN:
        default:
            if (participant instanceof Call) {
                if (((Call) participant).getCallState() == com.voxeo.moho.Call.State.DISCONNECTED) {
                    completeEvent = new InputCompleteEvent(model,
                            VerbCompleteEvent.Reason.HANGUP);
                    break;
                }
            }
            completeEvent = new InputCompleteEvent(model,
                    "Internal Server Error");
        }
        complete(completeEvent);
    }

    /**
     * If the Moho completion event carries a signal (or represents end of
     * speech and "speech" was requested), attaches the corresponding Rayo
     * {@link SignalEvent} to the outgoing complete event.
     *
     * <p>For END_OF_SPEECH the duration is recomputed via
     * calculateDuration() and the source ("human"/"machine") guessed from it.
     */
    private void processSignalIfAny(
            com.voxeo.moho.event.InputCompleteEvent<Participant> event,
            InputCompleteEvent completeEvent) {

        String signal = null;
        String source = null;
        long duration = -1L;
        if (event.getSignal() != null) {
            signal = event.getSignal().toString().toLowerCase();
        } else if (event.getCause() == Cause.END_OF_SPEECH) {
            signal = "speech";
            duration = calculateDuration();
            source = guessSource(duration);
        }
        // Only attach signals the verb actually asked for.
        if (signal != null && signals != null && signals.contains(signal)) {
            completeEvent.setSignalEvent(new com.rayo.core.verb.SignalEvent(
                    (Input) getModel(), signal, duration, source));
        }
    }

    /**
     * Classifies a speech burst by duration: shorter than
     * {@link #voxeo_cpa_max_time} is "human", otherwise "machine".
     *
     * @param duration measured speech duration in ms, or -1 when unknown
     * @return "human", "machine", or null when duration is -1
     */
    private String guessSource(long duration) {

        if (duration == -1L)
            return null;
        String source;
        if (duration < voxeo_cpa_max_time) {
            source = "human";
        } else {
            source = "machine";
        }
        return source;
    }
}