/*
* Copyright 2011-2015, Institute of Cybernetics at Tallinn University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ee.ioc.phon.android.speak.service;
import android.app.PendingIntent;
import android.app.PendingIntent.CanceledException;
import android.app.SearchManager;
import android.content.Intent;
import android.os.Bundle;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Looper;
import android.os.Process;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import ee.ioc.phon.android.speak.ChunkedWebRecSessionBuilder;
import ee.ioc.phon.android.speak.Log;
import ee.ioc.phon.android.speak.R;
import ee.ioc.phon.android.speechutils.AudioRecorder;
import ee.ioc.phon.android.speechutils.EncodedAudioRecorder;
import ee.ioc.phon.android.speechutils.Extras;
import ee.ioc.phon.android.speechutils.utils.IntentUtils;
import ee.ioc.phon.android.speechutils.utils.PreferenceUtils;
import ee.ioc.phon.netspeechapi.recsession.ChunkedWebRecSession;
import ee.ioc.phon.netspeechapi.recsession.Hypothesis;
import ee.ioc.phon.netspeechapi.recsession.Linearization;
import ee.ioc.phon.netspeechapi.recsession.NotAvailableException;
import ee.ioc.phon.netspeechapi.recsession.RecSession;
import ee.ioc.phon.netspeechapi.recsession.RecSessionResult;
/**
* Implements RecognitionService, connects to the server via HTTP.
*
* @author Kaarel Kaljurand
*/
public class HttpRecognitionService extends AbstractRecognitionService {
// Delay before the first chunk is sent and the interval between subsequent chunks (in milliseconds)
private static final int TASK_DELAY_SEND = 100;
private static final int TASK_INTERVAL_SEND = 300;
private volatile Looper mSendLooper;
private volatile Handler mSendHandler;
private Runnable mSendTask;
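// Recognition session to which the recorded audio chunks are streamed over HTTP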
private ChunkedWebRecSession mRecSession;
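/**
* Returns the audio encoder type configured in the preferences; it is used (together with
* the sample rate) to set the content type of the recognition session.
*/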
@Override
String getEncoderType() {
return PreferenceUtils.getPrefString(getSharedPreferences(), getResources(),
R.string.keyAudioFormat, R.string.defaultAudioFormat);
}
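/**
* Builds a recognition session from the caller's extras and the app preferences, and creates
* it on the server. Reports a network or server error via {@code onError} if this fails.
*/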
@Override
void configure(Intent recognizerIntent) throws IOException {
ChunkedWebRecSessionBuilder recSessionBuilder = new ChunkedWebRecSessionBuilder(this, getExtras(), null);
recSessionBuilder.setContentType(getEncoderType(), getSampleRate());
if (Log.DEBUG) Log.i(recSessionBuilder.toStringArrayList());
mRecSession = recSessionBuilder.build();
try {
mRecSession.create();
} catch (IOException e) {
onError(SpeechRecognizer.ERROR_NETWORK);
} catch (NotAvailableException e) {
// This cannot happen in the current net-speech-api?
onError(SpeechRecognizer.ERROR_SERVER);
}
}
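/**
* Starts a background handler thread whose task periodically consumes the recorder's buffer
* and sends it to the server in chunks.
*/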
@Override
void connect() {
HandlerThread thread = new HandlerThread("HttpSendHandlerThread", Process.THREAD_PRIORITY_BACKGROUND);
thread.start();
mSendLooper = thread.getLooper();
mSendHandler = new Handler(mSendLooper);
// Send chunks to the server
mSendTask = new Runnable() {
public void run() {
AudioRecorder audioRecorder = getRecorder();
if (audioRecorder != null) {
byte[] buffer = audioRecorder.consumeRecording();
onBufferReceived(buffer);
try {
if (audioRecorder instanceof EncodedAudioRecorder) {
sendChunk(((EncodedAudioRecorder) audioRecorder).consumeRecordingEnc(), false);
} else {
sendChunk(buffer, false);
}
mSendHandler.postDelayed(this, TASK_INTERVAL_SEND);
} catch (IOException e) {
onError(SpeechRecognizer.ERROR_NETWORK);
}
}
}
};
mSendHandler.postDelayed(mSendTask, TASK_DELAY_SEND);
}
@Override
void disconnect() {
releaseResources();
}
@Override
boolean isAudioCues() {
return PreferenceUtils.getPrefBoolean(getSharedPreferences(), getResources(), R.string.keyAudioCues, R.bool.defaultAudioCues);
}
@Override
int getSampleRate() {
return PreferenceUtils.getPrefInt(getSharedPreferences(), getResources(), R.string.keyRecordingRate, R.string.defaultRecordingRate);
}
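/**
* Returns the auto-stop timeout in milliseconds, i.e. the preference value (stored in seconds)
* multiplied by 1000.
*/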
@Override
int getAutoStopAfterMillis() {
return 1000 * Integer.parseInt(
getSharedPreferences().getString(
getString(R.string.keyAutoStopAfterTime),
getString(R.string.defaultAutoStopAfterTime)));
}
@Override
boolean isAutoStopAfterPause() {
// If the caller does not specify this extra, then we set it based on the settings.
// TODO: in general, we could have 3-valued settings: true, false, use caller
if (getExtras().containsKey(Extras.EXTRA_UNLIMITED_DURATION)) {
return !getExtras().getBoolean(Extras.EXTRA_UNLIMITED_DURATION);
}
return PreferenceUtils.getPrefBoolean(getSharedPreferences(), getResources(), R.string.keyAutoStopAfterPause, R.bool.defaultAutoStopAfterPause);
}
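/**
* Stops the chunk-sending task, cancels the recognition session unless it has already finished,
* and quits the sender thread's looper.
*/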
private void releaseResources() {
stopTasks();
if (mRecSession != null && !mRecSession.isFinished()) {
mRecSession.cancel();
}
if (mSendLooper != null) {
mSendLooper.quit();
mSendLooper = null;
}
}
@Override
void afterRecording(byte[] recording) {
stopTasks();
transcribeAndFinishInBackground(recording);
}
/**
* @param bytes byte array representing the audio data
* @param isLast true iff this is the last chunk to be sent
* @throws IOException if sending the chunk to the server fails
*/
private void sendChunk(byte[] bytes, boolean isLast) throws IOException {
if (mRecSession != null && !mRecSession.isFinished()) {
mRecSession.sendChunk(bytes, isLast);
}
}
private void stopTasks() {
if (mSendHandler != null) mSendHandler.removeCallbacks(mSendTask);
}
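/**
* Sends the given bytes as the last chunk, retrieves the transcription result and releases
* the resources, all in a background thread.
*/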
private void transcribeAndFinishInBackground(final byte[] bytes) {
Thread t = new Thread() {
public void run() {
try {
sendChunk(bytes, true);
getResult(mRecSession);
} catch (IOException e) {
onError(SpeechRecognizer.ERROR_NETWORK);
} finally {
releaseResources();
}
}
};
t.start();
}
/**
* <p>If there are no results, reports {@code SpeechRecognizer.ERROR_NO_MATCH} via the error callback.
* Otherwise packages the results in two different formats, both using an {@code ArrayList<String>},
* and sends them to the caller.</p>
*/
private void getResult(RecSession recSession) throws IOException {
RecSessionResult result = recSession.getResult();
if (result == null) {
Log.i("Callback: error: ERROR_NO_MATCH: RecSessionResult == null");
onError(SpeechRecognizer.ERROR_NO_MATCH);
return;
}
List<Hypothesis> hyps = result.getHypotheses();
if (hyps.isEmpty()) {
Log.i("Callback: error: ERROR_NO_MATCH: getHypotheses().isEmpty()");
onError(SpeechRecognizer.ERROR_NO_MATCH);
return;
}
int maxResults = getExtras().getInt(RecognizerIntent.EXTRA_MAX_RESULTS);
if (maxResults <= 0) {
maxResults = hyps.size();
}
// Utterances OR linearizations
ArrayList<String> lins = new ArrayList<>();
// Utterances and their linearizations in a flat serialization: each utterance is followed
// by the output and the language of each of its linearizations (counts records how many
// linearizations each utterance has)
ArrayList<String> everything = new ArrayList<>();
ArrayList<Integer> counts = new ArrayList<>(hyps.size());
int count = 0;
for (Hypothesis hyp : hyps) {
if (count++ >= maxResults) {
break;
}
String utterance = hyp.getUtterance();
// We assume that there is always an utterance. If the utterance is
// missing then we consider the hypothesis not well-formed and take
// the next hypothesis.
if (utterance == null) {
continue;
}
everything.add(utterance);
List<Linearization> hypLins = hyp.getLinearizations();
if (hypLins == null || hypLins.isEmpty()) {
lins.add(utterance);
counts.add(0);
} else {
counts.add(hypLins.size());
for (Linearization lin : hypLins) {
String output = lin.getOutput();
everything.add(output);
everything.add(lin.getLang());
if (output == null || output.isEmpty()) {
lins.add(utterance);
} else {
lins.add(output);
}
}
}
}
returnOrForwardMatches(everything, counts, lins);
}
/**
* Returns the transcription results to the caller,
* or sends them to the pending intent.
*
* @param everything recognition results (all the components)
* @param counts number of linearizations for each hypothesis (needed to interpret {@code everything})
* @param matches recognition results (just linearizations)
*/
private void returnOrForwardMatches(ArrayList<String> everything, ArrayList<Integer> counts, ArrayList<String> matches) {
PendingIntent pendingIntent = IntentUtils.getPendingIntent(getExtras());
if (pendingIntent == null) {
Bundle bundle = new Bundle();
bundle.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, matches); // TODO: results_recognition
bundle.putStringArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATIONS, everything);
bundle.putIntegerArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATION_COUNTS, counts);
Log.i("Callback: results: RESULTS_RECOGNITION: " + matches);
Log.i("Callback: results: RESULTS_RECOGNITION_LINEARIZATIONS: " + everything);
Log.i("Callback: results: RESULTS_RECOGNITION_LINEARIZATIONS_COUNTS: " + counts);
onResults(bundle);
} else {
Log.i("EXTRA_RESULTS_PENDINGINTENT_BUNDLE was used with SpeechRecognizer (this is not tested)");
// This probably never occurs...
Bundle bundle = getExtras().getBundle(RecognizerIntent.EXTRA_RESULTS_PENDINGINTENT_BUNDLE);
if (bundle == null) {
bundle = new Bundle();
}
String match = matches.get(0);
//mExtraResultsPendingIntentBundle.putString(SearchManager.QUERY, match);
Intent intent = new Intent();
intent.putExtras(bundle);
// This is for Google Maps, YouTube, ...
intent.putExtra(SearchManager.QUERY, match);
// This is for SwiftKey X, ...
intent.putStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS, matches); // TODO: android.speech.extra.RESULTS
intent.putStringArrayListExtra(Extras.RESULTS_RECOGNITION_LINEARIZATIONS, everything);
intent.putIntegerArrayListExtra(Extras.RESULTS_RECOGNITION_LINEARIZATION_COUNTS, counts);
try {
// TODO: dummy number 1234
pendingIntent.send(this, 1234, intent);
} catch (CanceledException e) {
// TODO
}
}
}
}