/*
 * Copyright 2011-2015, Institute of Cybernetics at Tallinn University of Technology
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ee.ioc.phon.android.speak.service;

import android.app.PendingIntent;
import android.app.PendingIntent.CanceledException;
import android.app.SearchManager;
import android.content.Intent;
import android.os.Bundle;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Looper;
import android.os.Process;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import ee.ioc.phon.android.speak.ChunkedWebRecSessionBuilder;
import ee.ioc.phon.android.speak.Log;
import ee.ioc.phon.android.speak.R;
import ee.ioc.phon.android.speechutils.AudioRecorder;
import ee.ioc.phon.android.speechutils.EncodedAudioRecorder;
import ee.ioc.phon.android.speechutils.Extras;
import ee.ioc.phon.android.speechutils.utils.IntentUtils;
import ee.ioc.phon.android.speechutils.utils.PreferenceUtils;
import ee.ioc.phon.netspeechapi.recsession.ChunkedWebRecSession;
import ee.ioc.phon.netspeechapi.recsession.Hypothesis;
import ee.ioc.phon.netspeechapi.recsession.Linearization;
import ee.ioc.phon.netspeechapi.recsession.NotAvailableException;
import ee.ioc.phon.netspeechapi.recsession.RecSession;
import ee.ioc.phon.netspeechapi.recsession.RecSessionResult;

/**
 * Implements RecognitionService, connects to the server via HTTP.
 *
 * @author Kaarel Kaljurand
 */
public class HttpRecognitionService extends AbstractRecognitionService {

    // Delay before the first chunk is sent, and the interval between
    // subsequent chunks (both in milliseconds)
    private static final int TASK_DELAY_SEND = 100;
    private static final int TASK_INTERVAL_SEND = 300;

    private volatile Looper mSendLooper;
    private volatile Handler mSendHandler;

    private Runnable mSendTask;

    private ChunkedWebRecSession mRecSession;

    @Override
    String getEncoderType() {
        return PreferenceUtils.getPrefString(getSharedPreferences(), getResources(),
                R.string.keyAudioFormat, R.string.defaultAudioFormat);
    }

    @Override
    void configure(Intent recognizerIntent) throws IOException {
        ChunkedWebRecSessionBuilder mRecSessionBuilder = new ChunkedWebRecSessionBuilder(this, getExtras(), null);
        mRecSessionBuilder.setContentType(getEncoderType(), getSampleRate());
        if (Log.DEBUG) Log.i(mRecSessionBuilder.toStringArrayList());
        mRecSession = mRecSessionBuilder.build();
        try {
            mRecSession.create();
        } catch (IOException e) {
            onError(SpeechRecognizer.ERROR_NETWORK);
        } catch (NotAvailableException e) {
            // This cannot happen in the current net-speech-api?
            onError(SpeechRecognizer.ERROR_SERVER);
        }
    }
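    // The chunked HTTP session is driven entirely by the methods below (all
    // calls appear in this file): configure() builds the session and calls
    // create(); the send task posts sendChunk(bytes, false) every
    // TASK_INTERVAL_SEND ms while recording; afterRecording() sends the final
    // chunk with sendChunk(bytes, true) and then asks for getResult();
    // releaseResources() cancels an unfinished session. A sketch of the
    // happy-path sequence:
    //
    //     mRecSession.create();
    //     mRecSession.sendChunk(chunk, false);      // repeated while recording
    //     mRecSession.sendChunk(lastChunk, true);   // final chunk
    //     RecSessionResult result = mRecSession.getResult();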
    @Override
    void connect() {
        HandlerThread thread = new HandlerThread("HttpSendHandlerThread", Process.THREAD_PRIORITY_BACKGROUND);
        thread.start();
        mSendLooper = thread.getLooper();
        mSendHandler = new Handler(mSendLooper);

        // Send chunks to the server
        mSendTask = new Runnable() {
            public void run() {
                AudioRecorder audioRecorder = getRecorder();
                if (audioRecorder != null) {
                    byte[] buffer = audioRecorder.consumeRecording();
                    onBufferReceived(buffer);
                    try {
                        if (audioRecorder instanceof EncodedAudioRecorder) {
                            sendChunk(((EncodedAudioRecorder) audioRecorder).consumeRecordingEnc(), false);
                        } else {
                            sendChunk(buffer, false);
                        }
                        mSendHandler.postDelayed(this, TASK_INTERVAL_SEND);
                    } catch (IOException e) {
                        onError(SpeechRecognizer.ERROR_NETWORK);
                    }
                }
            }
        };

        mSendHandler.postDelayed(mSendTask, TASK_DELAY_SEND);
    }

    @Override
    void disconnect() {
        releaseResources();
    }

    @Override
    boolean isAudioCues() {
        return PreferenceUtils.getPrefBoolean(getSharedPreferences(), getResources(),
                R.string.keyAudioCues, R.bool.defaultAudioCues);
    }

    @Override
    int getSampleRate() {
        return PreferenceUtils.getPrefInt(getSharedPreferences(), getResources(),
                R.string.keyRecordingRate, R.string.defaultRecordingRate);
    }

    @Override
    int getAutoStopAfterMillis() {
        return 1000 * Integer.parseInt(
                getSharedPreferences().getString(
                        getString(R.string.keyAutoStopAfterTime),
                        getString(R.string.defaultAutoStopAfterTime)));
    }

    @Override
    boolean isAutoStopAfterPause() {
        // If the caller does not specify this extra, then we set it based on the settings.
        // TODO: in general, we could have 3-valued settings: true, false, use caller
        if (getExtras().containsKey(Extras.EXTRA_UNLIMITED_DURATION)) {
            return !getExtras().getBoolean(Extras.EXTRA_UNLIMITED_DURATION);
        }
        return PreferenceUtils.getPrefBoolean(getSharedPreferences(), getResources(),
                R.string.keyAutoStopAfterPause, R.bool.defaultAutoStopAfterPause);
    }

    private void releaseResources() {
        stopTasks();
        if (mRecSession != null && !mRecSession.isFinished()) {
            mRecSession.cancel();
        }
        if (mSendLooper != null) {
            mSendLooper.quit();
            mSendLooper = null;
        }
    }

    @Override
    void afterRecording(byte[] recording) {
        stopTasks();
        transcribeAndFinishInBackground(recording);
    }

    /**
     * @param bytes  byte array representing the audio data
     * @param isLast indicates that this is the last chunk that is sent
     * @throws IOException
     */
    private void sendChunk(byte[] bytes, boolean isLast) throws IOException {
        if (mRecSession != null && !mRecSession.isFinished()) {
            mRecSession.sendChunk(bytes, isLast);
        }
    }

    private void stopTasks() {
        if (mSendHandler != null) mSendHandler.removeCallbacks(mSendTask);
    }

    private void transcribeAndFinishInBackground(final byte[] bytes) {
        Thread t = new Thread() {
            public void run() {
                try {
                    sendChunk(bytes, true);
                    getResult(mRecSession);
                } catch (IOException e) {
                    onError(SpeechRecognizer.ERROR_NETWORK);
                } finally {
                    releaseResources();
                }
            }
        };
        t.start();
    }
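    // To illustrate the flat serialization that getResult() below builds
    // (the utterances and linearizations are hypothetical server output):
    // one hypothesis with utterance "tere" and two linearizations
    // ("hello"/"en" and "tere"/"et") yields
    //
    //     everything = ["tere", "hello", "en", "tere", "et"]
    //     counts     = [2]
    //     lins       = ["hello", "tere"]
    //
    // whereas a hypothesis without linearizations contributes only its
    // utterance to everything and lins, and a 0 to counts.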
    /**
     * <p>If there are no results then returns {@code SpeechRecognizer.ERROR_NO_MATCH}.
     * Otherwise packages the results in two different formats which both use an {@code ArrayList<String>}
     * and sends the results to the caller.</p>
     */
    private void getResult(RecSession recSession) throws IOException {
        RecSessionResult result = recSession.getResult();

        if (result == null) {
            Log.i("Callback: error: ERROR_NO_MATCH: RecSessionResult == null");
            onError(SpeechRecognizer.ERROR_NO_MATCH);
            return;
        }

        List<Hypothesis> hyps = result.getHypotheses();
        if (hyps.isEmpty()) {
            Log.i("Callback: error: ERROR_NO_MATCH: getHypotheses().isEmpty()");
            onError(SpeechRecognizer.ERROR_NO_MATCH);
            return;
        }

        int maxResults = getExtras().getInt(RecognizerIntent.EXTRA_MAX_RESULTS);
        if (maxResults <= 0) {
            maxResults = hyps.size();
        }

        // Utterances OR linearizations
        ArrayList<String> lins = new ArrayList<>();
        // Utterances and their linearizations in a flat serialization
        ArrayList<String> everything = new ArrayList<>();
        ArrayList<Integer> counts = new ArrayList<>(hyps.size());
        int count = 0;
        for (Hypothesis hyp : hyps) {
            if (count++ >= maxResults) {
                break;
            }
            String utterance = hyp.getUtterance();
            // We assume that there is always an utterance. If the utterance is
            // missing then we consider the hypothesis not well-formed and take
            // the next hypothesis.
            if (utterance == null) {
                continue;
            }
            everything.add(utterance);
            List<Linearization> hypLins = hyp.getLinearizations();
            if (hypLins == null || hypLins.isEmpty()) {
                lins.add(utterance);
                counts.add(0);
            } else {
                counts.add(hypLins.size());
                for (Linearization lin : hypLins) {
                    String output = lin.getOutput();
                    everything.add(output);
                    everything.add(lin.getLang());
                    if (output == null || output.length() == 0) {
                        lins.add(utterance);
                    } else {
                        lins.add(output);
                    }
                }
            }
        }
        returnOrForwardMatches(everything, counts, lins);
    }
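    // A minimal sketch of how a caller might consume the bundle that
    // returnOrForwardMatches() passes to onResults(); the method shown is the
    // standard android.speech.RecognitionListener callback, included here
    // only for illustration:
    //
    //     @Override
    //     public void onResults(Bundle results) {
    //         ArrayList<String> matches =
    //                 results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    //         ArrayList<String> everything =
    //                 results.getStringArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATIONS);
    //         ArrayList<Integer> counts =
    //                 results.getIntegerArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATION_COUNTS);
    //         // matches.get(0) is the most likely transcription/linearization
    //     }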
    /**
     * Returns the transcription results to the caller,
     * or sends them to the pending intent.
     *
     * @param everything recognition results (all the components)
     * @param counts     number of linearizations for each hypothesis (needed to interpret {@code everything})
     * @param matches    recognition results (just linearizations)
     */
    private void returnOrForwardMatches(ArrayList<String> everything, ArrayList<Integer> counts, ArrayList<String> matches) {
        PendingIntent pendingIntent = IntentUtils.getPendingIntent(getExtras());
        if (pendingIntent == null) {
            Bundle bundle = new Bundle();
            bundle.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, matches); // TODO: results_recognition
            bundle.putStringArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATIONS, everything);
            bundle.putIntegerArrayList(Extras.RESULTS_RECOGNITION_LINEARIZATION_COUNTS, counts);
            Log.i("Callback: results: RESULTS_RECOGNITION: " + matches);
            Log.i("Callback: results: RESULTS_RECOGNITION_LINEARIZATIONS: " + everything);
            Log.i("Callback: results: RESULTS_RECOGNITION_LINEARIZATION_COUNTS: " + counts);
            onResults(bundle);
        } else {
            Log.i("EXTRA_RESULTS_PENDINGINTENT_BUNDLE was used with SpeechRecognizer (this is not tested)");
            // This probably never occurs...
            Bundle bundle = getExtras().getBundle(RecognizerIntent.EXTRA_RESULTS_PENDINGINTENT_BUNDLE);
            if (bundle == null) {
                bundle = new Bundle();
            }
            String match = matches.get(0);
            //mExtraResultsPendingIntentBundle.putString(SearchManager.QUERY, match);
            Intent intent = new Intent();
            intent.putExtras(bundle);
            // This is for Google Maps, YouTube, ...
            intent.putExtra(SearchManager.QUERY, match);
            // This is for SwiftKey X, ...
            intent.putStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS, matches); // TODO: android.speech.extra.RESULTS
            intent.putStringArrayListExtra(Extras.RESULTS_RECOGNITION_LINEARIZATIONS, everything);
            intent.putIntegerArrayListExtra(Extras.RESULTS_RECOGNITION_LINEARIZATION_COUNTS, counts);
            try {
                // TODO: dummy number 1234
                pendingIntent.send(this, 1234, intent);
            } catch (CanceledException e) {
                // TODO
            }
        }
    }
}