package com.darkprograms.speech.recognizer; import java.io.*; import java.net.URL; import java.net.URLConnection; /*************************************************************** * Class that submits FLAC audio and retrieves recognized text * * @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan **************************************************************/ public class Recognizer { public enum Languages{ AUTO_DETECT(null),//tells Google to auto-detect the language ARABIC_JORDAN("ar-JO"), ARABIC_LEBANON("ar-LB"), ARABIC_QATAR("ar-QA"), ARABIC_UAE("ar-AE"), ARABIC_MOROCCO("ar-MA"), ARABIC_IRAQ("ar-IQ"), ARABIC_ALGERIA("ar-DZ"), ARABIC_BAHRAIN("ar-BH"), ARABIC_LYBIA("ar-LY"), ARABIC_OMAN("ar-OM"), ARABIC_SAUDI_ARABIA("ar-SA"), ARABIC_TUNISIA("ar-TN"), ARABIC_YEMEN("ar-YE"), BASQUE("eu"), CATALAN("ca"), CZECH("cs"), DUTCH("nl-NL"), ENGLISH_AUSTRALIA("en-AU"), ENGLISH_CANADA("en-CA"), ENGLISH_INDIA("en-IN"), ENGLISH_NEW_ZEALAND("en-NZ"), ENGLISH_SOUTH_AFRICA("en-ZA"), ENGLISH_UK("en-GB"), ENGLISH_US("en-US"), FINNISH("fi"), FRENCH("fr-FR"), GALICIAN("gl"), GERMAN("de-DE"), HEBREW("he"), HUNGARIAN("hu"), ICELANDIC("is"), ITALIAN("it-IT"), INDONESIAN("id"), JAPANESE("ja"), KOREAN("ko"), LATIN("la"), MANDARIN_CHINESE("zh-CN"), TRADITIONAL_TAIWAN("zh-TW"), SIMPLIFIED_CHINA("ZH-CN"), SIMPLIFIED_HONG_KONG("zh-HK"), YUE_CHINESE_TRADITIONAL_HONG_KONG("zh-yue"), MALAYSIAN("ms-MY"), NORWEGIAN("no-NO"), POLISH("pl"), PIG_LATIN("xx-piglatin"), PORTUGUESE("pt-PT"), PORTUGUESE_BRASIL("pt-BR"), ROMANIAN("ro-RO"), RUSSIAN("ru"), SERBIAN("sr-SP"), SLOVAK("sk"), SPANISH_ARGENTINA("es-AR"), SPANISH_BOLIVIA("es-BO"), SPANISH_CHILE("es-CL"), SPANISH_COLOMBIA("es-CO"), SPANISH_COSTA_RICA("es-CR"), SPANISH_DOMINICAN_REPUBLIC("es-DO"), SPANISH_ECUADOR("es-EC"), SPANISH_EL_SALVADOR("es-SV"), SPANISH_GUATEMALA("es-GT"), SPANISH_HONDURAS("es-HN"), SPANISH_MEXICO("es-MX"), SPANISH_NICARAGUA("es-NI"), SPANISH_PANAMA("es-PA"), SPANISH_PARAGUAY("es-PY"), SPANISH_PERU("es-PE"), SPANISH_PUERTO_RICO("es-PR"), SPANISH_SPAIN("es-ES"), SPANISH_US("es-US"), SPANISH_URUGUAY("es-UY"), SPANISH_VENEZUELA("es-VE"), SWEDISH("sv-SE"), TURKISH("tr"), ZULU("zu"); /** *Stores the LanguageCode */ private final String languageCode; /** *Constructor */ private Languages(final String languageCode){ this.languageCode = languageCode; } public String toString(){ return languageCode; } } /** * URL to POST audio data and retrieve results */ private static final String GOOGLE_RECOGNIZER_URL = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium"; private boolean profanityFilter = true; private String language = null; /** * Constructor */ public Recognizer() { } /** * Constructor * @param Language */ @Deprecated public Recognizer(String language) { this.language = language; } /** * Constructor * @param language The Languages class for the language you want to designate */ public Recognizer(Languages language){ this.language = language.languageCode; } /** * Constructor * @param profanityFilter */ public Recognizer(boolean profanityFilter){ this.profanityFilter = profanityFilter; } /** * Constructor * @param language * @param profanityFilter */ @Deprecated public Recognizer(String language, boolean profanityFilter){ this.language = language; this.profanityFilter = profanityFilter; } /** * Constructor * @param language * @param profanityFilter */ public Recognizer(Languages language, boolean profanityFilter){ this.language = language.languageCode; this.profanityFilter = profanityFilter; } /** * Language: Contains all supported languages for Google Speech to Text. * Setting this to null will make Google use it's own language detection. * This value is null by default. * @param language */ public void setLanguage(Languages language) { this.language = language.languageCode; } /**Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU * This value is null by default. * @param language The language code. */ @Deprecated public void setLanguage(String language) { this.language = language; } /** * Returns the state of profanityFilter * which enables/disables Google's profanity filter (on by default). * @return profanityFilter */ public boolean getProfanityFilter(){ return profanityFilter; } /** * Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU * This value is null by default. * @return language the Google language */ public String getLanguage(){ return language; } /** * Get recognized data from a Wave file. This method will encode the wave file to a FLAC * * @param waveFile Wave file to recognize * @param maxResults Maximum number of results to return in response * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForWave(File waveFile, int maxResults) throws Exception { FlacEncoder flacEncoder = new FlacEncoder(); File flacFile = new File(waveFile + ".flac"); flacEncoder.convertWaveToFlac(waveFile, flacFile); String response = rawRequest(flacFile, maxResults); //Delete converted FLAC data flacFile.delete(); GoogleResponse googleResponse = new GoogleResponse(); parseResponse(response, googleResponse); return googleResponse; } /** * Get recognized data from a Wave file. This method will encode the wave file to a FLAC * * @param waveFile Wave file to recognize * @param maxResults the maximum number of results to return in the response * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForWave(String waveFile, int maxResults) throws Exception { return getRecognizedDataForWave(new File(waveFile), maxResults); } /** * Get recognized data from a FLAC file. * * @param flacFile FLAC file to recognize * @param maxResults the maximum number of results to return in the response * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForFlac(File flacFile, int maxResults) throws Exception { String response = rawRequest(flacFile, maxResults); GoogleResponse googleResponse = new GoogleResponse(); parseResponse(response, googleResponse); return googleResponse; } /** * Get recognized data from a FLAC file. * * @param flacFile FLAC file to recognize * @param maxResults the maximum number of results to return in the response * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForFlac(String flacFile, int maxResults) throws Exception { return getRecognizedDataForFlac(new File(flacFile), maxResults); } /** * Get recognized data from a Wave file. This method will encode the wave file to a FLAC. * This method will automatically set the language to en-US, or English * * @param waveFile Wave file to recognize * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForWave(File waveFile) throws Exception { return getRecognizedDataForWave(waveFile, 1); } /** * Get recognized data from a Wave file. This method will encode the wave file to a FLAC. * This method will automatically set the language to en-US, or English * * @param waveFile Wave file to recognize * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForWave(String waveFile) throws Exception { return getRecognizedDataForWave(waveFile, 1); } /** * Get recognized data from a FLAC file. * This method will automatically set the language to en-US, or English * * @param flacFile FLAC file to recognize * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForFlac(File flacFile) throws Exception { return getRecognizedDataForFlac(flacFile, 1); } /** * Get recognized data from a FLAC file. * This method will automatically set the language to en-US, or English * * @param flacFile FLAC file to recognize * @return Returns a GoogleResponse, with the response and confidence score * @throws Exception Throws exception if something goes wrong */ public GoogleResponse getRecognizedDataForFlac(String flacFile) throws Exception { return getRecognizedDataForFlac(flacFile, 1); } /** * Parses the raw response from Google * * @param rawResponse The raw, unparsed response from Google * @return Returns the parsed response. Index 0 is response, Index 1 is confidence score */ private void parseResponse(String rawResponse, GoogleResponse googleResponse) { if (!rawResponse.contains("utterance")) return; String array = substringBetween(rawResponse, "[", "]"); String[] parts = array.split("}"); boolean first = true; for( String s : parts ) { if( first ) { first = false; String utterancePart = s.split(",")[0]; String confidencePart = s.split(",")[1]; String utterance = utterancePart.split(":")[1]; String confidence = confidencePart.split(":")[1]; utterance = stripQuotes(utterance); confidence = stripQuotes(confidence); if( utterance.equals("null") ) { utterance = null; } if( confidence.equals("null") ) { confidence = null; } googleResponse.setResponse(utterance); googleResponse.setConfidence(confidence); } else { String utterance = s.split(":")[1]; utterance = stripQuotes(utterance); if( utterance.equals("null") ) { utterance = null; } googleResponse.getOtherPossibleResponses().add(utterance); } } } /** * Performs the request to Google with a file <br> * Request is buffered * * @param inputFile Input files to recognize * @return Returns the raw, unparsed response from Google * @throws Exception Throws exception if something went wrong */ private String rawRequest(File inputFile, int maxResults) throws Exception { URL url; URLConnection urlConn; OutputStream outputStream; BufferedReader br; StringBuilder sb = new StringBuilder(GOOGLE_RECOGNIZER_URL); if( language != null ) { sb.append("&lang="); sb.append(language); } if( !profanityFilter ) { sb.append("&pfilter=0"); } sb.append("&maxresults="); sb.append(maxResults); // URL of Remote Script. url = new URL(sb.toString()); // Open New URL connection channel. urlConn = url.openConnection(); // we want to do output. urlConn.setDoOutput(true); // No caching urlConn.setUseCaches(false); // Specify the header content type. urlConn.setRequestProperty("Content-Type", "audio/x-flac; rate=8000"); // Send POST output. outputStream = urlConn.getOutputStream(); FileInputStream fileInputStream = new FileInputStream(inputFile); byte[] buffer = new byte[256]; while ((fileInputStream.read(buffer, 0, 256)) != -1) { outputStream.write(buffer, 0, 256); } fileInputStream.close(); outputStream.close(); // Get response data. br = new BufferedReader(new InputStreamReader(urlConn.getInputStream())); String response = br.readLine(); br.close(); return response; } private String substringBetween(String s, String part1, String part2) { String sub = null; int i = s.indexOf(part1); int j = s.indexOf(part2, i + part1.length()); if (i != -1 && j != -1) { int nStart = i + part1.length(); sub = s.substring(nStart, j); } return sub; } private String stripQuotes(String s) { int start = 0; if( s.startsWith("\"") ) { start = 1; } int end = s.length(); if( s.endsWith("\"") ) { end = s.length() - 1; } return s.substring(start, end); } }