package com.darkprograms.speech.synthesiser; import java.io.InputStream; import java.io.Reader; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; /******************************************************************************* * Synthesiser class that connects to Google's unoffical API to retrieve data * * @author Luke Kuza, Aaron Gokaslan (Skylion) *******************************************************************************/ public class Synthesiser { /** * URL to query for Google synthesiser */ private final static String GOOGLE_SYNTHESISER_URL = "http://translate.google.com/translate_tts?tl="; /** * URL to query for Google Auto Detection */ private final static String GOOGLE_AUTODETECT_URL = "http://translate.google.com/translate_a/t?client=t&sl=auto&text="; /** * language of the Text you want to translate */ private String languageCode; /** * LANG_XX_XXXX Variables are language codes. */ public static final String LANG_AU_ENGLISH = "en-AU"; public static final String LANG_US_ENGLISH = "en-US"; public static final String LANG_UK_ENGLISH = "en-GB"; public static final String LANG_ES_SPANISH = "es"; public static final String LANG_FR_FRENCH = "fr"; public static final String LANG_DE_GERMAN = "de"; //Please add on more regional languages as you find them. Also try to include the accent code if you can can. /** * Constructor */ public Synthesiser() { languageCode = "auto"; } /** * Constructor that takes language code parameter. Specify to "auto" for language autoDetection */ public Synthesiser(String languageCode){ this.languageCode = languageCode; } /** * Returns the current language code for the Synthesiser. * Example: English(Generic) = en, English (US) = en-US, English (UK) = en-GB. and Spanish = es; * @return the current language code parameter */ public String getLanguage(){ return languageCode; } /** * Note: set language to auto to enable automatic language detection. * Setting to null will also implement Google's automatic language detection * @param languageCode The language code you would like to modify languageCode to. */ public void setLanguage(String languageCode){ this.languageCode = languageCode; } /** * Gets an input stream to MP3 data for the returned information from a request * * @param synthText Text you want to be synthesized into MP3 data * @return Returns an input stream of the MP3 data that is returned from Google * @throws Exception Throws exception if it can not complete the request */ public InputStream getMP3Data(String synthText) throws Exception { String languageCode = this.languageCode;//Ensures retention of language settings if set to auto if(languageCode == null || languageCode.equals("") || languageCode.equalsIgnoreCase("auto")){ try{ languageCode = detectLanguage(synthText);//Detects language if(languageCode == null){ languageCode = "en-us";//Reverts to Default Language if it can't detect it. } } catch(Exception ex){ ex.printStackTrace(); languageCode = "en-us";//Reverts to Default Language if it can't detect it. } } if(synthText.length()>100){ List<String> fragments = parseString(synthText);//parses String if too long String tmp = getLanguage(); setLanguage(languageCode);//Keeps it from autodetecting each fragment. InputStream out = getMP3Data(fragments); setLanguage(tmp);//Reverts it to it's previous Language such as auto. return out; } String encoded = URLEncoder.encode(synthText, "UTF-8"); //Encode URL url = new URL(GOOGLE_SYNTHESISER_URL + languageCode + "&q=" + encoded); //create url // Open New URL connection channel. URLConnection urlConn = url.openConnection(); //Open connection urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required return urlConn.getInputStream(); } /** * Gets an InputStream to MP3Data for the returned information from a request * @param synthText List of Strings you want to be synthesized into MP3 data * @return Returns an input stream of all the MP3 data that is returned from Google * @throws Exception Throws exception if it cannot complete the request */ public InputStream getMP3Data(List<String> synthText) throws Exception{ InputStream complete = getMP3Data(synthText.remove(0)); for(String part: synthText){ complete = new java.io.SequenceInputStream(complete, getMP3Data(part));//Concatenate with new MP3 Data } return complete; } /** * Separates a string into smaller parts so that Google will not reject the request. * @param input The string you want to separate * @return A List<String> of the String fragments from your input.. */ private List<String> parseString(String input){ return parseString (input, new ArrayList<String>()); } /** * Separates a string into smaller parts so that Google will not reject the request. * @param input The string you want to break up into smaller parts * @param fragments List<String> that you want to add stuff too. * If you don't have a List<String> already constructed "new ArrayList<String>()" works well. * @return A list of the fragments of the original String */ private List<String> parseString(String input, List<String> fragments){ if(input.length()<=100){//Base Case fragments.add(input); return fragments; } else{ int lastWord = findLastWord(input);//Checks if a space exists if(lastWord<=0){ fragments.add(input.substring(0,100));//In case you sent gibberish to Google. return parseString(input.substring(100), fragments); }else{ fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion. return parseString(input.substring(lastWord), fragments); } } } /** * Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation. * Will preferably parse on punctuation to alleviate mid-sentence pausing * @param input The String you want to search through. * @return The index of where the last word of the string ends before the index of 99. */ private int findLastWord(String input){ if(input.length()<100) return input.length(); int space = -1; for(int i = 99; i>0; i--){ char tmp = input.charAt(i); if(isEndingPunctuation(tmp)){ return i+1; } if(space==-1 && tmp == ' '){ space = i; } } if(space>0){ return space; } return -1; } /** * Checks if char is an ending character * Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode) * @param The char you want check * @return True if it is, false if not. */ private boolean isEndingPunctuation(char input){ return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|'; } /** * Automatically determines the language of the original text * @param text represents the text you want to check the language of * @return the languageCode * @throws Exception if it cannot complete the request */ public String detectLanguage(String text) throws Exception{ if(text.length()>99){//Google will not compute more than 99 characters int lastWord = findLastWord(text); if(lastWord<0){ text = text.substring(0,99);//Fix for languages without spaces. } else{ text = text.substring(0,lastWord);//We don't need the whole text to determine language } } String encoded = URLEncoder.encode(text, "UTF-8"); //Encode URL url = new URL(GOOGLE_AUTODETECT_URL + encoded); //Generates URL URLConnection urlConn = url.openConnection(); //Open connection urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required String rawData = urlToText(urlConn);//Gets text from Google if(!isLanguageSupported(rawData)) return null;//Comment this if statement out if you want to use this code for rare languages like Maori. return parseRawData(rawData); } /** * Converts a URL Connection to Text * @param urlConn The Open URLConnection that you want to generate a String from * @return The generated String * @throws Exception if it cannot complete the request */ private String urlToText(URLConnection urlConn) throws Exception{ Reader r = new java.io.InputStreamReader(urlConn.getInputStream());//Gets Data Converts to string StringBuilder buf = new StringBuilder(); while (true) { int ch = r.read(); if (ch < 0) break; buf.append((char) ch); } String str = buf.toString(); return str; } /** * Searches RawData for Language & region if possible * @param RawData the raw String directly from Google you want to search through * @return The language parsed from the rawData or null if Google cannot determine it. */ private String parseRawData(String rawData){ for(int i = 0; i+5<rawData.length(); i++){ boolean dashDetected = rawData.charAt(i+4)=='-';//Sometimes Google will detect the region too. if(rawData.charAt(i)==',' && rawData.charAt(i+1)== '"' && ((rawData.charAt(i+4)=='"' && rawData.charAt(i+5)==',') || dashDetected)){ if(dashDetected){//If region is detected parses the whole string! int lastQuote = rawData.substring(i+2).indexOf('"');//Where the region ends if(lastQuote>0) return rawData.substring(i+2,i+2+lastQuote); } else{ String possible = rawData.substring(i+2,i+4); if(containsLettersOnly(possible)){//Required due to Google's inconsistent formatting. //System.out.println(possible); return possible; } } } }//End of Loop return null; } /** * Checks if all characters in text are letters. * @param text The text you want to determine the validity of. * @return True if all characters are letters, otherwise false. */ private boolean containsLettersOnly(String text){ for(int i = 0; i<text.length(); i++){ if(!Character.isLetter(text.charAt(i))){ return false; } } return true; } /** * Check is a language is supported from rawData * @param rawData Checks if a language is supported based off of rawData * @return true if supported otherwise false. */ private boolean isLanguageSupported(String rawData){ return !rawData.contains(",\"We are not yet able to translate from "); } }