package com.darkprograms.speech.translator; import java.io.IOException; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Locale; /*************************************************************************************************************** * An API for a Google Translation service in Java. * Please Note: This API is unofficial and is not supported by Google. Subject to breakage at any time. * The translator allows for language detection and translation. * Recommended for translation of user interfaces or speech commands. * All translation services provided via Google Translate * @author Aaron Gokaslan (Skylion) ***************************************************************************************************************/ public final class GoogleTranslate { //Class marked as final since all methods are static /** * URL to query for Translation */ private final static String GOOGLE_TRANSLATE_URL = "http://translate.google.com/translate_a/single"; /** * Private to prevent instantiation */ private GoogleTranslate(){}; /** * Converts the ISO-639 code into a friendly language code in the user's default language * For example, if the language is English and the default locale is French, it will return "anglais" * Useful for UI Strings * @param languageCode The ISO639-1 * @return The language in the user's default language */ public static String getDisplayLanguage(String languageCode){ return (new Locale(languageCode)).getDisplayLanguage(); } /** Completes the complicated process of generating the URL * @param sourceLanguage The source language * @param targetLanguage The target language * @param text The text that you wish to generate * @return The generated URL as a string. */ private static String generateURL(String sourceLanguage, String targetLanguage, String text) throws UnsupportedEncodingException{ String encoded = URLEncoder.encode(text, "UTF-8"); //Encode StringBuilder sb = new StringBuilder(); sb.append(GOOGLE_TRANSLATE_URL); sb.append("?client=webapp"); //The client parameter sb.append("&hl=en"); //The language of the UI? sb.append("&sl="); //Source language sb.append(sourceLanguage); sb.append("&tl="); //Target language sb.append(targetLanguage); sb.append("&q="); sb.append(encoded); sb.append("&multires=1");//Necessary but unknown parameters sb.append("&otf=0"); sb.append("&pc=0"); sb.append("&trs=1"); sb.append("&ssel=0"); sb.append("&tsel=0"); sb.append("&kc=1"); sb.append("&dt=t");//This parameters requests the translated text back. //Other dt parameters request additional information such as pronunciation, and so on. //TODO Modify API so that the user may request this additional information. sb.append("&ie=UTF-8"); //Input encoding sb.append("&oe=UTF-8"); //Output encoding sb.append("&tk="); //Token authentication parameter sb.append(generateToken(text)); return sb.toString(); } /** * Automatically determines the language of the original text * @param text represents the text you want to check the language of * @return The ISO-639 code for the language * @throws IOException if it cannot complete the request */ public static String detectLanguage(String text) throws IOException{ String urlText = generateURL("auto", "en", text); URL url = new URL(urlText); //Generates URL String rawData = urlToText(url);//Gets text from Google return findLanguage(rawData); } /** * Automatically translates text to a system's default language according to its locale * Useful for creating international applications as you can translate UI strings * @see GoogleTranslate#translate(String, String, String) * @param text The text you want to translate * @return The translated text * @throws IOException if cannot complete request */ public static String translate(String text) throws IOException{ return translate(Locale.getDefault().getLanguage(), text); } /** * Automatically detects language and translate to the targetLanguage. * Allows Google to determine source language * @see GoogleTranslate#translate(String, String, String) * @param targetLanguage The language you want to translate into in ISO-639 format * @param text The text you actually want to translate * @return The translated text. * @throws IOException if it cannot complete the request */ public static String translate(String targetLanguage, String text) throws IOException{ return translate("auto",targetLanguage, text); } /** * Translate text from sourceLanguage to targetLanguage * Specifying the sourceLanguage greatly improves accuracy over short Strings * @param sourceLanguage The language you want to translate from in ISO-639 format * @param targetLanguage The language you want to translate into in ISO-639 format * @param text The text you actually want to translate * @return the translated text. * @throws IOException if it cannot complete the request */ public static String translate(String sourceLanguage, String targetLanguage, String text) throws IOException{ String urlText = generateURL(sourceLanguage, targetLanguage, text); URL url = new URL(urlText); String rawData = urlToText(url);//Gets text from Google if(rawData==null){ return null; } String[] raw = rawData.split("\"");//Parses the JSON if(raw.length<2){ return null; } return raw[1];//Returns the translation } /** * Converts a URL to Text * @param url that you want to generate a String from * @return The generated String * @throws IOException if it cannot complete the request */ private static String urlToText(URL url) throws IOException{ URLConnection urlConn = url.openConnection(); //Open connection //Adding header for user agent is required. Otherwise, Google rejects the request urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); Reader r = new java.io.InputStreamReader(urlConn.getInputStream(), Charset.forName("UTF-8"));//Gets Data Converts to string StringBuilder buf = new StringBuilder(); while (true) {//Reads String from buffer int ch = r.read(); if (ch < 0) break; buf.append((char) ch); } String str = buf.toString(); return str; } /** * Searches RAWData for Language * @param RAWData the raw String directly from Google you want to search through * @return The language parsed from the rawData or en-US (English-United States) if Google cannot determine it. */ private static String findLanguage(String rawData){ for(int i = 0; i+5<rawData.length(); i++){ boolean dashDetected = rawData.charAt(i+4)=='-'; if(rawData.charAt(i)==',' && rawData.charAt(i+1)== '"' && ((rawData.charAt(i+4)=='"' && rawData.charAt(i+5)==',') || dashDetected)){ if(dashDetected){ int lastQuote = rawData.substring(i+2).indexOf('"'); if(lastQuote>0) return rawData.substring(i+2,i+2+lastQuote); } else{ String possible = rawData.substring(i+2,i+4); if(containsLettersOnly(possible)){//Required due to Google's inconsistent formatting. return possible; } } } } return null; } /** * Checks if all characters in text are letters. * @param text The text you want to determine the validity of. * @return True if all characters are letter, otherwise false. */ private static boolean containsLettersOnly(String text){ for(int i = 0; i<text.length(); i++){ if(!Character.isLetter(text.charAt(i))){ return false; } } return true; } /*************************** Cryptography section ************************************************ ******************** Thank Dean1510 for the excellent code translation **************************/ //TODO Possibly refactor code as utility class /** * This function generates the int array for translation acting as the seed for the hashing algorithm. */ private static int[] TKK() { int[] tkk = { 0x6337E, 0x217A58DC + 0x5AF91132}; return tkk; } /** * An implementation of an unsigned right shift. * Necessary since Java does not have unsigned ints. * @param x The number you wish to shift. * @param bits The number of bytes you wish to shift. * @return The shifted number, unsigned. */ private static int shr32(int x, int bits) { if (x < 0) { long x_l = 0xffffffffl + x + 1; return (int) (x_l >> bits); } return x >> bits; } private static int RL(int a, String b) {//I am not entirely sure what this magic does. for (int c = 0; c < b.length() - 2; c += 3) { int d = b.charAt(c + 2); d = d >= 65 ? d - 87 : d - 48; d = b.charAt(c + 1) == '+' ? shr32(a, d) : (a << d); a = b.charAt(c) == '+' ? (a + (d & 0xFFFFFFFF)) : a ^ d; } return a; } /** * Generates the token needed for translation. * @param text The text you want to generate the token for. * @return The generated token as a string. */ private static String generateToken(String text) { int tkk[] = TKK(); int b = tkk[0]; int e = 0; int f = 0; List<Integer> d = new ArrayList<Integer>(); for (; f < text.length(); f++) { int g = text.charAt(f); if (0x80 > g) { d.add(e++, g); } else { if (0x800 > g) { d.add(e++, g >> 6 | 0xC0); } else { if (0xD800 == (g & 0xFC00) && f + 1 < text.length() && 0xDC00 == (text.charAt(f + 1) & 0xFC00)) { g = 0x10000 + ((g & 0x3FF) << 10) + (text.charAt(++f) & 0x3FF); d.add(e++, g >> 18 | 0xF0); d.add(e++, g >> 12 & 0x3F | 0x80); } else { d.add(e++, g >> 12 | 0xE0); d.add(e++, g >> 6 & 0x3F | 0x80); } } d.add(e++, g & 63 | 128); } } int a_i = b; for (e = 0; e < d.size(); e++) { a_i += d.get(e); a_i = RL(a_i, "+-a^+6"); } a_i = RL(a_i, "+-3^+b+-f"); a_i ^= tkk[1]; long a_l; if (0 > a_i) { a_l = 0x80000000l + (a_i & 0x7FFFFFFF); } else { a_l = a_i; } a_l %= Math.pow(10, 6); return String.format(Locale.US, "%d.%d", a_l, a_l ^ b); } }