/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.enhancer.engines.celi.langid.impl;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.soap.MessageFactory;
import javax.xml.soap.SOAPBody;
import javax.xml.soap.SOAPException;
import javax.xml.soap.SOAPMessage;
import javax.xml.soap.SOAPPart;
import javax.xml.transform.stream.StreamSource;

import org.apache.clerezza.rdf.core.impl.util.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * Client that POSTs SOAP requests ({@code guessLanguage} and
 * {@code guessQueryLanguage}) over HTTP to a language identifier web service
 * and converts the {@code return} elements of the SOAP response into
 * {@link GuessedLanguage} instances.
 */
public class LanguageIdentifierClientHTTP {

    /**
     * The UTF-8 {@link Charset}, used for encoding the request body and the
     * license key.
     */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Value of the "Content-Type" request header:
     * "text/xml; charset=" followed by the name of {@link #UTF8}.
     */
    private static final String CONTENT_TYPE = "text/xml; charset="+UTF8.name();
/**
     * Opening part shared by every request: the SOAP envelope start tag
     * (declaring the {@code soapenv} and {@code lan} namespaces), an empty
     * SOAP header and the start tag of the SOAP body. The operation specific
     * elements are written by the individual request methods.
     */
    private static final String SOAP_PREFIX = "<soapenv:Envelope "
            + "xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" "
            + "xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\">"
            + "<soapenv:Header/><soapenv:Body>";

    /**
     * Closing part shared by every request: closes the SOAP body and the
     * SOAP envelope opened by {@link #SOAP_PREFIX}.
     */
    private static final String SOAP_SUFFIX = "</soapenv:Body></soapenv:Envelope>";

    /** The service endpoint the requests are POSTed to. */
    private final URL serviceEP;

    /** Read-only HTTP headers sent with every request. */
    private final Map<String,String> requestHeaders;

    /**
     * Connection timeout handed to
     * {@code Utils.createPostRequest(..)}; the unit is defined by that helper.
     */
    private final int conTimeout;

    private final Logger log = LoggerFactory.getLogger(getClass());

    /**
     * Creates a client for the given service endpoint.
     * <p>
     * The request headers are built once: "Content-Type" is always set to
     * {@link #CONTENT_TYPE}; if a license key is given, an "Authorization"
     * header with the value "Basic " followed by the Base64 encoded UTF-8
     * bytes of the key is added as well.
     *
     * @param serviceUrl the URL of the service endpoint
     * @param licenseKey the license key or {@code null} to send requests
     *        without an "Authorization" header
     * @param conTimeout the connection timeout passed on when creating the
     *        HTTP requests
     */
    public LanguageIdentifierClientHTTP(URL serviceUrl, String licenseKey, int conTimeout){
        this.serviceEP = serviceUrl;
        this.conTimeout = conTimeout;
        Map<String,String> headers = new HashMap<String,String>();
        headers.put("Content-Type", CONTENT_TYPE);
        if(licenseKey != null){
            String encoded = Base64.encode(licenseKey.getBytes(UTF8));
            headers.put("Authorization", "Basic "+encoded);
        }
        // never modified after construction -> expose as read-only view
        this.requestHeaders = Collections.unmodifiableMap(headers);
    }

    //NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
public List<GuessedLanguage> guessQueryLanguage(String text) throws IOException, SOAPException{ if(text == null || text.isEmpty()){ // no text return Collections.emptyList(); //no language } //create the POST request HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders,conTimeout); //write content BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8)); writer.write(SOAP_PREFIX); writer.write("<lan:guessQueryLanguage><textToGuess>"); StringEscapeUtils.escapeXml(writer, text); writer.write("</textToGuess></lan:guessQueryLanguage>"); writer.write(SOAP_SUFFIX); writer.close(); //Call the service long start = System.currentTimeMillis(); InputStream stream = con.getInputStream(); log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start); // Create SoapMessage and parse the results MessageFactory msgFactory = MessageFactory.newInstance(); SOAPMessage message = msgFactory.createMessage(); SOAPPart soapPart = message.getSOAPPart(); // Load the SOAP text into a stream source StreamSource source = new StreamSource(stream); // Set contents of message soapPart.setContent(source); SOAPBody soapBody = message.getSOAPBody(); List<GuessedLanguage> guesses = new Vector<GuessedLanguage>(); NodeList nlist = soapBody.getElementsByTagNameNS("*","return"); for (int i = 0; i < nlist.getLength(); i++) { try { Element result = (Element) nlist.item(i); String lang = result.getAttribute("language"); double d=Double.parseDouble(result.getAttribute("guessConfidence")); guesses.add(new GuessedLanguage(lang, d)); } catch (Exception e) { e.printStackTrace(); } } return guesses; } //NOTE (rwesten): I rather do the error handling in the EnhancementEngine! 
public List<GuessedLanguage> guessLanguage(String text) throws IOException,SOAPException { if(text == null || text.isEmpty()){ //no text -> no language return Collections.emptyList(); } //create the POST request HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders,conTimeout); //write content BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8)); writer.write(SOAP_PREFIX); writer.write("<lan:guessLanguage><textToGuess>"); StringEscapeUtils.escapeXml(writer, text); writer.write("</textToGuess></lan:guessLanguage>"); writer.write(SOAP_SUFFIX); writer.close(); //Call the service long start = System.currentTimeMillis(); InputStream stream = con.getInputStream(); log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start); // Create SoapMessage and parse the results MessageFactory msgFactory = MessageFactory.newInstance(); SOAPMessage message = msgFactory.createMessage(); SOAPPart soapPart = message.getSOAPPart(); // Load the SOAP text into a stream source StreamSource source = new StreamSource(stream); // Set contents of message soapPart.setContent(source); SOAPBody soapBody = message.getSOAPBody(); List<GuessedLanguage> guesses = new Vector<GuessedLanguage>(); NodeList nlist = soapBody.getElementsByTagNameNS("*","return"); for (int i = 0; i < nlist.getLength(); i++) { try { Element result = (Element) nlist.item(i); String lang = result.getAttribute("language"); double d=Double.parseDouble(result.getAttribute("guessConfidence")); guesses.add(new GuessedLanguage(lang, d)); } catch (Exception e) { e.printStackTrace(); } } return guesses; } }