package com.meaningcloud.gate; import com.meaningcloud.gate.clients.LangClient; import com.meaningcloud.gate.param.ASutil; import gate.Annotation; import gate.AnnotationSet; import gate.DocumentContent; import gate.Factory; import gate.FeatureMap; import gate.ProcessingResource; import gate.creole.AbstractLanguageAnalyser; import gate.creole.ExecutionException; import gate.creole.metadata.CreoleParameter; import gate.creole.metadata.CreoleResource; import gate.creole.metadata.Optional; import gate.creole.metadata.RunTime; import gate.util.InvalidOffsetException; import gate.util.Out; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * This class is the implementation of the resource api.meaningcloud.com/lang. */ @CreoleResource(name = "MeaningCloud Language Identification", comment = "MeaningCloud Language Identification", helpURL = "http://www.meaningcloud.com/developer/language-identification/doc/1.1", icon = "/MeaningCloud.png") public class MeaningCloudLang extends AbstractLanguageAnalyser implements ProcessingResource { private String apiURL, key; private String inputASname, outputASname; private List<String> annotationTypes = new ArrayList<String>(); private boolean debug; public void execute() throws ExecutionException { if (document == null) throw new ExecutionException("No Document Provided"); if (key == null || key.trim().isEmpty()) throw new ExecutionException("No API Key Provided"); if (apiURL == null || apiURL.trim().isEmpty()) throw new ExecutionException("Service URL Has Not Been Set"); AnnotationSet inputAnnSet = document.getAnnotations(inputASname); String text = ""; DocumentContent content = document.getContent(); if (inputAnnSet.isEmpty()) { text += content.toString(); process(text, null); } else { if (annotationTypes.size() == 0) { text += content.toString(); process(text, null); /* * Iterator<Annotation> inputIt = * gate.Utils.inDocumentOrder(inputAnnSet).iterator(); * * while(inputIt.hasNext()) { Annotation ann = inputIt.next(); * try { text = * content.getContent(ann.getStartNode().getOffset(), * ann.getEndNode().getOffset()).toString(); } * catch(InvalidOffsetException ex) { * Logger.getLogger(MeaningCloudClass.class.getName()).log( * Level.SEVERE, null, ex); } process(text, ann); } */ } else { if (debug) Out.println("annotationTypes size: " + annotationTypes.size()); for (String inputAnnExpr : annotationTypes) { if (debug) Out.println("inputAnnExpr: " + inputAnnExpr); AnnotationSet filteredAS = ASutil.getFilteredAS( inputAnnSet, inputAnnExpr); if (debug) Out.println("FilteredAS: " + gate.Utils.cleanStringFor(document, filteredAS)); Iterator<Annotation> itr = gate.Utils.inDocumentOrder( filteredAS).iterator(); while (itr.hasNext()) { Annotation ann = itr.next(); try { text = content.getContent( ann.getStartNode().getOffset(), ann.getEndNode().getOffset()).toString(); } catch (InvalidOffsetException ex) { Logger.getLogger(MeaningCloudClass.class.getName()) .log(Level.SEVERE, null, ex); } process(text, ann); } } } } } public void process(String text, Annotation inputAnn) { String type = ""; if (inputAnn != null) { type = inputAnn.getType(); if (debug) Out.println("Processing: " + inputAnn.getType()); } else { if (debug) Out.println("Processing the whole document"); } // LangClient c = new LangClient(); String api = this.getApiURL(); String key = this.getKey(); String txt = text; if (!txt.isEmpty() && !txt.equals("0")) { if (debug) Out.println("Text: " + txt); Post post; try { post = new Post(api); if (key != null && !key.isEmpty()) post.addParameter("key", key); else { Logger.getLogger(MeaningCloudTopics.class.getName()).severe( "Key is not set"); return; } post.addParameter("src", "gate_2.3"); post.addParameter("txt", txt); post.addParameter("of", "xml"); byte[] response = post.getResponse().getBytes("UTF-8"); String resp = new String(response, "UTF-8"); if (debug) Out.println("Response:" + resp); // Show topics DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory .newInstance(); DocumentBuilder docBuilder; try { docBuilder = docBuilderFactory.newDocumentBuilder(); org.w3c.dom.Document doc = docBuilder .parse(new ByteArrayInputStream(response)); doc.getDocumentElement().normalize(); Element response_node = doc.getDocumentElement(); try { NodeList statusL = response_node .getElementsByTagName("status"); org.w3c.dom.Node status = statusL.item(0); NamedNodeMap attributes = status.getAttributes(); org.w3c.dom.Node code = attributes.item(0); if (!code.getTextContent().equals("0")) { Logger.getLogger(MeaningCloudTopics.class.getName()) .severe("API Error: " + code.getTextContent() + "" + post.params.toString()); } else { try { List<String> updated = LangClient .collectInfo(response_node); setDocFeatures(updated, type, inputAnn); } catch (Exception e) { Logger.getLogger( MeaningCloudClass.class.getName()).log( Level.SEVERE, null, e); } } } catch (Exception e) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, e); Logger.getLogger(MeaningCloudClass.class.getName()) .severe("Not found"); } } catch (ParserConfigurationException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (SAXException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } } catch (UnsupportedEncodingException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } } } public void setDocFeatures(List<String> lang_list, String type, Annotation inputAnn) throws InvalidOffsetException, UnsupportedEncodingException { if (lang_list.size() > 0) { Iterator<String> it = lang_list.iterator(); FeatureMap fm = Factory.newFeatureMap(); List<String> lang = new ArrayList<String>(); while (it.hasNext()) { lang.add(new String(it.next().getBytes(), "utf-8")); } fm.put("lang", lang); if (inputAnn != null) { Logger.getLogger(MeaningCloudClass.class.getName()) .info("The text you have processed is written in " + fm.get("lang") + ". The annotation was created as a new Feature of your inputAS"); FeatureMap fm2 = inputAnn.getFeatures(); fm2.putAll(fm); } else { Logger.getLogger(MeaningCloudClass.class.getName()) .info("The text you have processed is written in " + fm.get("lang") + ". The annotation was created as a Document Feature"); FeatureMap fm2 = document.getFeatures(); fm2.putAll(fm); } } } @RunTime @CreoleParameter(comment = "URL Of the API to query", defaultValue = "http://api.meaningcloud.com/lang-1.1") public void setApiURL(String apiURL) { this.apiURL = apiURL; } public String getApiURL() { return apiURL; } @RunTime @CreoleParameter(comment = "License Key") public void setKey(String key) { this.key = key; } public String getKey() { return key; } @RunTime @Optional @CreoleParameter(comment = "AnnotationSet with the input content") public void setInputASName(String t) { this.inputASname = t; } public String getInputASName() { return inputASname; } @RunTime @Optional @CreoleParameter(comment = "Output Annotation Set", defaultValue = "MeaningCloud") public void setOutputASName(String t) { this.outputASname = t; } public String getOutputASName() { return outputASname; } @RunTime @Optional @CreoleParameter(defaultValue = "false", comment = "Debug variable for the GATE plugin") public void setDebug(Boolean verb) { this.debug = verb; } public Boolean getDebug() { return debug; } @RunTime @Optional @CreoleParameter(comment = "Filter content by this expression. It allows format: \n" + "Type.FeatureName \n" + "or \n" + "Type.FeatureName==FeatureValue \n") public void setannotationTypes(List<String> iat) { this.annotationTypes = iat; } public List<String> getannotationTypes() { return annotationTypes; } }