/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package com.meaningcloud.gate; import com.meaningcloud.gate.clients.ClassClient; import com.meaningcloud.gate.param.ASutil; import gate.Annotation; import gate.AnnotationSet; import gate.DocumentContent; import gate.Factory; import gate.FeatureMap; import gate.ProcessingResource; import gate.creole.AbstractLanguageAnalyser; import gate.creole.ExecutionException; import gate.creole.metadata.CreoleParameter; import gate.creole.metadata.CreoleResource; import gate.creole.metadata.Optional; import gate.creole.metadata.RunTime; import gate.util.InvalidOffsetException; import gate.util.Out; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * This class is the implementation of the resource MeaningCloud. */ @CreoleResource(name = "MeaningCloud Text Classification", comment = "MeaningCloud Text Classification", helpURL = "http://www.meaningcloud.com/developer/text-classification/doc/1.1", icon = "/MeaningCloud.png") public class MeaningCloudClass extends AbstractLanguageAnalyser implements ProcessingResource { private String inputASname, outputASname; private List<String> annotationTypes = new ArrayList<String>(); private String apiURL, key, title; private Boolean verbose, debug; private String model, categories; public static class category { public String code, label, abs_relevance, relevance; public static class term { public String form = "", abs_relevance = ""; } public List<term> term_list = new ArrayList<term>(); } public String textTransform(boolean bool) { String ret = bool ? "y" : "n"; return ret; } public void execute() throws ExecutionException { if (document == null) throw new ExecutionException("No document provided"); AnnotationSet inputAnnSet = document.getAnnotations(inputASname); // AnnotationSet outputAnnSet = document.getAnnotations(outputASname); String text = ""; String type = ""; DocumentContent content = document.getContent(); if (inputAnnSet.isEmpty()) { text += content.toString(); type = "_document"; process(text, type, null, inputAnnSet); } else { if (annotationTypes.size() == 0) { text += content.toString(); type = "_document"; process(text, type, null, inputAnnSet); /* * Iterator<Annotation> inputIt = * gate.Utils.inDocumentOrder(inputAnnSet).iterator(); * * while(inputIt.hasNext()){ Annotation ann = inputIt.next(); * type = "_"+ann.getType(); try { text = * content.getContent(ann.getStartNode().getOffset(), * ann.getEndNode().getOffset()).toString(); } catch * (InvalidOffsetException ex) { * Logger.getLogger(MeaningCloudClass * .class.getName()).log(Level.SEVERE, null, ex); } * process(text,type,ann,inputAnnSet); } */ } else { if (debug) Out.println("annotationTypes size: " + annotationTypes.size()); for (String inputAnnExpr : annotationTypes) { if (debug) Out.println("inputAnnExpr: " + inputAnnExpr); AnnotationSet filteredAS = ASutil.getFilteredAS( inputAnnSet, inputAnnExpr); if (debug) Out.println("FilteredAS: " + gate.Utils.cleanStringFor(document, filteredAS)); Iterator<Annotation> itr = gate.Utils.inDocumentOrder( filteredAS).iterator(); while (itr.hasNext()) { Annotation ann = itr.next(); type = "_" + ann.getType(); try { text = content.getContent( ann.getStartNode().getOffset(), ann.getEndNode().getOffset()).toString(); } catch (InvalidOffsetException ex) { Logger.getLogger(MeaningCloudClass.class.getName()) .log(Level.SEVERE, null, ex); } process(text, type, ann, inputAnnSet); } } } } } public void process(String text, String type, Annotation inputAnn, AnnotationSet outputAnnSet) { if (inputAnn != null) { if (debug) Out.println("Processing: " + inputAnn.getType()); } else { if (debug) Out.println("Processing the whole document"); } ClassClient c = new ClassClient(); String api = this.getapiURL(); String key = this.getkey(); String txt = text; if (!txt.isEmpty() && !txt.equals("0")) { if (debug) Out.println("Text: " + txt); Post post; try { post = new Post(api); if (key != null && !key.isEmpty()) post.addParameter("key", key); else { Logger.getLogger(MeaningCloudTopics.class.getName()).severe( "Key not set"); return; } post.addParameter("src", "gate_2.3"); post.addParameter("txt", txt); post.addParameter("of", "xml"); post.addParameter("verbose", textTransform(this.verbose)); if (this.getmodel() != null) post.addParameter("model", this.getmodel()); if (this.getcategories() != null) post.addParameter("categories", this.getcategories()); if (this.gettitle() != null) post.addParameter("title", this.gettitle()); byte[] response = post.getResponse().getBytes("UTF-8"); String resp = new String(response, "UTF-8"); if (debug) Out.println("Response:" + resp); // Show topics DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory .newInstance(); DocumentBuilder docBuilder; try { docBuilder = docBuilderFactory.newDocumentBuilder(); org.w3c.dom.Document doc = docBuilder .parse(new ByteArrayInputStream(response)); doc.getDocumentElement().normalize(); Element response_node = doc.getDocumentElement(); try { NodeList statusL = response_node .getElementsByTagName("status"); org.w3c.dom.Node status = statusL.item(0); NamedNodeMap attributes = status.getAttributes(); org.w3c.dom.Node code = attributes.item(0); if (!code.getTextContent().equals("0")) { Logger.getLogger(MeaningCloudTopics.class.getName()) .severe("API Error: " + code.getTextContent() + "" + post.params.toString()); } else { try { List<MeaningCloudClass.category> updated = ClassClient .collectInfo(response_node); setDocFeatures(updated, type, inputAnn/* * ,outputAnnSet */); } catch (Exception e) { Logger.getLogger( MeaningCloudClass.class.getName()).log( Level.SEVERE, null, e); } } } catch (Exception e) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, e); Logger.getLogger(MeaningCloudClass.class.getName()) .severe("Not found"); } } catch (ParserConfigurationException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (SAXException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } } catch (UnsupportedEncodingException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(MeaningCloudClass.class.getName()).log( Level.SEVERE, null, ex); } } } public void setDocFeatures(List<MeaningCloudClass.category> category_list, String type, Annotation inputAnn/* ,AnnotationSet outputAnnSet */) throws InvalidOffsetException, UnsupportedEncodingException { if (category_list.size() > 0) { Iterator<category> it = category_list.iterator(); FeatureMap fm = Factory.newFeatureMap(); ArrayList<String> cat_code = new ArrayList<String>(); ArrayList<String> cat_label = new ArrayList<String>(); ArrayList<String> cat_relevance = new ArrayList<String>(); ArrayList<String> cat_abs_relevance = new ArrayList<String>(); ArrayList<ArrayList<String>> cat_term_forms = new ArrayList<ArrayList<String>>(); ArrayList<ArrayList<String>> cat_term_abs_relevance = new ArrayList<ArrayList<String>>(); while (it.hasNext()) { MeaningCloudClass.category cat = it.next(); cat_code.add((cat.code != null ? cat.code : "")); cat_label.add((cat.label != null ? cat.label : "")); cat_relevance.add((cat.relevance != null ? cat.relevance : "")); cat_abs_relevance .add((cat.abs_relevance != null ? cat.abs_relevance : "")); if (cat.term_list.size() > 0) { ArrayList<String> forms = new ArrayList<String>(); ArrayList<String> relevances = new ArrayList<String>(); Iterator<MeaningCloudClass.category.term> it2 = cat.term_list .iterator(); while (it2.hasNext()) { MeaningCloudClass.category.term t = it2.next(); forms.add(t.form.isEmpty() ? " " : t.form); relevances.add(t.abs_relevance.isEmpty() ? " " : t.abs_relevance); } cat_term_forms.add(forms); cat_term_abs_relevance.add(relevances); } } fm.put("category_code", cat_code); fm.put("category_label", cat_label); fm.put("category_relevance", cat_relevance); fm.put("category_abs_relevance", cat_abs_relevance); fm.put("category_term_form", cat_term_forms); fm.put("category_term_abs_relevance", cat_term_abs_relevance); if (inputAnn != null) { Logger.getLogger(MeaningCloudClass.class.getName()) .info("The categories detected in the text you have processed have been set as new Features of your inputAS."); FeatureMap fm2 = inputAnn.getFeatures(); fm2.putAll(fm); } else { Logger.getLogger(MeaningCloudClass.class.getName()) .info("The categories detected in the text you have processed have been set as new Features of your inputAS."); FeatureMap fm2 = document.getFeatures(); fm2.putAll(fm); } } else { Logger.getLogger(MeaningCloudClass.class.getName()) .info("According to api.meaningcloud.com, the text you have processed does not belong to any of the classes defined in the classification model you chose."); } } @RunTime @CreoleParameter(comment = "URL Of the API to query", defaultValue = "http://api.meaningcloud.com/class-1.1") public void setapiURL(String apiURL) { this.apiURL = apiURL; } public String getapiURL() { return apiURL; } @RunTime @CreoleParameter(comment = "License Key") public void setkey(String key) { this.key = key; } public String getkey() { return key; } @RunTime @CreoleParameter(defaultValue = "IPTC_en", comment = "Classification model to use. It will define into which categories the text may be classified.") public void setmodel(String m) { this.model = m; } public String getmodel() { return model; } @RunTime @Optional @CreoleParameter(comment = "List of prefixes of categories to which the classification is limited. Each value will be separated by '|'. All the categories that do not start with any of the prefixes specified in the list will not be taken account in the classification.") public void setcategories(String categories) { this.categories = categories; } public String getcategories() { return categories; } @RunTime @Optional @CreoleParameter(comment = "Descriptive title of the content") public void settitle(String t) { this.title = t; } public String gettitle() { return title; } @RunTime @Optional @CreoleParameter(comment = "AnnotationSet with the input content") public void setinputASname(String t) { this.inputASname = t; } public String getinputASname() { return inputASname; } @RunTime @Optional @CreoleParameter(comment = "Output Annotation Set", defaultValue = "MeaningCloud") public void setoutputASname(String t) { this.outputASname = t; } public String getoutputASname() { return outputASname; } @RunTime @Optional @CreoleParameter(defaultValue = "false", comment = "Verbose mode. Shows additional information about the classification.") public void setverbose(Boolean verb) { this.verbose = verb; } public Boolean getverbose() { return verbose; } @RunTime @Optional @CreoleParameter(defaultValue = "false", comment = "Debug variable for the GATE plugin") public void setdebug(Boolean verb) { this.debug = verb; } public Boolean getdebug() { return debug; } @RunTime @Optional @CreoleParameter(comment = "Filter content by this expression. It allows format: \n" + "Type.FeatureName \n" + "or \n" + "Type.FeatureName==FeatureValue \n") public void setannotationTypes(List<String> iat) { this.annotationTypes = iat; } public List<String> getannotationTypes() { return annotationTypes; } } // class MeaningCloudClass