package edu.tufts.vue.mbs; /** * This code is NO LONGER USED IN VUE * With the module you can get significact words and prhases extracted from lots of content on your website. * This code is discontinued due to the closing of Yahoo's Term Extraction web service. The service will be terminated at the end of August. */ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Properties; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.*; import org.w3c.dom.*; import org.xml.sax.SAXException; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.PostMethod; import com.google.common.collect.AbstractMapEntry; import com.google.common.collect.Multimap; import edu.tufts.vue.metadata.MetadataList; import edu.tufts.vue.metadata.VueMetadataElement; import tufts.vue.AnalyzerAction; import tufts.vue.LWComponent; import tufts.vue.MetaMap; public class YahooAnalyzer implements LWComponentAnalyzer { private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(AnalyzerAction.class); private static final String ANALYZER_NAME = "Yahoo Term Extractor"; public List analyze(LWComponent c) { return analyze(c,true); } public Multimap<String, AnalyzerResult> analyzeResource(LWComponent c) { return null; } public List<AnalyzerResult> analyze(LWComponent c, boolean fallback) { List<AnalyzerResult> results = new ArrayList<AnalyzerResult>(); String request = "http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction"; HttpClient client = new HttpClient(); PostMethod method = new PostMethod(request); method.addParameter("appid","sfeSlmvV34GVJdO3q6r7sECK4KVE0GIP3xVbWKtwu8Ta2gOCOSkAt1sC2FNl"); MetadataList ml = c.getMetadataList(); List<VueMetadataElement> elems = ml.getMetadata(); Iterator<VueMetadataElement> i = elems.iterator(); c.getNotes(); String context = c.getLabel() + " " + c.getNotes() + " "; while (i.hasNext()) { VueMetadataElement e = i.next(); context += e.getValue() + " "; } if (c.getResource() !=null) { MetaMap map = c.getResource().getProperties(); ///c.getResource().get if (map!=null) { Collection collection = map.entries(); // Iterator iterator = collection.iterator(); Object[] obj = collection.toArray(); for (int p = 0; p < obj.length; p++) { com.google.common.collect.AbstractMapEntry o = (AbstractMapEntry) obj[p]; if (o.getKey().equals("Title") || o.getKey().equals("Date") || o.getKey().equals("Creator") || o.getKey().equals("Description")) { System.out.println(o.toString()); context += o.getValue() + ". "; } } } } // method.addParameter("context","Manet was a painter at the second half of the 19th century"); URLEncoder.encode(context); method.addParameter("context",context); // Send POST request int statusCode; // BufferedReader br = null; InputStream rstream = null; try { statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + method.getStatusLine()); } // Get the response body rstream = method.getResponseBodyAsStream(); // Process response Document response = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(rstream); //Get all search Result nodes // NodeList nodes = (NodeList)xPath.evaluate("/ResultSet/Result", response, XPathConstants.NODESET); NodeList nodes = response.getElementsByTagName("Result"); int nodeCount = nodes.getLength(); System.out.println("Node Count : " + nodeCount); //iterate over search Result nodes for (int i1 = 0; i1 < nodeCount; i1++) { // String value = (String)xPath.evaluate("Result", nodes.item(i), XPathConstants.STRING); System.out.println("Value: " + getTextValue(nodes.item(i1))); System.out.println("--"); // results.add(new Property().getTextValue(nodes.item(i1))); results.add(new AnalyzerResult("NA", getTextValue(nodes.item(i1)))); } } catch (HttpException e) { Log.error(e.getMessage()); } catch (IOException e) { Log.error(e.getMessage()); } catch (SAXException e) { Log.error(e.getMessage()); } catch (ParserConfigurationException e) { Log.error(e.getMessage()); } finally { if (rstream !=null) try { rstream.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return results; } private String getTextValue(Node node) { if (node.hasChildNodes()) { return node.getFirstChild().getNodeValue(); } else { return ""; } } public static void main(String[] args) { YahooAnalyzer ya = new YahooAnalyzer(); ya.analyze(null); } public String getAnalyzerName() { return ANALYZER_NAME; } }