package edu.tufts.vue.mbs;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;
import java.net.URLDecoder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.google.common.collect.AbstractMapEntry;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.alchemyapi.api.AlchemyAPI;
import com.alchemyapi.api.AlchemyAPI_NamedEntityParams;
import com.alchemyapi.api.AlchemyAPI_ConceptParams;
import edu.tufts.vue.metadata.VueMetadataElement;
import tufts.vue.LWComponent;
/**
 * {@link LWComponentAnalyzer} that sends component text or a resource URL to
 * an {@link AlchemyAPI} client and converts the returned XML into
 * {@link AnalyzerResult} objects.
 *
 * <p>An API key must be supplied through {@link #SetAlchemyAPIKey(String)}
 * before any of the analyze methods is called.
 */
public class AlchemyAnalyzer implements LWComponentAnalyzer {
    AlchemyAPI alchemy = null;
    String alchemyAPIKey = null;

    private static final String ANALYZER_NAME = "Alchemy Analyzer";
    private static final String MISSING_KEY_MESSAGE = "AlchemyAPI Key is not specified";

    /** Resource property keys starting with one of these prefixes are added to the analyzed text. */
    private static final String[] DESCRIPTIVE_KEY_PREFIXES =
        { "title", "date", "creator", "description" };

    private static final org.apache.log4j.Logger log =
        org.apache.log4j.Logger.getLogger(AlchemyAnalyzer.class);

    /**
     * Builds a text from the component's label, notes, metadata values and
     * selected resource properties, submits it to
     * {@code TextGetRankedNamedEntities} and parses every {@code entity}
     * element of the response.
     *
     * @param c the component to analyze; must not be {@code null}
     * @param tryFallback not used by this implementation
     * @return the parsed results; empty when the component yields no text or
     *         the response contains no usable entity
     * @throws IllegalArgumentException if {@code c} is {@code null}
     * @throws RuntimeException if no API key is set, or wrapping any failure
     *         of the remote request
     * @see edu.tufts.vue.mbs.LWComponentAnalyzer#analyze(tufts.vue.LWComponent, boolean)
     */
    public List<AnalyzerResult> analyze(LWComponent c, boolean tryFallback) {
        requireApiKey();
        if (null == c)
            throw new IllegalArgumentException("Illegal data specified to analyze.");

        final String context = buildContext(c);
        if (isEmpty(context))
            return new ArrayList<AnalyzerResult>();

        Document doc;
        try {
            doc = alchemy.TextGetRankedNamedEntities(context);
        } catch (Exception e) {
            log.error("Alchemy TextGetRankedNamedEntities request failed", e);
            throw new RuntimeException(e);
        }
        return parseResults(doc, "entity", null);
    }

    /**
     * Equivalent to {@code analyze(c, true)}.
     *
     * @see edu.tufts.vue.mbs.LWComponentAnalyzer#analyze(tufts.vue.LWComponent)
     */
    public List<AnalyzerResult> analyze(LWComponent c) {
        return analyze(c, true);
    }

    /**
     * Analyzes the URL returned by {@code c.getResource().getSpec()}: first
     * with {@code URLGetRankedNamedEntities}, then with
     * {@code URLGetRankedConcepts}. Both requests ask for the RAW source text.
     * Results are grouped by {@link AnalyzerResult#getType()}; concept results
     * that carry no explicit type are given the type {@code "Concept"}.
     *
     * @param c the component whose resource is analyzed
     * @return the results of both requests keyed by result type
     * @throws IllegalArgumentException if {@code c}, its resource or the
     *         resource spec is {@code null}
     * @throws RuntimeException if no API key is set, or wrapping any failure
     *         of either remote request
     * @see edu.tufts.vue.mbs.LWComponentAnalyzer#analyzeResource(tufts.vue.LWComponent)
     */
    public Multimap<String, AnalyzerResult> analyzeResource(LWComponent c) throws Exception {
        requireApiKey();
        if (null == c || null == c.getResource() || null == c.getResource().getSpec())
            throw new IllegalArgumentException("Illegal resource specified to analyze.");

        final String spec = c.getResource().getSpec();
        Multimap<String, AnalyzerResult> result = Multimaps.newArrayListMultimap();

        Document doc;
        try {
            AlchemyAPI_NamedEntityParams entityParams = new AlchemyAPI_NamedEntityParams();
            entityParams.setSourceText(AlchemyAPI_NamedEntityParams.RAW);
            doc = alchemy.URLGetRankedNamedEntities(spec, entityParams);
        } catch (Exception e) {
            log.error("Alchemy XXXX URLGetRankedNamedEntities request failed -----", e);
            throw new RuntimeException(e);
        }
        for (AnalyzerResult res : parseResults(doc, "entity", null))
            result.put(res.getType(), res);

        try {
            AlchemyAPI_ConceptParams conceptParams = new AlchemyAPI_ConceptParams();
            conceptParams.setSourceText(AlchemyAPI_ConceptParams.RAW);
            // The params object was previously configured but never handed to
            // the request, so the RAW setting had no effect.
            doc = alchemy.URLGetRankedConcepts(spec, conceptParams);
        } catch (Exception e) {
            log.error("Alchemy XXXX URLGetRankedConcepts request failed " + spec + " -----", e);
            throw new RuntimeException(e);
        }
        for (AnalyzerResult res : parseResults(doc, "concept", "Concept"))
            result.put(res.getType(), res);

        return result;
    }

    /**
     * @return the fixed display name of this analyzer
     * @see edu.tufts.vue.mbs.LWComponentAnalyzer#getAnalyzerName()
     */
    public String getAnalyzerName() { return ANALYZER_NAME; }

    /** @return the API key currently stored, or {@code null} if none was set */
    public String GetAlchemyAPIKey() { return alchemyAPIKey; }

    /** @return {@code true} when a non-empty API key is stored */
    public boolean IsAlchemyAPIKeySet() { return !isEmpty(alchemyAPIKey); }

    /**
     * Creates an {@link AlchemyAPI} client for {@code key} and issues a
     * {@code TextGetLanguage} probe request with it. The key and client are
     * stored only if the probe completes without throwing, so a failed call
     * leaves the previous configuration untouched.
     *
     * @param key the API key; must not be {@code null} or empty
     * @throws RuntimeException if {@code key} is empty, or wrapping any
     *         failure of the probe request
     */
    public void SetAlchemyAPIKey(String key)
    {
        if (isEmpty(key))
            throw new RuntimeException(MISSING_KEY_MESSAGE);
        final AlchemyAPI candidate = AlchemyAPI.GetInstanceFromString(key);
        try {
            candidate.TextGetLanguage("Mother and Father");
        } catch (Exception e) {
            log.error("Alchemy TextGetLanguage request failed", e);
            throw new RuntimeException(e);
        }
        alchemyAPIKey = key;
        this.alchemy = candidate;
    }

    /** Throws if no API key has been stored yet. */
    private void requireApiKey() {
        if (isEmpty(alchemyAPIKey))
            throw new RuntimeException(MISSING_KEY_MESSAGE);
    }

    /**
     * Concatenates the component's label, notes, metadata values and the
     * resource properties whose key starts with one of
     * {@link #DESCRIPTIVE_KEY_PREFIXES}. Every fragment is followed by
     * {@code ". "}.
     */
    private String buildContext(LWComponent c) {
        final StringBuilder text = new StringBuilder();

        if (!isEmpty(c.getLabel()))
            text.append(c.getLabel()).append(". ");
        if (!isEmpty(c.getNotes()))
            text.append(c.getNotes()).append(". ");

        if (null != c.getMetadataList() && !isEmpty(c.getMetadataList().getMetadata())) {
            for (VueMetadataElement element : c.getMetadataList().getMetadata()) {
                if (null != element && !isEmpty(element.getValue()))
                    text.append(element.getValue()).append(". ");
            }
        }

        if (null != c.getResource() && null != c.getResource().getProperties()) {
            final Collection<?> entries = c.getResource().getProperties().entries();
            if (!isEmpty(entries)) {
                for (Object candidate : entries) {
                    // Only AbstractMapEntry instances are considered, as before.
                    if (!(candidate instanceof AbstractMapEntry))
                        continue;
                    final Entry<?, ?> entry = (Entry<?, ?>) candidate;
                    if (null == entry.getKey())
                        continue;
                    final String key = entry.getKey().toString().trim();
                    if (!isDescriptiveKey(key))
                        continue;
                    // A matching key with a null value used to raise a
                    // NullPointerException; such properties are skipped now.
                    final Object value = entry.getValue();
                    if (null == value)
                        continue;
                    text.append("The ").append(key).append(" is ")
                        .append(value.toString().trim())
                        .append(". ");
                }
            }
        }
        return text.toString();
    }

    /** Tells whether {@code key} starts with one of {@link #DESCRIPTIVE_KEY_PREFIXES}. */
    private boolean isDescriptiveKey(String key) {
        if (isEmpty(key))
            return false;
        for (String prefix : DESCRIPTIVE_KEY_PREFIXES) {
            if (key.startsWith(prefix))
                return true;
        }
        return false;
    }

    /**
     * Parses every element named {@code tagName} in {@code doc} with
     * {@link #parseEntity(Node, String)} and returns the non-null results in
     * document order. A {@code null} document yields an empty list.
     */
    private List<AnalyzerResult> parseResults(Document doc, String tagName, String defaultType) {
        if (null == doc) {
            log.warn("Alchemy returned no document; no '" + tagName + "' results parsed");
            return new ArrayList<AnalyzerResult>();
        }
        final NodeList nodes = doc.getElementsByTagName(tagName);
        final int total = (null == nodes) ? 0 : nodes.getLength();
        final List<AnalyzerResult> parsed = new ArrayList<AnalyzerResult>(total);
        for (int i = 0; i < total; ++i) {
            final AnalyzerResult res = parseEntity(nodes.item(i), defaultType);
            if (null != res)
                parsed.add(res);
        }
        return parsed;
    }

    /**
     * Converts one {@code entity}/{@code concept} element into an
     * {@link AnalyzerResult}.
     *
     * <p>The child elements {@code type}, {@code text}, {@code relevance},
     * {@code count} and {@code disambiguated} are read. A non-empty
     * {@code name} inside {@code disambiguated} replaces the text value. All
     * {@code subType} values inside it are registered as ontologies and the
     * first one alone as subtype.
     *
     * @param entity the element to convert
     * @param defaultType the type to use when the element has no {@code type} child
     * @return the result, or {@code null} when the type or the value is missing
     */
    private AnalyzerResult parseEntity(Node entity, String defaultType) {
        if (null == entity)
            return null;

        int count = 0;
        double relevance = .0;
        String type = defaultType;
        String value = null;
        Element disambiguated = null;

        for (Node child = entity.getFirstChild(); null != child; child = child.getNextSibling()) {
            if (Node.ELEMENT_NODE != child.getNodeType())
                continue;
            final String tag = child.getNodeName();
            if ("type".equals(tag))
                type = getElementValue(child);
            else if ("text".equals(tag))
                value = decodeText(getElementValue(child));
            else if ("disambiguated".equals(tag))
                disambiguated = (Element) child;
            else if ("relevance".equals(tag))
                relevance = toDouble(getElementValue(child), relevance);
            else if ("count".equals(tag))
                count = toInt(getElementValue(child), count);
        }

        ArrayList<String> subtypes = null;
        if (null != disambiguated) {
            final NodeList names = disambiguated.getElementsByTagName("name");
            if (null != names && names.getLength() > 0) {
                final String name = getElementValue(names.item(0));
                if (!isEmpty(name))
                    value = name;
            }
            final NodeList subTypeNodes = disambiguated.getElementsByTagName("subType");
            if (null != subTypeNodes && subTypeNodes.getLength() > 0) {
                subtypes = new ArrayList<String>(subTypeNodes.getLength());
                for (int i = 0; i < subTypeNodes.getLength(); i++)
                    subtypes.add(getElementValue(subTypeNodes.item(i)));
            }
        }

        if (isEmpty(type) || isEmpty(value))
            return null;

        final AnalyzerResult parsed = new AnalyzerResult(type, value, relevance, count);
        if (null != subtypes) {
            parsed.initOntologies();
            parsed.addOntologies(subtypes);
            // Use a separate one-element list instead of truncating the list
            // just handed to addOntologies, so the ontologies stay complete
            // whether or not AnalyzerResult keeps a reference to its argument.
            final ArrayList<String> primary = new ArrayList<String>(1);
            primary.add(subtypes.get(0));
            parsed.initSubtypes();
            parsed.addSubtypes(primary);
        }
        return parsed;
    }

    /**
     * URL-decodes {@code raw} as UTF-8. Text that is not validly encoded (for
     * example one containing a bare {@code %}) is returned unchanged rather
     * than discarded.
     */
    private String decodeText(String raw) {
        if (null == raw)
            return null;
        try {
            return URLDecoder.decode(raw, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            log.debug("UTF-8 decoding unavailable; using raw entity text", e);
            return raw;
        } catch (IllegalArgumentException e) {
            log.debug("Entity text is not URL-encoded; using it as is: " + raw, e);
            return raw;
        }
    }

    /** Parses {@code text} as a double; returns {@code fallback} when it is missing or malformed. */
    private double toDouble(String text, double fallback) {
        if (isEmpty(text))
            return fallback;
        try {
            return Double.parseDouble(text.trim());
        } catch (NumberFormatException e) {
            log.debug("Ignoring malformed relevance '" + text + "'", e);
            return fallback;
        }
    }

    /** Parses {@code text} as an int; returns {@code fallback} when it is missing or malformed. */
    private int toInt(String text, int fallback) {
        if (isEmpty(text))
            return fallback;
        try {
            return Integer.parseInt(text.trim());
        } catch (NumberFormatException e) {
            log.debug("Ignoring malformed count '" + text + "'", e);
            return fallback;
        }
    }

    /**
     * Returns the text of {@code node} when it has exactly one child and that
     * child is a text node; {@code null} in every other case.
     */
    private String getElementValue(Node node)
    {
        final NodeList children = node.getChildNodes();
        if (children.getLength() != 1)
            return null;
        final Node only = children.item(0);
        if (null == only || only.getNodeType() != Node.TEXT_NODE)
            return null;
        return only.getNodeValue();
    }

    /** @return {@code true} when {@code str} is {@code null} or has no characters */
    private boolean isEmpty(String str)
    {
        return null == str || str.length() == 0;
    }

    /** @return {@code true} when {@code collection} is {@code null} or has no elements */
    private boolean isEmpty(Collection<?> collection) {
        return null == collection || collection.isEmpty();
    }
}