package gov.nih.ncgc.bard.capextract.handler;
import gov.nih.ncgc.bard.capextract.CAPConstants;
import gov.nih.ncgc.bard.capextract.CAPDictionary;
import gov.nih.ncgc.bard.capextract.CAPDictionaryElement;
import gov.nih.ncgc.bard.capextract.CAPUtil;
import gov.nih.ncgc.bard.capextract.ICapResourceHandler;
import gov.nih.ncgc.bard.capextract.jaxb.Dictionary;
import gov.nih.ncgc.bard.capextract.jaxb.Element;
import java.io.IOException;
import java.math.BigInteger;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Vector;
/**
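* Handles the CAP dictionary resource: retrieves and unmarshals the dictionary, builds a
* {@link CAPDictionary} from its elements and element hierarchies, and stores it in the
* BARD database.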
*
* @author Rajarshi Guha
*/
public class DictionaryHandler extends CapResourceHandler implements ICapResourceHandler {
public DictionaryHandler() {
super();
}
/**
* Process a CAP entity that is located at some URL.
*
* @param url The URL from which to retrieve the entity fron
* @param resource The CAP resource that is meant to be processed. An implementing class
* can choose to proceed or not based on this parameter.
*/
public int process(String url, CAPConstants.CapResource resource) throws IOException {
if (resource != CAPConstants.CapResource.DICTIONARY) return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
log.info("Processing " + resource + " from " + url);
Dictionary d = getResponse(url, resource);
log.info("\tUnmarshalled dictionary");
CAPDictionary dict = process(d);
// serialize this to the db
Connection conn = null;
PreparedStatement pst;
java.util.Date today = null;
try {
conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
pst = conn.prepareStatement("delete from cap_dict_obj");
pst.executeUpdate();
pst.close();
pst = conn.prepareStatement("delete from cap_dict_elem");
pst.executeUpdate();
pst.close();
pst = conn.prepareStatement("INSERT INTO cap_dict_obj(ins_date, dict) VALUES (?, ?)");
today = new java.util.Date();
pst.setDate(1, new java.sql.Date(today.getTime()));
pst.setObject(2, dict);
pst.executeUpdate();
pst.close();
conn.commit();
log.info("\tSerialized dictionary object to database");
// now we dump in the dict elements (a partial representation) that will be useful
// for SQL queries. We're assuming for now that a dict elem is associated with
// a single ontology
pst = conn.prepareStatement("insert into cap_dict_elem (ins_date, dictid, label, description, abbreviation, ext_url, onto_name, onto_abbrv, onto_url, onto_id, element_status) values (?,?,?,?,?,?, ?,?,?,?, ?)");
for (CAPDictionaryElement elem : dict.getNodes()) {
pst.setDate(1, new java.sql.Date(today.getTime()));
pst.setInt(2, elem.getElementId().intValue());
pst.setString(3, elem.getLabel());
pst.setString(4, elem.getDescription());
pst.setString(5, elem.getAbbreviation());
pst.setString(6, elem.getExternalUrl());
pst.setString(7, elem.getOnto_name());
pst.setString(8, elem.getOnto_abbrv());
pst.setString(9, elem.getOnto_url());
pst.setString(10, elem.getOnto_id());
pst.setString(11, elem.getElementStatus());
pst.addBatch();
}
pst.executeBatch();
conn.commit();
log.info("\tStored (partial) dictionary elements to database");
conn.close();
} catch (com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException e) {
if (e.getMessage().indexOf("Duplicate entry") >= 0) {
log.warn("Already have a serialized dictionary for " + today + ", so not inserting");
}
} catch (SQLException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE;
// TODO should handle resultType, units and descriptors
}
private CAPDictionary process(Dictionary d) throws IOException {
CAPDictionary dict = new CAPDictionary();
List<Element> elems = d.getElements().getElement();
for (Element elem : elems) {
dict.addNode(new CAPDictionaryElement(elem));
}
log.info("\tAdded " + dict.size() + " <element> entries");
int nrel = 0;
int nnoparent = 0;
List<Dictionary.ElementHierarchies.ElementHierarchy> hierarchies = d.getElementHierarchies().getElementHierarchy();
for (Dictionary.ElementHierarchies.ElementHierarchy h : hierarchies) {
String relType = h.getRelationshipType();
BigInteger childId = getElementId(h.getChildElement().getLink().getHref());
h.getChildElement().getLink().getHref();
//don't reset the extraction status so it perists at CAP.
//set the extraction status to complete.
//setExtractionStatus("Complete", h.getChildElement().getLink().getHref(), CAPConstants.CapResource.ELEMENT);
CAPDictionaryElement childElem = dict.getNode(childId);
// there may be an element with no parent
if (h.getParentElement() != null) {
BigInteger parentId = getElementId(h.getParentElement().getLink().getHref());
CAPDictionaryElement parentElem = dict.getNode(parentId);
dict.addOutgoingEdge(parentElem, childElem, null);
dict.addIncomingEdge(childElem, parentElem, relType);
} else nnoparent++;
nrel++;
}
log.info("\tAdded " + nrel + " parent/child relationships with " + nnoparent + " elements having no parent");
// ok'we got everything we need. Lets make it available globally
CAPConstants.setDictionary(dict);
return dict;
}
public Vector<Object> poll(String url, CAPConstants.CapResource resource, boolean skipPartial) throws IOException {
Vector<Object> vec = new Vector<Object>();
Dictionary d = getResponse(url, resource);
process(d);
vec.add(d);
return vec;
}
private BigInteger getElementId(String url) {
String[] comps = url.split("/");
return new BigInteger(comps[comps.length - 1]);
}
}