/**
*
*/
package outputter;
import java.io.File;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
import outputter.data.CompositeEntity;
import outputter.data.CompositeQuality;
import outputter.data.EQProposals;
import outputter.data.Entity;
import outputter.data.EntityProposals;
import outputter.data.FormalConcept;
import outputter.data.NegatedQuality;
import outputter.data.Quality;
import outputter.data.QualityProposals;
import outputter.data.REntity;
import outputter.data.RelationalQuality;
import outputter.data.SimpleEntity;
import outputter.evaluation.EQPerformanceEvaluation;
import outputter.knowledge.Dictionary;
import outputter.knowledge.ELKReasoner;
import outputter.knowledge.TermOutputerUtilities;
import outputter.output.HTMLOutput;
import outputter.prep.XMLNormalizer;
import outputter.process.BinaryCharacterStatementParser;
import outputter.process.CharacterStatementParser;
import outputter.process.StateStatementParser;
import outputter.search.TermSearcher;
/* annotation guideline: http://phenoscape.org/wiki/Guide_to_Character_Annotation */
/**
* @author Hong Updates
*This class outputs EQ statements from the XML files output by CharaParser
*The XML files are in \target\final
*
*This is in essence a rule-based algorithm.
*Rules for identifying the primary entity from the character statement (could also include character state statements) are implemented in KeyEntityFinder.java
*Rules for generating EQs from <structure> elements are in XML2EQ.createEQs4Structure method.
*Rules for matching an entity-entity locator pair are in EntitySearcher.java
*Interface to ontologies is TermSearcher.java and TermOutputerUtilities.java
*/
/**
* Entity: eye
* EntityID: TAO:1234567
* Quality:
* Quality-Negated: not round
* QN-Parent:shape
* QN-ParentID: PATO:0000113(id for shape)
* QualityID: PATO:0000111(id for round)
*
*
*/
@SuppressWarnings("static-access")
public class XML2EQ {
// log4j logger shared by all instances of this class
private static final Logger LOGGER = Logger.getLogger(XML2EQ.class);
// directory holding the input XML files; set from the constructor's sourcedir and listed by outputEQs()
private File source;
// NOTE(review): not referenced in the visible part of this file; presumably a counter used by other classes -- confirm
public static int unknownid = 0;
// name of the database table that is (re)created in the constructor and receives one row per EQ proposal
private String outputtable;
// running index of the file being processed; printed in front of the file name by outputEQs()
private int count = 0;
// private String keyentity = null;
// key entities returned by CharacterStatementParser.getKeyEntities() for the current character statement
private ArrayList<EntityProposals> keyentities;
//private String keyentitylocator = null;
//private ArrayList<EQStatementProposals> allEQs = null;
// EQ proposals collected for the XML file currently being processed; replaced for every file
private ArrayList<EQProposals> allEQs = null;
// NOTE(review): not referenced in the visible part of this file
private HashSet<String> stateids = new HashSet<String>();
// only populated by a commented-out static initializer in this file; effectively empty here
private static ArrayList<String> serenostyle = new ArrayList<String>();
// "|"-separated character terms read by markcharacters(); only assigned in commented-out code visible here, so it may still be null
private String characters = null;
// XPath expressions below are compiled once in the constructor
private XPath pathStructure;
private XPath pathWholeOrgStrucChar;
private XPath pathCharacter;
private XPath pathText2;
private XPath pathRelation;
private XPath pathRelationUnderCharacter;
// compiled from the same expression as pathStructure (".//structure")
private XPath pathStructure2;
private XPath pathCharacterText;
// shared ontology utilities, (re)created by every constructor call
public static TermOutputerUtilities ontoutil;
// shared ELK reasoner, (re)created by every constructor call and disposed at the end of outputEQs()
public static ELKReasoner elk;
// presumably the flag behind isRecordperformance(), which gates all database output -- confirm against the accessor
private static boolean recordperformance = true;
/*static{
try{
//TODO: figure out why the two calls give different results?
//elk = new ELKReasoner(TermOutputerUtilities.uberon);
elk = new ELKReasoner(new File(XML2EQ.uberon), true);
}catch(Exception e){
LOGGER.error("", e);
}
}*/
// created in the constructor; its conn field is the JDBC connection used for all table access in this class
private Dictionary dictionary;
//private EntitySearcherOriginal es = new EntitySearcherOriginal(dictionary);
//private TermSearcher ts = new TermSearcher(dictionary);
//private CharacterHandler ch = new CharacterHandler(ts, es, ontoutil);
//private RelationHandler rh = new RelationHandler(dictionary, es);
//private KeyEntityFinder kef = new KeyEntityFinder(es);
// NOTE(review): slim identifiers; not used in the visible part of this file -- presumably select a PATO slim, confirm with callers
public static final int RELATIONAL_SLIM=1;
public static final int ATTRIBUTE_SLIM=2;
// the static settings below are overwritten by every constructor call
// file path of the ontology handed to the ELK reasoner (wrapped in new File(...) by the constructor)
public static String uberon;
// NOTE(review): presumably ontology locations like uberon -- confirm
public static String bspo;
public static String pato;
// set from the constructor's spatialtermtable argument
public static String uniquespatialterms;
// set from the constructor's glossary argument; its exact meaning is not visible here
public static String glossary;
//a convenient way to separate Sereno style from others by listing the source file names here.
//TODO replace it with a more elegant approach
/*static {
serenostyle.add("sereno");
serenostyle.add("martinez");
serenostyle.add("earlyevolutionofarchosaurs");
ontoutil = new TermOutputerUtilities(ApplicationUtilities.getProperty("ontology.dir"), ApplicationUtilities.getProperty("database.name"));
}*/
/**
 * Prepares an EQ generation run.
 *
 * Stores the ontology settings in the static fields of this class, creates the shared
 * ontology utilities, dictionary and ELK reasoner, (re)creates the output table when
 * performance recording is enabled, and compiles the XPath expressions used later.
 *
 * Note that the static settings are assigned before TermOutputerUtilities, Dictionary
 * and ELKReasoner are constructed; that order is kept on purpose.
 *
 * @param sourcedir directory holding the XML files to process
 * @param database not used by this constructor; kept for interface compatibility
 * @param outputtable name of the table that receives the EQ proposals; an existing table of that name is dropped
 * @param uberon path of the ontology file loaded into the ELK reasoner
 * @param bspo stored in {@link #bspo}
 * @param pato stored in {@link #pato}
 * @param spatialtermtable stored in {@link #uniquespatialterms}
 * @param glossary stored in {@link #glossary}
 * @throws Exception if the JDBC driver cannot be loaded, the table cannot be created,
 *         the reasoner cannot be built or an XPath expression is invalid
 */
public XML2EQ(String sourcedir, String database, String outputtable, String uberon, String bspo, String pato, String spatialtermtable, String glossary) throws Exception {
    this.source = new File(sourcedir);
    this.outputtable = outputtable;
    XML2EQ.uberon = uberon;
    XML2EQ.pato = pato;
    XML2EQ.bspo = bspo;
    XML2EQ.uniquespatialterms = spatialtermtable;
    XML2EQ.glossary = glossary;
    XML2EQ.ontoutil = new TermOutputerUtilities();
    this.dictionary = new Dictionary();
    XML2EQ.elk = new ELKReasoner(new File(XML2EQ.uberon), true);
    if (isRecordperformance()) {
        if (dictionary.conn == null) {
            Class.forName("com.mysql.jdbc.Driver");
            dictionary.conn = DriverManager.getConnection(ApplicationUtilities.getProperty("database.url"));
        }
        Statement stmt = dictionary.conn.createStatement();
        try {
            // label and id fields are ontology-related fields
            // other fields are raw text
            // entity and quality fields are atomic
            // qualitynegated fields are alternative to quality and is composed as "not quality" for qualitynegated, "not(quality)" for qualitynegatedlabel, the "quality" has id
            // qualityid
            // qualitymodifier/label/id and entitylocator/label/id may hold multiple values separated by "," which preserves the order of multiple values
            stmt.execute("drop table if exists " + outputtable);
            stmt.execute("create table if not exists " + outputtable
                    + " (id int(11) not null unique auto_increment primary key, source varchar(500), characterID varchar(100), characterlabel varchar(1000), stateID varchar(100), statelabel text, "
                    + " entity text,entitylabel text, entityid text, " + "quality text,qualitylabel text, qualityid text,"+"relatedentity text,relatedentitylabel text, relatedentityid text, unontologizedentity text,unontologizedquality text,unontologizedrelatedentity text)" );
        } finally {
            // release the statement even when table creation fails; the connection stays open for later inserts
            stmt.close();
        }
    }
    pathStructure = XPath.newInstance(".//structure");
    pathWholeOrgStrucChar = XPath.newInstance(".//structure[@name='"+ApplicationUtilities.getProperty("unknown.structure.name")+"']/character");
    pathCharacter = XPath.newInstance(".//character");
    pathText2 = XPath.newInstance(".//text");
    pathRelation = XPath.newInstance(".//relation");
    pathRelationUnderCharacter = XPath.newInstance(".//statement[@statement_type='character']/relation");
    pathStructure2 = XPath.newInstance(".//structure");
    pathCharacterText = XPath.newInstance(".//statement[@statement_type='character']/text");
}
/**
 * Generates EQ proposals for every file in the source directory.
 *
 * For each file the XML is parsed and normalized, the character statement and its state
 * statements are located, and EQ proposals are produced either by the binary character
 * parser (when isBinary reports a binary character) or by the character/state statement
 * parsers. A state for which no EQ could be generated still yields a proposal with empty
 * entity and quality so that it shows up in the output. The proposals of a file are then
 * printed and, when performance recording is on, written to the output table.
 *
 * A failure in one file is logged and does not stop the remaining files. After all files
 * the HTML report is produced (when recording) and the shared ELK reasoner is disposed,
 * even if report generation fails.
 */
@SuppressWarnings("unchecked")
public void outputEQs() {
    File[] xmlfiles = this.source.listFiles();
    if (xmlfiles == null) {
        // listFiles() yields null when the path is not a directory or cannot be read
        LOGGER.error("cannot list files in source directory " + this.source);
        xmlfiles = new File[0];
    }
    try {
        for (File f : xmlfiles) {
            try {
                String src = f.getName();
                SAXBuilder builder = new SAXBuilder();
                Document xml = builder.build(f);
                Element root = xml.getRootElement();
                new XMLNormalizer(root).normalize();
                System.out.println("[" + count + "]" + src);
                count++;
                allEQs = new ArrayList<EQProposals>(); //allEQs from an xml file
                Element characterstatement = (Element) XMLNormalizer.pathCharacterStatement.selectSingleNode(root);
                if (characterstatement == null) {
                    LOGGER.error("no character statement found in " + src + "; file skipped");
                    continue;
                }
                String charactertext = characterstatement.getChildText("text");
                System.out.println("text: " + charactertext);
                List<Element> statestatements = XMLNormalizer.pathStateStatement.selectNodes(root);
                int btype = isBinary(statestatements);
                if (btype > 0) {
                    String characterid = characterstatement.getAttributeValue("character_id");
                    EQProposals posempty = new EQProposals();
                    EQProposals negempty = new EQProposals();
                    boolean pos = false; //used for incomplete binary statements: only one value (T/F) is present
                    boolean neg = false;
                    for (Element statestatement : statestatements) {
                        EQProposals empty = stampProvenance(new EQProposals(), src, characterid, charactertext,
                                statestatement.getAttributeValue("state_id"), statestatement.getChildText("text"));
                        if (statestatement.getChildText("text").matches(Dictionary.binaryTvalues1+"|"+Dictionary.binaryTvalues2)) {
                            posempty = empty;
                            pos = true;
                        } else {
                            negempty = empty;
                            neg = true;
                        }
                    }
                    //these two steps are needed only for incomplete statements (e.g., with only positive or negative states)
                    if (!pos) posempty = negempty;
                    if (!neg) negempty = posempty;
                    BinaryCharacterStatementParser bcsp = new BinaryCharacterStatementParser(ontoutil, charactertext, btype);
                    bcsp.parse(characterstatement, root, posempty, negempty);
                    if (bcsp.getEQStatements().size() == 0) {
                        // nothing could be generated: record one empty proposal per state
                        for (Element statestatement : statestatements) {
                            EQProposals empty = stampProvenance(new EQProposals(), src, characterid, charactertext,
                                    statestatement.getAttributeValue("state_id"), statestatement.getChildText("text"));
                            empty.setEntity(new EntityProposals());
                            empty.setQuality(new QualityProposals());
                            allEQs.add(empty);
                        }
                    } else {
                        allEQs = bcsp.getEQStatements();
                    }
                } else {
                    CharacterStatementParser csp = new CharacterStatementParser(ontoutil);
                    //if do not allow EQ to be generated from character statement alone, comment out the following lines.
                    EQProposals empty = stampProvenance(new EQProposals(), src, characterstatement.getAttributeValue("character_id"),
                            charactertext, characterstatement.getAttributeValue("state_id"), charactertext);
                    empty.setType("character");
                    csp.parse(characterstatement, root, empty);
                    //allEQs.addAll(csp.getEQStatements()); //disallow EQs solely from character statement.
                    keyentities = csp.getKeyEntities();
                    LOGGER.debug("XML2EQ: received keyentities");
                    for (EntityProposals ep : keyentities) LOGGER.debug(".."+ep.toString());
                    ArrayList<String> qualityclue = csp.getQualityClue();
                    StateStatementParser ssp = new StateStatementParser(ontoutil, keyentities, qualityclue, charactertext);
                    for (Element statestatement : statestatements) {
                        LOGGER.debug("XML2EQ: processing state statement...");
                        String statetext = statestatement.getChildText("text");
                        System.out.println("text: " + statetext);
                        String characterid = statestatement.getAttributeValue("character_id");
                        String stateid = statestatement.getAttributeValue("state_id");
                        empty = stampProvenance(new EQProposals(), src, characterid, charactertext, stateid, statetext);
                        empty.setType("state");
                        ssp.parse(statestatement, root, empty);
                        if (ssp.getEQStatements().size() == 0) {
                            // the provenance is written again after parsing, exactly as before,
                            // in case the parser touched the proposal it was given
                            stampProvenance(empty, src, characterid, charactertext, stateid, statetext);
                            empty.setEntity(new EntityProposals());
                            empty.setQuality(new QualityProposals());
                            allEQs.add(empty);
                        } else {
                            allEQs.addAll(ssp.getEQStatements());
                        }
                        ssp.clearEQStatements();
                    }
                    fixIncompleteStates(src, root);//try to fix states with incomplete EQs by drawing info from EQs from other states
                }
                outputEQs4CharacterUnit();
            } catch (Exception e) {
                LOGGER.error("", e);
            }
        }
        if (isRecordperformance()) {
            HTMLOutput output = new HTMLOutput();
            output.outputHTML(this.outputtable, "curator", 0, this.outputtable);
        }
    } finally {
        // always release the reasoner, even when report generation fails
        elk.dispose();
    }
}

/**
 * Writes the provenance fields (source file, character id/text, state id/text) onto a proposal.
 *
 * @return the proposal that was passed in, for call chaining
 */
private static EQProposals stampProvenance(EQProposals proposal, String src, String characterId, String characterText, String stateId, String stateText) {
    proposal.setSourceFile(src);
    proposal.setCharacterId(characterId);
    proposal.setCharacterText(characterText);
    proposal.setStateId(stateId);
    proposal.setStateText(stateText);
    return proposal;
}
/**
* use workbench to select/keep only the ones in the workbench
*/
/*
* private void discardNonTestCharacterUnits() throws Exception {
* Statement stmt = conn.createStatement();
* stmt.execute("delete from "+this.outputtable+" where source not in (select source from "+this.benchmarktable+ ")");
* }
*/
/**
* perform global sanity check and normalization
* global sanity check: one state text may generate n EQs but one of them must hold keyentities
* in the following example, EQ 2 and 3 are wrong
*
* [11]Armbruster_2004.xml_0ada121b-dfa5-4093-8ceb-483163cae12e.xml
* text::Lateral wall of metapterygoid channel
* text::absent
* 1 EQ::[E]lateral wall [Q]absent [EL]metapterygoid channel
* text::just a slight ridge
* 2 EQ::[E]ridge [Q]slight [just]
* text::triangular
* 3 EQ::[E]lateral wall [Q]triangular [EL]metapterygoid channel
* text::broad ridge, perpendicular to metapterygoid
* 4 EQ::[E]ridge [Q]broad [QM]metapterygoid
* text::long and rounded along entire length
* 5 EQ::[E]lateral wall [Q]long [EL]metapterygoid channel
* 6 EQ::[E]lateral wall [Q]rounded [along entire length] [EL]metapterygoid channel
*
*
* some of the EQs don't have a Q part: StateStatementParser.
*/
/**
 * Emits the EQ proposals collected in allEQs for the current file: each proposal is
 * written to the output table when performance recording is enabled and is always
 * printed to standard output.
 *
 * @throws Exception declared for callers; propagated from the table insert
 */
private void outputEQs4CharacterUnit() throws Exception {
    for (EQProposals proposal : allEQs) {
        if (isRecordperformance()) {
            insertEQs2Table(proposal);
        }
        String rendered = proposal.toString();
        System.out.println(rendered);
    }
}
/**
 * Writes one EQ proposal as a single row of the output table.
 *
 * Every entity, quality and related-entity proposal is rendered as
 * "<text> Score:[<confidence>]" and the renderings of one column are joined with "@,".
 * Scored columns are ordered by sort(); the unontologized columns carry no score and keep
 * their encounter order. A missing entity or quality on the proposal is replaced by an
 * empty proposal set first. SQL failures are logged and do not propagate.
 *
 * @param eQ the proposal to store; its entity/quality are filled in when null
 */
private void insertEQs2Table(EQProposals eQ)
{
    //fill in empty E/Q for E=null and/or Q=null cases
    if (eQ.getEntity() == null) {
        eQ.setEntity(new EntityProposals());
    }
    if (eQ.getQuality() == null) {
        eQ.setQuality(new QualityProposals());
    }

    StringBuilder entity = new StringBuilder();
    StringBuilder entitylabel = new StringBuilder();
    StringBuilder entityid = new StringBuilder();
    StringBuilder unontologizedentity = new StringBuilder();
    for (Entity e : eQ.getEntity().getProposals()) {
        appendEntityProposal(e, entity, entitylabel, entityid, unontologizedentity);
    }

    StringBuilder quality = new StringBuilder();
    StringBuilder qualitylabel = new StringBuilder();
    StringBuilder qualityid = new StringBuilder();
    StringBuilder unontologizedquality = new StringBuilder();
    StringBuilder relatedentity = new StringBuilder();
    StringBuilder relatedentitylabel = new StringBuilder();
    StringBuilder relatedentityid = new StringBuilder();
    StringBuilder unontologizedrelatedentity = new StringBuilder();
    for (Quality q : eQ.getQuality().getProposals()) {
        String score = " Score:[" + q.getConfidenceScore() + "]@,";
        // an unontologized quality is labelled with its raw string instead of an ontology label
        if (q instanceof RelationalQuality) {
            RelationalQuality rq = (RelationalQuality) q;
            quality.append(rq.getFullString()).append(score);
            qualityid.append(q.getId()).append(score);
            if (q.isOntologized()) {
                qualitylabel.append(q.getLabel()).append(score);
            } else {
                qualitylabel.append(rq.getFullString()).append(score);
            }
            for (Entity e : rq.getRelatedEntity().getProposals()) {
                appendEntityProposal(e, relatedentity, relatedentitylabel, relatedentityid, unontologizedrelatedentity);
            }
        } else if (q instanceof CompositeQuality) {
            CompositeQuality cq = (CompositeQuality) q;
            quality.append(cq.getFullString()).append(score);
            qualityid.append(cq.getFullId()).append(score);
            if (q.isOntologized()) {
                qualitylabel.append(cq.getFullLabel()).append(score);
            } else {
                qualitylabel.append(cq.getFullString()).append(score);
            }
        } else if (q instanceof NegatedQuality) {
            NegatedQuality nq = (NegatedQuality) q;
            quality.append(nq.getFullString()).append(score);
            qualityid.append(nq.getFullId()).append(score);
            if (q.isOntologized()) {
                qualitylabel.append(nq.getFullLabel()).append(score);
            } else {
                qualitylabel.append(nq.getFullString()).append(score);
            }
        } else {
            quality.append(q.getString()).append(score);
            qualityid.append(q.getId()).append(score);
            if (q.isOntologized()) {
                qualitylabel.append(q.getLabel()).append(score);
            } else {
                qualitylabel.append(q.getString()).append(score);
            }
        }
        String unontologized = q.getUnOntologized().replaceAll("(#)$", "");
        if (!unontologized.equals("")) {
            unontologizedquality.append(unontologized).append("@,");
        }
    }

    // column order must match the column list of the insert statement below
    String[] values = {
        eQ.getSourceFile(),
        eQ.getCharacterId(),
        eQ.getCharacterText(),
        eQ.getStateId(),
        eQ.getStateText(),
        sort(stripTrailingSeparator(entity)),//sort according to scores
        sort(stripTrailingSeparator(entitylabel)),
        sort(stripTrailingSeparator(entityid)),
        sort(stripTrailingSeparator(quality)),
        sort(stripTrailingSeparator(qualitylabel)),
        sort(stripTrailingSeparator(qualityid)),
        sort(stripTrailingSeparator(relatedentity)),
        sort(stripTrailingSeparator(relatedentitylabel)),
        sort(stripTrailingSeparator(relatedentityid)),
        stripTrailingSeparator(unontologizedentity),
        stripTrailingSeparator(unontologizedquality),
        stripTrailingSeparator(unontologizedrelatedentity)
    };

    String sql = "insert into "+this.outputtable +" (source,characterID,characterlabel,stateID,statelabel, entity,"+
            "entitylabel,entityid,quality,qualitylabel,qualityid,relatedentity,relatedentitylabel,relatedentityid,unontologizedentity,unontologizedquality,unontologizedrelatedentity) values"+
            "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
    PreparedStatement preparedStatement = null;
    try {
        preparedStatement = dictionary.conn.prepareStatement(sql);
        for (int i = 0; i < values.length; i++) {
            preparedStatement.setString(i + 1, values[i]);
        }
        preparedStatement.executeUpdate();
    } catch (SQLException e1) {
        LOGGER.error("failed to insert EQ proposal into " + this.outputtable, e1);
    } finally {
        // one statement is prepared per row, so it must be released here
        if (preparedStatement != null) {
            try {
                preparedStatement.close();
            } catch (SQLException closeFailure) {
                LOGGER.warn("failed to close insert statement", closeFailure);
            }
        }
    }
}

/**
 * Appends the scored string, label and id of one entity proposal to the given column
 * buffers, and its unontologized part (when not empty) to the unontologized buffer.
 * Composite entities contribute their full string/id; all others their plain string/id.
 */
private static void appendEntityProposal(Entity e, StringBuilder text, StringBuilder label, StringBuilder id, StringBuilder unontologized) {
    String score = " Score:[" + e.getConfidenceScore() + "]@,";
    label.append(e.getLabel()).append(score);
    String raw;
    if (e instanceof CompositeEntity) {
        CompositeEntity composite = (CompositeEntity) e;
        text.append(composite.getFullString()).append(score);
        id.append(composite.getFullID()).append(score);
        raw = composite.getunontologized();
    } else {
        text.append(e.getString()).append(score);
        id.append(e.getId()).append(score);
        if (e instanceof REntity) {
            raw = ((REntity) e).getunontologized();
        } else {
            raw = ((SimpleEntity) e).getunontologized();
        }
    }
    String cleaned = raw.replaceAll("(#)$", "");
    if (!cleaned.equals("")) {
        unontologized.append(cleaned).append("@,");
    }
}

/** Returns the buffer content without the "@," separator left behind by the last appended value. */
private static String stripTrailingSeparator(StringBuilder column) {
    return column.toString().replaceAll("(@,)$", "");
}
/**
 * Orders "@,"-separated scored proposals by descending score.
 *
 * Each proposal is expected to end with its score in brackets, e.g.
 * "eye Score:[0.8]". The score is read from the LAST bracket of the proposal so that
 * brackets inside the proposal text itself (e.g. "ridge [just] Score:[0.5]") do not
 * break parsing. Proposals with equal scores are ordered alphabetically. Empty
 * proposals are skipped, so an empty input yields an empty result.
 *
 * @param groups proposals joined with "@,"
 * @return the same proposals, highest score first, joined with "@,"
 * @throws NumberFormatException if a proposal carries no parsable bracketed score
 */
private String sort(String groups)
{
    // group the proposals by score
    Hashtable<Float, ArrayList<String>> byScore = new Hashtable<Float, ArrayList<String>>();
    for (String token : groups.split("(@,)")) {
        if (token == null || token.equals("")) {
            continue;
        }
        int open = token.lastIndexOf('[');
        if (open < 0) {
            throw new NumberFormatException("proposal carries no score: " + token);
        }
        Float score = Float.valueOf(token.substring(open + 1).replaceAll("]$", ""));
        ArrayList<String> sameScore = byScore.get(score);
        if (sameScore == null) {
            sameScore = new ArrayList<String>();
            byScore.put(score, sameScore);
        }
        sameScore.add(token);
    }

    // highest score first
    List<Float> scores = new ArrayList<Float>(byScore.keySet());
    Collections.sort(scores, Collections.reverseOrder());

    StringBuilder sorted = new StringBuilder();
    for (Float score : scores) {
        List<String> sameScore = byScore.get(score);
        Collections.sort(sameScore);
        for (String token : sameScore) {
            if (sorted.length() > 0) {
                sorted.append("@,");
            }
            sorted.append(token);
        }
    }
    return sorted.toString();
}
/* private ArrayList<String> printEntity(ArrayList<Entity> proposals) {
ArrayList<String> entitystrings = new ArrayList<String>();
String entity ="";
String entitylabel="";
String entityid="";
for(Entity e:proposals)
{
if(e instanceof CompositeEntity)
{
ArrayList<String> entitystring = printEntity(((CompositeEntity) e).getEntities());
entity+=entitystring.get(0);
entitylabel+=entitystring.get(1);
entityid+=entitystring.get(2);
}
else{
entity+=e.getString()+",";
entitylabel+=e.getLabel()+",";
entityid+=e.getId()+",";
}
}
entitystrings.add(entity.replaceAll(",$",""));
entitystrings.add(entitylabel.replaceAll(",$",""));
entitystrings.add(entityid.replaceAll(",$",""));
return entitystrings;
}*/
/**
*
[11]Armbruster_2004.xml_0ada121b-dfa5-4093-8ceb-483163cae12e.xml
* text::Lateral wall of metapterygoid channel
* text::absent
* 1 EQ::[E]lateral wall [Q]absent [EL]metapterygoid channel
* text::just a slight ridge
* 2 EQ::[E]ridge [Q]slight [just]
* text::triangular
* 3 EQ::[E]lateral wall [Q]triangular [EL]metapterygoid channel
* text::broad ridge, perpendicular to metapterygoid
* 4 EQ::[E]ridge [Q]broad [QM]metapterygoid
* text::long and rounded along entire length
* 5 EQ::[E]lateral wall [Q]long [EL]metapterygoid channel
* 6 EQ::[E]lateral wall [Q]rounded [along entire length] [EL]metapterygoid channel
*
*
*
*
* turn EQ::[E]ridge [Q]slight [just] to
* EQ::[E]lateral wall [Q]ridge [just slight] [EL]metapterygoid channel
*
* @param problems
* : EQs failed the sanity check
*/
/*private void repairProblemEQs(ArrayList<Hashtable<String, String>> problems) {
// to repair the first EQ
// EQ #2 in the above example
Hashtable<String, String> EQ = problems.get(0);
String olde = EQ.get("entity");
String oldq = EQ.get("quality").replaceAll("[\\[\\]]", "");
String oldqm = EQ.get("qualitymodifier");
String oldel = EQ.get("entitylocator");
for (String s : keyentities) {
EQ.put("entity", s);
}
EQ.put("entitylocator", keyentitylocator == null ? "" : keyentitylocator);
EQ.put("quality", olde + " [" + oldq + "]");
EQ.put("qualitymodifier", oldel + "," + oldqm);
}*/
/**
* For example, 1 character statement with 3 state statements
* <statement statement_type="character" character_id="0a1e6749-13fc-47be-bc7f-8184fc9c26ad" seg_id="0">
* <text>Shape of ancistrine opercle (ordered )</text>
* <structure id="o650" name="whole_organism">
* <character name="shape" value="shape" constraint="of ancistrine opercle" constraintid="o651" />
* </structure>
* <structure id="o651" name="opercle" constraint="ancistrine" />
* </statement>
*
* <statement statement_type="character_state" character_id="0a1e6749-13fc-47be-bc7f-8184fc9c26ad" state_id="4a99e866-54d9-4875-8b5e-385427db1245" seg_id="0">
* <text>sickle-shaped (<i>Peckoltia</i>-type )</text>
* <structure id="o652" name="whole_organism">
* <character name="shape" value="sickle-shaped" />
* </structure>
* </statement>
*
* <statement statement_type="character_state" character_id="0a1e6749-13fc-47be-bc7f-8184fc9c26ad" state_id="d53ba92f-0865-4456-9111-c6ff37fc624a" seg_id="0">
* <text>barshaped (<i>Ancistrus</i>-type )</text>
* <structure id="o653" name="whole_organism">
* <character name="shape" value="barshaped" />
* </structure>
* </statement>
*
* <statement statement_type="character_state" character_id="0a1e6749-13fc-47be-bc7f-8184fc9c26ad" state_id="f56a9b6a-9720-437c-a1f4-60f01cd1bb15" seg_id="0">
* <text>oval or triangular</text>
* <structure id="o654" name="whole_organism">
* <character name="shape" value="oval" />
* <character name="shape" value="triangular" />
* </structure>
* </statement>
*
* @param statements
*/
/**
* character: L1, L2, ..., Ln, V q(may contain QM, eg. length relative to eyes)
* states: V1, V2
*
* so for each state [e.g., V1]: E = Ln, Q=V1 [q], QM=parse from q, EL=L1, ..., L(n-1)
*
* @param charstatements
* @param states
* @param src
* @param root
*/
/*@SuppressWarnings("unchecked")
private void createEQs4CharacterUnitInSerenoStyle(List<Element> charstatements, List<Element> states, String src, Element root) throws Exception {
// collect category="character" terms from the glossarytable
if (this.characters == null) {
Statement stmt = dictionary.conn.createStatement();
ResultSet rs = stmt.executeQuery("select distinct term from " + this.glosstable + " where category='character' " + "union " + "select distinct term from "
+ this.tableprefix + "_term_category where category='character' ");
while (rs.next()) {
this.characters += rs.getString(1) + "|";
}
this.characters = characters.replaceFirst("\\|$", "");
}
// get E and ELs from character statement
addEQ4CharacterStatement(src, charstatements);
String chtext = charstatements.get(0).getChild("text").getTextTrim();
chtext = markcharacters(chtext);
System.out.println(chtext);
chtext = chtext.replaceAll("\\(.*?\\)", "");
String[] chparts = chtext.toLowerCase().split("\\s*,\\s*");
List<Element> structs = pathStructure.selectNodes(charstatements.get(0));
ArrayList<String> snames = new ArrayList<String>();
for (Element struct : structs) {
snames.add(Utilities.getStructureName(root, struct.getAttributeValue("id")).replaceFirst("(?<=\\w)_(?=\\d+$)", " "));
}
String E = "";
String ELs = "";
for (int i = 0; i < chparts.length; i++) {
String n = firstMatchedStructureName(chparts[i], snames, i);// match in absence of/before [character]
if (n != null) {
snames.remove(n);
ELs = E + "," + ELs;
E = n;
String rest = chparts[i].replaceFirst(n, "").trim();
String moreELs = "";
while (rest.length() > 0) {
n = firstMatchedStructureName(rest, snames, i * -1);
if (n != null) {
snames.remove(n);
moreELs = moreELs + "," + n;
rest = rest.replaceFirst(".*?\\b" + n, "").trim();
} else {
break;
}
}
ELs = moreELs + "," + ELs;
}
}
// get QM
// need to be changed
String QMs = "";
for (String sname : snames) {// some remaining structures after [character] are QMs
if (chtext.indexOf("] of " + sname) > 0) {// Postorbital, [form] of dorsal surface
ELs = E + "," + ELs;
E = sname;
} else {
QMs += sname + ",";
}
}
ELs = ELs.replaceAll(",+", ",").replaceFirst("^,", "").replaceFirst(",$", "");
// process states
Hashtable<String, String> EQ = new Hashtable<String, String>();
Utilities.initEQHash(EQ);
EQ.put("source", src);
EQ.put("entity", E);
EQ.put("entitylocator", ELs);
EQ.put("type", "state");
for (Element state : states) {
Hashtable<String, String> EQc = (Hashtable<String, String>) EQ.clone();
String description = state.getChild("text").getTextTrim();
EQc.put("description", description);
EQc.put("characterid", state.getAttributeValue("character_id"));
EQc.put("stateid", state.getAttributeValue("state_id"));
Element firststruct = (Element) state.getChildren("structure").get(0);
if (!firststruct.getAttributeValue("name").contains("whole_organism")) {
// noun as state
String fsname = Utilities.getStructureName(root, firststruct.getAttributeValue("id"));
String characterstr = charactersAsString(root, firststruct);
if (description.endsWith(fsname)) {// form: low crest (noun as state)
EQc.put("quality", characterstr + " " + fsname);
this.allEQs.add(EQc);
} else {
EQc.put("entitylocator", EQc.get("entity") + "," + EQc.get("entitylocator").replaceFirst(",$", ""));
EQc.put("entity", fsname);
EQc.put("quality", characterstr);
this.allEQs.add(EQc);
}
} else {
// collecting all characters of whole_organism
List<Element> chars = pathWholeOrgStrucChar.selectNodes(state);
for (Element chara : chars) {
Hashtable<String, String> EQi = (Hashtable<String, String>) EQc.clone();
EQi.put("quality", charactersAsString(root, firststruct));
if (chara.getAttribute("constraintid") != null) {
String names = Utilities.getStructureName(root, chara.getAttributeValue("constraintid"));
names = names + "," + Utilities.getStructureChain(root, "//relation[@name='part_of'][@from='" + chara.getAttributeValue("constraintid") + "']");
names = names.replaceFirst(",$", "");
QMs = QMs + "," + names;
}
QMs = QMs.replaceFirst(",$", "").replaceFirst("^,", "").replaceAll(",+", ",");
EQi.put("qualitymodifier", QMs);
this.allEQs.add(EQi);
}
// collecting relations of whole_organism
List<Element> wos = XMLNormalizer.pathWholeOrganismStructure.selectNodes(state);
for (Element wo : wos) {
String id = wo.getAttributeValue("id");
List<Element> rels = XPath.selectNodes(state, ".//relation[@from='" + id + "']");
for (Element rel : rels) {
Hashtable<String, String> EQi = (Hashtable<String, String>) EQc.clone();
String relname = rel.getAttributeValue("name");
String toid = rel.getAttributeValue("to");
String toname = Utilities.getStructureName(root, toid);
toname = toname + "," + Utilities.getStructureChain(root, "//relation[@name='part_of'][@from='" + toid + "']");
toname = toname.replaceFirst(",$", "");
String negation = rel.getAttributeValue("negation");
if (negation.contains("true")) {
EQi.put("qualitynegated", "not " + relname);
} else {
EQi.put("quality", relname);
}
EQi.put("qualitymodifier", toname);
this.allEQs.add(EQi);
}
}
}
// deal with other structures
}
}*/
/**
 * Concatenates the values of all character elements found under a structure.
 *
 * Each character contributes its "value" attribute; when it has a non-empty "modifier"
 * attribute, the modifier follows in square brackets. Parts are separated by single
 * spaces and the result carries no leading or trailing whitespace.
 *
 * @param root not used by this method; kept for interface compatibility
 * @param firststruct the element whose descendant character elements are read
 * @return all character values as one string, e.g. "low [slightly] rounded"; empty when there is no character
 * @throws Exception if the XPath evaluation fails
 */
@SuppressWarnings("unchecked")
private String charactersAsString(Element root, Element firststruct) throws Exception {
    StringBuilder chstring = new StringBuilder();
    List<Element> chars = pathCharacter.selectNodes(firststruct);
    for (Element chara : chars) {
        String m = (chara.getAttribute("modifier") == null ? "" : chara.getAttributeValue("modifier"));
        chstring.append(chara.getAttributeValue("value")).append(" ");
        if (m.length() > 0) {
            chstring.append("[").append(m).append("] ");
        }
    }
    // String.trim() returns a new string; its result must be the one returned
    return chstring.toString().trim();
}
/**
 * Puts square brackets around every character term found in the text,
 * e.g. "tooth, height" becomes "tooth, [height]".
 *
 * The terms are taken from this.characters, split on '|'. Each term is
 * passed to String.replaceAll unchanged, so it is interpreted as a regular
 * expression. Runs of consecutive brackets are collapsed to a single one
 * afterwards.
 *
 * @param chtext the text to mark up
 * @return chtext with the character terms bracketed
 */
private String markcharacters(String chtext) {
    String marked = chtext;
    for (String term : this.characters.split("\\|")) {
        String bracketed = "[" + term + "]";
        marked = marked.replaceAll(term, bracketed);
    }
    marked = marked.replaceAll("\\]+", "]");
    return marked.replaceAll("\\[+", "[");
}
/**
 * Checks whether one of the given structure names occurs, at a word
 * boundary, in the part of the text that precedes the first "[character]"
 * marker.
 *
 * Names are compared lower-cased and with underscores replaced by blanks.
 * This works only when the names in the text are in singular form like
 * those in snames (this may be true for Sereno style).
 *
 * When a name matches, the whole text also ends with that name and i is 0,
 * the matched entry is removed from snames. The entry removed is the
 * original list element, not its normalised form, so names written with
 * underscores or upper-case letters are removed as well.
 *
 * @param text contains no , or ;
 * @param snames candidate structure names; may be modified as described above
 * @param i the matched name is removed from snames only when this is 0
 * @return the trimmed input text when a structure name matched; null when
 *         snames is empty or nothing matched
 */
private String firstMatchedStructureName(String text, ArrayList<String> snames, int i) {
    if (snames.size() == 0)
        return null;
    String textc = text;
    // remove the character term; the appended blank guarantees that the
    // word-stripping step below always finds whitespace and terminates
    text = text.replaceFirst("\\[.*$", "") + " ";
    do {
        for (String original : snames) {
            String sname = original.toLowerCase().replaceAll("_", " ");
            if (text.startsWith(sname)) {
                if (i == 0 && textc.endsWith(sname)) {
                    // returning right away, so removing inside the loop is safe
                    snames.remove(original);
                }
                return textc.trim();
            }
        }
        // no name starts here: drop the leading word and try again
        text = text.replaceFirst("^.*?\\s+", "");
    } while (text.length() > 0);
    return null;
}
/*@SuppressWarnings("unchecked")
private Element getFalseState(List<Element> states) {
// copy or negate the EQ for each state
for (Element state : states) {
Element text = state.getChild("text");
String stext = text.getTextTrim();
if (stext.matches("(" + Dictionary.binaryFvalues + ")")) {
return state;
}
}
return null;
}*/
/*private Element getTrueState(List<Element> states) {
// copy or negate the EQ for each state
for (Element state : states) {
Element text = state.getChild("text");
String stext = text.getTextTrim();
if (stext.matches("(" + Dictionary.binaryTvalues + ")")) {
return state;
}
}
return null;
}*/
/**
* BinaryCharacter: those taking yes/no or present/absent as character states.
*
* case 1: "expanded ribs: present/absent" =>ribs: expanded/not expanded
*
* "Preopercular latero-sensory canal leaves preopercle at first exit and enters a plate: yes/no"
* =>Preopercular latero-sensory canal: position (1 EQ)
*
* TODO
* text::Prearticular with mesially projecting flange on dorsal edge along posterior border of adductor fossa
* text::no
* text::yes
*
* No need to analyze state statements ( since they are binary values).
* Analyzing character statement alone is sufficient to generate one or more EQs
*
*
* @param chars
* @param src
* @param root
* @return an arraylist of EQs, each is an EQ-hashtable. Only
*/
@SuppressWarnings("unchecked")
/*
 * Placeholder for filling in the negated-quality fields of an EQ:
 *   EQ.put("quality", "");
 *   EQ.put("qualitylabel", "");
 *   EQ.put("qualityid", "");
 *   EQ.put("qualitynegated", "");
 *   EQ.put("qualitynegatedlabel", "");
 *   EQ.put("qnparentlabel", "");
 *   EQ.put("qnparentid", "");
 *
 * Not implemented yet: the method only strips the first "not " from
 * qualitynegated and trims the result; EQ is neither read nor modified.
 */
private void insertQualityNegated(String qualitynegated, Hashtable<String, String> EQ){
    String withoutNot = qualitynegated.replaceFirst("not ", "");
    String positiveTerm = withoutNot.trim();
}
/**
 * Decides whether every state of a character holds a plain binary value.
 *
 * A state whose text is more than a bare binary value makes the whole
 * character non-binary, for example:
 *   yes but interrupted by Meckelian foramina or fenestrae
 *   yes by prearticular
 *
 * The text of each state is read through pathText2 and matched against the
 * value patterns defined in Dictionary (binaryTvalues1/binaryFvalues1 and
 * binaryTvalues2/binaryFvalues2).
 *
 * @param states the state elements of one character
 * @return -1: not a binary statement (also when states is empty or the
 *         states mix the two value families); 1: present/absent; 2: yes/no
 * @throws Exception if the XPath selection fails
 */
private int isBinary(List<Element> states) throws Exception {
    if (states.size() == 0) {
        return -1;
    }
    String family1 = "(" + Dictionary.binaryTvalues1 + "|" + Dictionary.binaryFvalues1 + ")";
    String family2 = "(" + Dictionary.binaryTvalues2 + "|" + Dictionary.binaryFvalues2 + ")";
    String eitherFamily = "(" + Dictionary.binaryTvalues1 + "|" + Dictionary.binaryFvalues1 +"|"+ Dictionary.binaryTvalues2 + "|" + Dictionary.binaryFvalues2 + ")";
    boolean allFamily1 = true;
    boolean allFamily2 = true;
    for (Element state : states) {
        String value = ((Element) pathText2.selectSingleNode(state)).getTextTrim();
        if (!value.matches(eitherFamily)) {
            return -1;
        }
        if (!value.matches(family2)) {
            allFamily2 = false;
        }
        if (!value.matches(family1)) {
            allFamily1 = false;
        }
    }
    if (allFamily2) {
        return 2;
    }
    return allFamily1 ? 1 : -1;
}
/**
 * Revisits states for which no ontologized quality was found and tries to
 * recover a quality from the state text.
 *
 * Motivating case, [0]Swartz 2012.xml_states1034.xml:
 *   text::Body scale [morphology]
 *   text::&lt;rhomboid&gt; with internal ridge
 *   text::round
 * round is a shape, so the 1st state should be about shape too; 'rhomboid'
 * is not a structure but a shape in PATO. The state is reprocessed, looking
 * for a shape term in PATO.
 *
 * TODO: (needs info about other characters to know pit and ridge are not
 * important, regular is important)
 *   text::Nature of dermal ornament
 *   text::tuberculate
 *   text::fairly regular pit and ridge
 *   text::irregular [PATO:irregular spatial pattern, irregular shape, irregular sleep pattern, etc.]
 *   text::absent or almost absent
 * TODO make sure each state has an EQ on the key entities.?
 *
 * Steps:
 * 1. identifyStates splits the states recorded in allEQs into incomplete
 *    state ids and complete EQs.
 * 2. The quality labels of the complete EQs whose entity matches a key
 *    entity are collected, ignoring "absent".
 * 3. For every incomplete state, the 1- to 4-grams of its text are looked
 *    up as qualities. A hit is accepted when commonParent finds a PATO
 *    upper class shared with one of the collected labels; if allEQs holds
 *    no related EQ for the state yet, a new EQProposals carrying only the
 *    source/character/state metadata is added.
 *
 * A state counts as solved only when a hit was actually accepted. Longer
 * n-grams are not tried once some n-gram length produced an accepted hit.
 *
 * @param src source identifier stored on newly created EQs
 * @param root root element used to look up the statement of a state
 */
private void fixIncompleteStates(String src, Element root) {
    ArrayList<String> incompletestateids = new ArrayList<String>();//not ontologized character state (0 EQ for this state)
    ArrayList<EQProposals> completestateids = new ArrayList<EQProposals>();//ontologized E and Q
    identifyStates(incompletestateids, completestateids);
    if (incompletestateids.size() == 0) {
        return;
    }

    //collect qualities from complete states that are about a key entity
    ArrayList<String> qualitylabels = new ArrayList<String>();
    for (EQProposals EQ : completestateids) {
        EntityProposals ep = EQ.getEntity();
        for (Entity e : ep.getProposals()) {
            String entitylabel;
            if (e instanceof SimpleEntity) entitylabel = ((SimpleEntity) e).getLabel();
            else entitylabel = ((CompositeEntity) e).getTheSimpleEntity().getLabel();
            if (!matchWithKeyEntities(entitylabel)) {
                continue;
            }
            QualityProposals qp = EQ.getQuality();
            for (Quality q : qp.getProposals()) {
                // reduce every kind of quality to the plain quality/qualities it wraps
                ArrayList<Quality> cores = new ArrayList<Quality>();
                if (q instanceof RelationalQuality) {
                    for (Quality q1 : ((RelationalQuality) q).getQuality().getProposals()) {
                        cores.add(q1);
                    }
                } else if (q instanceof NegatedQuality) {
                    cores.add(((NegatedQuality) q).getQuality());
                } else if (q instanceof CompositeQuality) {
                    cores.add(((CompositeQuality) q).getMainQuality());
                } else { //simple quality
                    cores.add(q);
                }
                for (Quality core : cores) {
                    if (core.getClassIRI().compareTo(Dictionary.absent.getClassIRI()) != 0) { //ignore absent
                        qualitylabels.add(core.getLabel());
                    }
                }
            }
        }
    }

    //deal with incomplete states
    for (String stateid : incompletestateids) {
        boolean solved = false;
        String text = "";
        try {
            Element texte = (Element) XPath.selectSingleNode(root, ".//statement[@state_id='"+stateid+"']/text");
            text = texte.getTextNormalize();
        } catch (Exception e) {
            LOGGER.error("", e);
        }
        String[] tokens = text.split("\\s+");
        int maxn = tokens.length >= 4 ? 4 : tokens.length;
        for (int n = 1; n <= maxn && !solved; n++) {
            for (int b = 0; b < tokens.length - n + 1; b++) {
                String ngram = Utilities.join(tokens, b, b+n-1, " ");
                ngram = ngram.replaceAll("[()\\[\\]{}?+]", "");
                //TODO consider negation
                ArrayList<FormalConcept> qs = new TermSearcher().searchTerm(ngram, "quality");
                if (qs == null) {
                    continue;
                }
                for (FormalConcept fc : qs) {
                    String cp = commonParent(fc.getLabel(), qualitylabels);
                    //TODO matches parent quality or any of its offsprings is fine.
                    if (cp == null || !cp.matches(".*?\\b("+dictionary.patoupperclasses+")\\b.*")) {
                        continue; // this hit is unrelated to the known qualities, try the next one
                    }
                    if (relatedEQ(stateid, ngram) == null) { //add one
                        EQProposals EQp = new EQProposals();
                        //add metadata
                        String characterid = "";
                        try {
                            Element statement = (Element) XPath.selectSingleNode(root, ".//statement[@state_id='"+stateid+"']");
                            characterid = statement.getAttributeValue("character_id");
                        } catch (Exception e) {
                            LOGGER.error("", e);
                        }
                        EQp.setSource(src);
                        EQp.setCharacterId(characterid);
                        EQp.setStateId(stateid);
                        EQp.setStateText(text);
                        //TODO the entity and the quality of the new EQ are not set yet
                        allEQs.add(EQp);
                    }
                    //accept this result for this stateid
                    solved = true;
                    break;
                }
            }
        }
    }
}
/**
 * Searches the EQs recorded in allEQs for the given state for one whose
 * state text overlaps with the n-gram.
 *
 * @param stateid id of the state whose EQs are examined
 * @param ngram phrase taken from the state text
 * @return the first EQ of the state whose non-empty state text contains
 *         ngram or is contained in ngram; null when there is none
 */
private EQProposals relatedEQ(String stateid, String ngram) {
    for (EQProposals candidate : this.getEQsforState(stateid)) {
        String statetext = candidate.getStateText();
        if (statetext == null || statetext.length() == 0) {
            continue;
        }
        if (statetext.contains(ngram) || ngram.contains(statetext)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Looks for a PATO upper class that qlabel shares with any of the given
 * quality labels.
 *
 * The labels are tried in list order; the first common parent reported by
 * commonParentBtw that contains one of dictionary.patoupperclasses as a
 * whole word is returned.
 *
 * @param qlabel label of the quality to test
 * @param qualitylabels labels of the qualities to compare against
 * @return the shared parent, or null when no label yields one
 */
private String commonParent(String qlabel, ArrayList<String> qualitylabels) {
    for (int k = 0; k < qualitylabels.size(); k++) {
        String shared = commonParentBtw(qlabel, qualitylabels.get(k));
        if (shared == null) {
            continue;
        }
        boolean isUpperClass = shared.matches(".*?\\b("+dictionary.patoupperclasses+")\\b.*");
        if (isUpperClass) {
            return shared;
        }
    }
    return null;
}
/**
 * Finds a parent that two quality labels have in common.
 *
 * For each label the chain of parents is collected by repeatedly calling
 * ontoutil.retreiveParentInfoFromPATO and taking element 1 of the result
 * (presumably the parent label, possibly a comma-separated list - confirm
 * against TermOutputerUtilities). A chain ends at "quality", at a missing
 * or empty parent, or when a parent repeats. The labels themselves are not
 * part of their chains.
 *
 * @param qlabel1 label of the first quality
 * @param qlabel2 label of the second quality
 * @return the first entry of qlabel1's chain that occurs as a
 *         comma-delimited element of an entry of qlabel2's chain; null when
 *         a label is null, a chain is empty, or the chains share nothing
 */
private String commonParentBtw(String qlabel1, String qlabel2) {
    if (qlabel1 == null || qlabel2 == null) return null;
    ArrayList<String> path1 = new ArrayList<String> ();
    ArrayList<String> path2 = new ArrayList<String> ();
    String temp[];
    String parent = qlabel1;
    while(parent.compareTo("quality")!=0){
        temp=ontoutil.retreiveParentInfoFromPATO(parent);
        parent = (temp!=null && temp.length>1)?temp[1]:null;
        if((parent==null)||(parent.length()==0)) break;
        if(path1.contains(parent)) break; // cycle in the parent chain, stop climbing
        path1.add(parent);
    }
    parent = qlabel2;
    while(parent.compareTo("quality")!=0){
        temp=ontoutil.retreiveParentInfoFromPATO(parent);
        parent = (temp!=null && temp.length>1)?temp[1]:null;
        if((parent==null)||(parent.length()==0)) break;
        if(path2.contains(parent)) break; // cycle in the parent chain, stop climbing
        path2.add(parent);
    }
    if(path1.size()==0 || path2.size()==0) return null;
    for(String p1 : path1){
        // the label is data, not a pattern: quote it so that regex
        // metacharacters in a label cannot break or widen the match
        String aselement = ".*(^|,)"+java.util.regex.Pattern.quote(p1)+"(,|$).*";
        for(String p2: path2){
            if(p2.matches(aselement)) return p1;
        }
    }
    return null;
}
/**
 * Tests a label against the labels of the current key entities.
 *
 * For a SimpleEntity proposal its own label is used, for a CompositeEntity
 * proposal the label of its simple entity. Proposals of other kinds, or
 * without a label, are skipped.
 *
 * @param entitylabel the label to look for
 * @return true if the entitylabel matches one of the key entities; false
 *         otherwise, including when no key entities are set
 */
private boolean matchWithKeyEntities(String entitylabel) {
    if (this.keyentities != null) {
        for (EntityProposals proposals : this.keyentities) {
            for (Entity candidate : proposals.getProposals()) {
                String label = null;
                if (candidate instanceof SimpleEntity) {
                    label = ((SimpleEntity) candidate).getLabel();
                }
                if (candidate instanceof CompositeEntity) {
                    label = ((CompositeEntity) candidate).getTheSimpleEntity().getLabel();
                }
                if (label == null) {
                    continue;
                }
                if (label.compareTo(entitylabel) == 0) {
                    return true;
                }
            }
        }
    }
    return false;
}
/**
 * Collects the EQs recorded for one state.
 *
 * @param stateid id of the state
 * @return the EQs in allEQs that have the stateid, in allEQs order; EQs
 *         without a state id are never returned
 */
private ArrayList<EQProposals> getEQsforState(String stateid) {
    ArrayList<EQProposals> matches = new ArrayList<EQProposals>();
    for (EQProposals candidate : allEQs) {
        String candidatestate = candidate.getStateId();
        if (candidatestate == null) {
            continue; // no state id to compare (presumably EQs made for characters)
        }
        if (candidatestate.compareTo(stateid) == 0) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Populates the two parameters with results saved in allEQs.
 *
 * Every EQ of type "state" names a state; all EQs recorded for that state
 * (see getEQsforState) are then examined one by one:
 * - an EQ is complete when one of its ontologized entity proposals matches
 *   a key entity and one of its quality proposals is ontologized with a
 *   non-empty label;
 * - a state is incomplete when at least one of its EQs has no such quality.
 * Need to examine the effectiveness of this method in the context of the
 * proposals: should only high-confidence EQs be considered?
 *
 * Each state is examined once, so neither list receives duplicates even
 * when a state owns several EQs.
 *
 * @param incompletestateids receives the ids of the incomplete states
 * @param completestateeqs receives the complete EQs
 */
private void identifyStates(ArrayList<String> incompletestateids,
        ArrayList<EQProposals> completestateeqs) {
    Set<String> visitedstateids = new HashSet<String>();
    for(EQProposals EQp: allEQs){
        if(EQp.getType()==null || EQp.getType().compareTo("state")!=0) continue;
        String stateid = EQp.getStateId();
        // several EQs may share a state id; examine the EQs of a state only once
        if(!visitedstateids.add(stateid)) continue;
        for(EQProposals aEQp: getEQsforState(stateid)){
            boolean hasentity = false;
            boolean hasquality = false;
            boolean haskeyentity = false;
            //if any E is good?
            if(aEQp.getEntity()!=null){
                for(Entity E: aEQp.getEntity().getProposals()){
                    String e;
                    boolean ontologized;
                    if(E instanceof SimpleEntity){
                        e = ((SimpleEntity)E).getLabel();
                        ontologized = ((SimpleEntity)E).isOntologized();
                    }else{
                        e = ((CompositeEntity)E).getTheSimpleEntity().getLabel();
                        ontologized = ((CompositeEntity)E).isOntologized();
                    }
                    if(ontologized){
                        if(e.length()>0) hasentity = true;
                        if(hasentity && matchWithKeyEntities(e)) haskeyentity = true; //haskeyentity is true if any of the proposal meets the condition
                    }
                }
            }
            //if any Q is good?
            if(aEQp.getQuality()!=null){
                for(Quality Q: aEQp.getQuality().getProposals()){
                    String q = Q!=null && Q.isOntologized()? Q.getLabel():"";
                    if(q!=null && q.length()>0) hasquality = true;
                }
            }
            if(haskeyentity && hasquality) completestateeqs.add(aEQp);
            //this EQ of the state has no ontologized quality
            if(!hasquality && !incompletestateids.contains(stateid)) incompletestateids.add(stateid);
        }
    }
}
/**
 * Picks the structures of a statement that can act as entities: those that
 * have child elements and those that are the "from" end of a relation.
 *
 * The structures are selected with pathStructure2 and returned in that
 * order. A failure during selection is logged and whatever was selected up
 * to that point is returned.
 *
 * @param statement the statement element to search
 * @return the selected structure elements, never null
 */
@SuppressWarnings("unchecked")
private List<Element> selectEntityStructures(Element statement) {
    ArrayList<Element> selected = new ArrayList<Element>();
    try{
        List<Element> candidates = pathStructure2.selectNodes(statement);
        for (Element struct : candidates) {
            boolean keep = struct.getChildren().size() > 0;
            if (!keep) {
                // childless structure: keep it only if a relation starts from it
                String relationsfrom = ".//relation[@from='" + struct.getAttributeValue("id") + "']";
                List<Element> from = XPath.selectNodes(statement, relationsfrom);
                keep = from.size() > 0;
            }
            if (keep) {
                selected.add(struct);
            }
        }
    }catch(Exception e){
        LOGGER.error("", e);
    }
    return selected;
}
/*private void insertEQs2Table(EQStatement EQ) throws Exception {
// print
String entitylabel = EQ.get("entitylabel")+"["+EQ.get("entityid")+"]";
String quality = EQ.get("qualitylabel")+"["+EQ.get("qualityid")+"]";
String qualitynegated = EQ.get("qualitynegatedlabel");
String qualitymodifierlabel = EQ.get("qualitymodifierlabel")+"["+EQ.get("qualitymodifierid")+"]";
String entitylocator = EQ.get("entitylocator");
String entitylocatorlabel = EQ.get("entitylocatorlabel")+"["+EQ.get("entitylocatorid")+"]";
if (quality.length() == 0 && qualitynegated.length() == 0){
System.out.println("EQ::[E]" + entitylabel + " [Q]" + quality + (qualitymodifierlabel.length() > 0 ? " [QM]" + qualitymodifierlabel : "")
+ (entitylocatorlabel.length() > 0 ? " [EL]" + entitylocatorlabel : ""));
}else
// quality and qualitynegated can not both hold values! //changed by hong march 2013, they can hold the same value: quality=qualitynegated
// if(quality.length()>0 || entitylocator.length()>0){
if (quality.length() > 0) {
System.out.println("EQ::[E]" + entitylabel + " [Q]" + quality + (qualitymodifierlabel.length() > 0 ? " [QM]" + qualitymodifierlabel : "")
+ (entitylocatorlabel.length() > 0 ? " [EL]" + entitylocatorlabel : ""));
} else if (qualitynegated.length() > 0) {
System.out.println("EQ::[E]" + entitylabel + " [QN]" + qualitynegated + (qualitymodifierlabel.length() > 0 ? " [QM]" + qualitymodifierlabel : "")
+ (entitylocatorlabel.length() > 0 ? " [EL]" + entitylocatorlabel : ""));
} else if (quality.length() == 0 && qualitynegated.length() == 0 && entitylocator.length() > 0) {
System.out.println("EQ::[E]" + entitylabel + " [Q]" + quality + (qualitymodifierlabel.length() > 0 ? " [QM]" + qualitymodifierlabel : "")
+ (entitylocatorlabel.length() > 0 ? " [EL]" + entitylocatorlabel : ""));
} else {
if (EQ.get("type").compareTo("character") != 0)
System.out.println("A EQ was not printed");
}
// compose sql for insertion
String fieldstring = "";
String valuestring = "";
while (fields.hasMoreElements()) {
String f = fields.nextElement();
if (f.compareTo("type") != 0) {
fieldstring += f + ",";
String fv = EQ.get(f);
if (EQ.get("type").compareTo("character") == 0) {
valuestring += "'" + (f.matches("(source|characterid|description)") ? fv : "") + "',";
} else {
valuestring += "'" + fv + "',";
}
}
}
fieldstring = fieldstring.replaceFirst(",$", "");
valuestring = valuestring.replaceFirst(",$", "");
String q = "insert into " + this.outputtable + "(" + fieldstring + ") values " + "(" + valuestring + ")";
Statement stmt = dictionary.conn.createStatement();
stmt.execute(q);
}*/
/**
* [8]Armbruster_2004.xml_0638f15b-0de4-45fd-a3af-b1d209cea9d3.xml
* text::Walls of metapterygoid channel
* text::lateral wall slightly smaller to just slightly larger than mesial wall, or absent
* EQ::[E]lateral wall [Q]smaller [slightly]
* EQ::[E]lateral wall [Q]larger [just slightly] [QM]mesial wall
* EQ::[E]lateral wall [Q]absent
* text::mesial wall much taller
* EQ::[E]mesial wall [Q]taller [much]
*
* @param entity
* @return
*/
/*private boolean isRelated2KeyEntities(String entity) {
String[] tokens = entity.split("\\s*,\\s*");
for (String token : tokens) {
for(String keyentity:this.keyentities){
if (token.contains(keyentity) || keyentity.contains(token))
return true;
}
}
return false;
}*/
//if not a key entity and not a key element, may need to construct a new entity and/or inherit entity locators from keyentities.
/**
 * Meant to copy the entity locator of a matching key entity onto an EQ.
 *
 * Currently a no-op: the whole body is commented out. It was written
 * against an earlier hashtable-based EQ (EQ.get/EQ.put) and has not been
 * ported to EQProposals.
 *
 * @param EQ to be updated with an entity locator
 * @param entity the entity phrase that was compared with the key entity phrases
 */
//private void inheritEntityLocator(EQStatement EQ, String entity){
private void inheritEntityLocator(EQProposals EQ, String entity){
// NOTE(review): if this body is revived, the string concatenations in the
// last three EQ.put calls bind tighter than "==null?", so those ternaries
// need parentheses around the null check.
/* String elid = EQ.get("entitylocatorid");
for(Entity keyentity: this.keyentities){
String keyentityphrase = keyentity.getPrimaryEntityString();
if(keyentityphrase!=null && keyentityphrase.compareTo(entity)==0){ //if entityphrase and keyentityphrase are the same, inherit the entity locator
String entitylocator = keyentity.get("entitylocator");
String entitylocatorid = keyentity.get("entitylocatorid");
String entitylocatorlabel = keyentity.get("entitylocatorlabel");
if(elid==null || elid.length()==0){
EQ.put("entitylocator", entitylocator==null? "":entitylocator);
EQ.put("entitylocatorid", entitylocatorid==null? "":entitylocatorid);
EQ.put("entitylocatorlabel", entitylocatorlabel==null? "":entitylocatorlabel);
}else if(elid.compareTo(entitylocatorid)!=0){
EQ.put("entitylocator", EQ.get("entitylocator")+","+entitylocator==null? "":entitylocator);
EQ.put("entitylocatorid", EQ.get("entitylocatorid")+","+entitylocatorid==null? "":entitylocatorid);
EQ.put("entitylocatorlabel", EQ.get("entitylocatorlabel")+","+entitylocatorlabel==null? "":entitylocatorlabel);
}
}
}*/
}
/**
* if resultsfromrelations.get("entitylocator")!=null
* @param resultsfromrelations
*/
/* private void addentitylocator4keyentities(
Hashtable<String, Object> resultsfromrelations, String entitylabel) {
if(resultsfromrelations != null && entitylabel !=null){
String entitylocator = (String)resultsfromrelations.get("entitylocator");
if(entitylocator != null){
String entitylocatorid = (String)resultsfromrelations.get("entitylocatorid");
String entitylocatorlabel = (String)resultsfromrelations.get("entitylocatorlabel");
for(Hashtable<String, String> keyentity: this.keyentities){
String keyentitylabel = keyentity.get("entitylabel");
if(keyentitylabel!=null){
if(keyentitylabel.compareTo(entitylabel)==0){
keyentity.put("entitylocator", entitylocator);
if(entitylocatorid!=null) keyentity.put("entitylocatorid", entitylocatorid);
if(entitylocatorlabel!=null) keyentity.put("entitylocatorlabel", entitylocatorlabel);
}
}
}
}
}
}*/
/**
 * Reads the character id and the state id from the element that directly
 * contains the given structure (expected to be its &lt;statement&gt;).
 *
 * Hashtable does not accept null values, so an id whose attribute is
 * missing, or any id when the structure has no parent element, is stored
 * as an empty string.
 *
 * @param root not used by this method
 * @param struct the structure element whose parent is inspected
 * @return a table with the keys "characterid" and "stateid"
 */
private Hashtable<String, String> getStateId(Element root, Element struct) {
    Hashtable<String, String> srcids = new Hashtable<String, String>();
    Element statement = struct.getParentElement();
    String characterid = null;
    String stateid = null;
    if (statement != null) {
        characterid = statement.getAttributeValue("character_id");
        stateid = statement.getAttributeValue("state_id");
    }
    srcids.put("characterid", characterid == null ? "" : characterid);
    srcids.put("stateid", stateid == null ? "" : stateid);
    return srcids;
}
/**
 * Evaluation runs.
 *
 * Reads the database name, source directory, output table and ontology
 * locations from ApplicationUtilities, builds the per-run resource names
 * (original, the six curator runs, and best/all) and then evaluates result
 * tables against the six gold standards in three passes:
 * the original-ontology table, the table of the matching curator run, and
 * the best-ontology table.
 *
 * The source directory, ontology, spatial term and glossary lists are only
 * needed by the commented-out XML2EQ run; the evaluation itself uses the
 * database, the output tables and the gold standards.
 *
 * @param args not used
 */
public static void main(String[] args) {
    String database = ApplicationUtilities.getProperty("database.name");
    String srcdir = ApplicationUtilities.getProperty("source.dir");
    String outputtable = ApplicationUtilities.getProperty("table.output");
    String ontodir = ApplicationUtilities.getProperty("ontology.dir");
    String uberon = ontodir + "/" + ApplicationUtilities.getProperty("ontology.uberon");
    String bspo = ontodir + "/" + ApplicationUtilities.getProperty("ontology.bspo");
    String pato = ontodir + "/" + ApplicationUtilities.getProperty("ontology.pato");
    String spatialtermtable = "uniquespatialterms";
    String glossary = "fishglossaryfixed";
    String[] runs = {"38484", "40674", "40676", "40716", "40717", "40718"};

    ArrayList<String> srcdirs = new ArrayList<String>();
    ArrayList<String> outputtables = new ArrayList<String>();
    ArrayList<String> uberons = new ArrayList<String>();
    ArrayList<String> bspos = new ArrayList<String>();
    ArrayList<String> patos = new ArrayList<String>();
    ArrayList<String> spatials = new ArrayList<String>();
    ArrayList<String> glossaries = new ArrayList<String>();

    // index 0: the original, unenhanced resources
    srcdirs.add(srcdir + "original/target/final");
    outputtables.add(outputtable + "_original");
    uberons.add(uberon + ".owl");
    bspos.add(bspo + ".owl");
    patos.add(pato + ".owl");
    spatials.add(spatialtermtable);
    glossaries.add("orig_" + glossary);

    // index 1-6: one entry per curator run
    for (String run : runs) {
        srcdirs.add(srcdir + run + "/target/final/");
        outputtables.add(outputtable + "_" + run);
        uberons.add(uberon + "_" + run + ".owl");
        bspos.add(bspo + "_" + run + ".owl");
        patos.add(pato + "_" + run + ".owl");
        spatials.add(spatialtermtable + "_" + run);
        glossaries.add(run + "_" + glossary);
    }

    // index 7: the combined ("best"/"all") resources
    srcdirs.add(srcdir + "best/target/final/");
    outputtables.add(outputtable + "_best");
    uberons.add(uberon + "_best.owl");
    bspos.add(bspo + "_best.owl");
    patos.add(pato + "_best.owl");
    spatials.add(spatialtermtable + "_all");
    glossaries.add("all_" + glossary);

    // To regenerate an output table before evaluating, run XML2EQ for one index i (0-7):
    //   System.out.println("Run XML2EQ with "+srcdirs.get(i) + "," + database + "," + outputtables.get(i) + "," + uberons.get(i) + "," + bspos.get(i) + "," + patos.get(i) + "," + spatials.get(i) + "," +glossaries.get(i));
    //   XML2EQ x2e = new XML2EQ(srcdirs.get(i), database, outputtables.get(i), uberons.get(i), bspos.get(i), patos.get(i), spatials.get(i), glossaries.get(i));
    //   x2e.outputEQs();
    // (wrapped in try/catch, logging failures with LOGGER.error("", e)).
    // A single debug run over source.dir + "40674/target/test/" with the
    // *_40674 resources, output table table.output + "_40674_debug", gold
    // standard "naive_40674" and run setting "debug" was used the same way.

    // the first three runs are evaluated against the naive gold standards,
    // the last three against the knowledge gold standards
    ArrayList<String> goldstandards = new ArrayList<String>();
    for (int k = 0; k < runs.length; k++) {
        goldstandards.add((k < 3 ? "naive_" : "knowledge_") + runs[k]);
    }

    String setting = "sym";
    String recordtable = "evaluationrecords";
    // pass 0: original onto; pass 1: curator enhanced onto; pass 2: best onto
    for (int pass = 0; pass < 3; pass++) {
        for (int i = 0; i < 6; i++) {
            int tableindex = pass == 0 ? 0 : (pass == 1 ? i + 1 : 7);
            String resulttable = outputtables.get(tableindex);
            String goldstandard = goldstandards.get(i);
            String runlabel = resulttable + "_" + goldstandard;
            System.out.println("Evaluation with " + database + "," + resulttable + "," + goldstandard + "," + recordtable + "," + runlabel);
            EQPerformanceEvaluation pe = new EQPerformanceEvaluation(database, resulttable, goldstandard, recordtable, runlabel + "_" + setting);
            pe.evaluate();
        }
    }
}
/**
 * @return the current value of the static recordperformance flag
 */
public static boolean isRecordperformance() {
return recordperformance;
}
/**
 * Sets the static recordperformance flag of XML2EQ.
 *
 * @param recordperformance the new value of the flag
 */
public static void setRecordperformance(boolean recordperformance) {
XML2EQ.recordperformance = recordperformance;
}
}