/* * Copyright (c) 2012. Humboldt-Universität zu Berlin, Dept. of Computer Science and Dept. * of Wissensmanagement in der Bioinformatik * ------------------------------- * * THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC * LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM * CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. * * http://www.opensource.org/licenses/cpl1.0 */ package de.berlin.hu.chemspot; import de.berlin.hu.util.Constants; import de.berlin.hu.util.Constants.ChemicalID; import de.berlin.hu.util.Constants.ChemicalType; import org.apache.uima.jcas.tcas.Annotation; import org.u_compare.shared.semantic.NamedEntity; public class Mention implements Comparable<Object> { private int start; private int end; private String text; private String[] ids; private String source; private String documentText; private ChemicalType type; /** * Represents a chemical entity found in a text. * @param start position of the start character of an annotation * @param end position of the end character of an annotation (exclusive) * @param text covered text * @param ids a string representation of an array of identifiers of the form: [0] CHID, [1] CHEB, [2] CAS, [3] PUBC, [4] PUBS, [5] INCH, [6] DRUG, [7] HMBD, [8] KEGG, [9] KEGD, [10] MESH * @param source indicates whether found by the CRF, the dictionary or taken from goldstandard */ public Mention(int start, int end, String text, String ids, String source, String documentText, ChemicalType type) { this.start = start; this.end = end; this.text = text; this.ids = new String[ChemicalID.values().length]; if (ids != null) { String tempIds = ids; if (tempIds.startsWith("[")) tempIds = tempIds.substring(1); if (tempIds.endsWith("]")) tempIds = tempIds.substring(0, tempIds.length() - 1); int i = 0; for (String id : tempIds.split(", |,$|^,")) { if (i >= this.ids.length) break; setId(ChemicalID.values()[i++], id.trim()); } } this.documentText = documentText; setSource(source); setType(type); } public Mention(int start, int end, String text, String ids, String source, String documentText) { this(start, end, text, ids, source, documentText, null); } public Mention(int start, int end, String text) { this(start, end, text, null, null, null); } public Mention(int start, int end) { this(start, end, null); } public Mention(NamedEntity entity) { this(entity.getBegin(), entity.getEnd(), entity.getCoveredText(), entity.getId(), entity.getSource(), entity.getCAS().getDocumentText(), ChemicalType.fromString(entity.getEntityType())); } public int getStart() { return start; } public void setStart(int start) { this.start = start; } public int getEnd() { return end; } public void setEnd(int end) { this.end = end; } public String getText() { return text; } public String[] getIds() { return ids; } public String getSource() { return source; } public void setSource(String source) { this.source = source == null ? Constants.UNKNOWN : source; if (getType() == null || ChemicalType.UNKNOWN.equals(getType())) { setType(ChemicalType.fromString(source)); } } public String getCHID() { return getId(ChemicalID.CHID); } public String getCHEB() { return getId(ChemicalID.CHEB); } public String getCAS() { return getId(ChemicalID.CAS); } public String getPUBC() { return getId(ChemicalID.PUBC); } public String getPUBS() { return getId(ChemicalID.PUBS); } public String getINCH() { return getId(ChemicalID.INCH); } public String getDRUG() { return getId(ChemicalID.DRUG); } public String getHMBD() { return getId(ChemicalID.HMBD); } public String getKEGG() { return getId(ChemicalID.KEGG); } public String getKEGD() { return getId(ChemicalID.KEGD); } public String getMESH() { return getId(ChemicalID.MESH); } public String getFDA() { return getId(ChemicalID.FDA); } public String getFDADate() { return getId(ChemicalID.FDA_DATE); } public String getId(ChemicalID type) { String id = null; try { id = ids[type.ordinal()]; } catch (ArrayIndexOutOfBoundsException e) { //ignore } return id; } public void setCHID(String id) { setId(ChemicalID.CHID, id); } public void setCHEB(String id) { setId(ChemicalID.CHEB, id); } public void setCAS(String id) { setId(ChemicalID.CAS, id); } public void setPUBC(String id) { setId(ChemicalID.PUBC, id); } public void setPUBS(String id) { setId(ChemicalID.PUBS, id); } public void setINCH(String id) { setId(ChemicalID.INCH, id); } public void setDRUG(String id) { setId(ChemicalID.DRUG, id); } public void setHMBD(String id) { setId(ChemicalID.HMBD, id); } public void setKEGG(String id) { setId(ChemicalID.KEGG, id); } public void setKEGD(String id) { setId(ChemicalID.KEGD, id); } public void setMESH(String id) { setId(ChemicalID.MESH, id); } public void setFDA(String id) { setId(ChemicalID.FDA, id); } public void setFDADate(String id) { setId(ChemicalID.FDA_DATE, id); } public void setId(ChemicalID type, String id) { if (id != null && (id.isEmpty() || "null".equals(id))) { id = null; } if (ChemicalID.CHEB.equals(type) && id != null && !id.startsWith("CHEBI:")) { id = "CHEBI:" + id; } try { ids[type.ordinal()] = id != null ? id.trim() : null; } catch (ArrayIndexOutOfBoundsException e) { //ignore } } public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null || !(obj instanceof Mention)) { return false; } Mention other = (Mention) obj; if (getStart() != other.getStart() || getEnd() != other.getEnd() || (getText() == null && other.getText() != null) || (getText() != null && !getText().equals(other.getText()))) { return false; } return true; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + getStart(); result = prime * result + getEnd(); result = prime * result + ((text == null) ? 0 : text.hashCode()); return result; } @Override public String toString() { return start + " " + end + " " + text + " " + getCHID(); } public int compareTo(Object o) { if (this.equals(o)) { return 0; } int otherBegin = 0; int otherEnd = 0; if (o instanceof Mention) { Mention other = (Mention) o; otherBegin = other.getStart(); otherEnd = other.getEnd(); } else if (o instanceof Annotation) { Annotation other = (Annotation) o; otherBegin = other.getBegin(); otherEnd = other.getEnd(); } else { return 0; } if (getStart() != otherBegin) { return getStart() - otherBegin; } else { return getEnd() - otherEnd; } } public boolean overlaps(Mention mention) { return mention != null && ((getStart() >= mention.getStart() && getStart() < mention.getEnd()) || (getEnd() >= mention.getStart() && getEnd() < mention.getEnd()) || (mention.getStart() >= getStart() && mention.getStart() < getEnd()) || (mention.getEnd() >= getStart() && mention.getEnd() < getEnd())); } public String getDocumentText() { return documentText; } public void setDocumentText(String documentText) { this.documentText = documentText; } public ChemicalType getType() { return type; } public void setType(ChemicalType type) { this.type = type == null ? ChemicalType.UNKNOWN : type; } }