package rainbownlp.machinelearning; import java.io.Serializable; import java.util.Date; import java.util.HashMap; import java.util.List; import javax.persistence.CascadeType; import javax.persistence.Entity; import javax.persistence.FetchType; import javax.persistence.GeneratedValue; import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.PrePersist; import javax.persistence.PreUpdate; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; import org.hibernate.Session; import org.hibernate.annotations.GenericGenerator; import rainbownlp.core.Artifact; import rainbownlp.core.Artifact.Type; import rainbownlp.core.FeatureValuePair; import rainbownlp.core.Phrase; import rainbownlp.core.PhraseLink; import rainbownlp.core.PhraseLink.LinkType; import rainbownlp.util.FileUtil; import rainbownlp.util.HibernateUtil; import rainbownlp.util.ConfigurationUtil; @Entity @Table( name = "MLExample" ) public class MLExample implements Serializable { int exampleId; String predictedClass; String expectedClass; boolean forTrain; String corpusName; String predictionEngine; Artifact relatedArtifact; Phrase relatedPhrase; PhraseLink relatedPhraseLink; private String associatedFilePath; private double predictionWeight; private int expectedReal; private int expectedClosure; private int expectedIntegrated; private String expectedClassOptionalCategory; private String predictedClassOptionalCategory; private String relatedConcept; @Transient List<MLExampleFeature> exampleFeatures; static public Session hibernateSession; @Transient public List<MLExampleFeature> getExampleFeatures() { if(exampleFeatures==null) { if(hibernateSession == null) hibernateSession = HibernateUtil.sessionFactory.openSession(); String hql = "from MLExampleFeature where relatedExample = "+ getExampleId()+ " order by featureValuePair.tempFeatureIndex"; exampleFeatures = (List<MLExampleFeature>) HibernateUtil.executeReader(hql, null,null, hibernateSession); } return exampleFeatures; } @Transient public MLExampleFeature getExampleFeatureById(int featureValuePairId) { if(hibernateSession == null) hibernateSession = HibernateUtil.sessionFactory.openSession(); String hql = "from MLExampleFeature where relatedExample="+ getExampleId()+ " and featureValuePair="+featureValuePairId; List<MLExampleFeature> exampleFeatures = (List<MLExampleFeature>) HibernateUtil.executeReader(hql,null,null,hibernateSession); if(exampleFeatures!=null && exampleFeatures.size()>0) return exampleFeatures.get(0); else return null; } public String getPredictionEngine() { return predictionEngine; } public void setPredictionEngine(String pPredictionEngine) { predictionEngine = pPredictionEngine; } @ManyToOne( cascade = {CascadeType.PERSIST, CascadeType.MERGE }, fetch=FetchType.LAZY ) @JoinColumn(name="relatedPhrase") public Phrase getRelatedPhrase() { return relatedPhrase; } public void setRelatedPhrase(Phrase relatedPhrase) { this.relatedPhrase = relatedPhrase; } public String getCorpusName() { return corpusName; } public void setCorpusName(String pCorpusName) { corpusName = pCorpusName; } public boolean getForTrain() { return forTrain; } @ManyToOne( cascade = {CascadeType.PERSIST, CascadeType.MERGE }, fetch=FetchType.LAZY ) @JoinColumn(name="relatedArtifact") public Artifact getRelatedArtifact() { return relatedArtifact; } public void setRelatedArtifact(Artifact relatedArtifact) { this.relatedArtifact = relatedArtifact; } @ManyToOne( cascade = {CascadeType.PERSIST, CascadeType.MERGE} , fetch=FetchType.LAZY ) @JoinColumn(name="relatedPhraseLink") public PhraseLink getRelatedPhraseLink() { return relatedPhraseLink; } public void setRelatedPhraseLink(PhraseLink relatedPhraseLink) { this.relatedPhraseLink = relatedPhraseLink; } public void setForTrain(boolean isForTrain) { forTrain = isForTrain; } public String getPredictedClass() { return predictedClass; } public void setPredictedClass(String pPredictedClass) { predictedClass = pPredictedClass; } public String getExpectedClass() { return expectedClass; } public void setPredictedClass(Integer pPredictedClass) { setPredictedClass(pPredictedClass.toString()); } public void setExpectedClass(Integer pExpectedClass) { setExpectedClass(pExpectedClass.toString()); } public void setExpectedClass(String pExpectedClass) { expectedClass = pExpectedClass; } @Id @GeneratedValue(generator="increment") @GenericGenerator(name="increment", strategy = "increment") public int getExampleId() { return exampleId; } public void setExampleId(int exampleId) { this.exampleId = exampleId; } @Temporal(TemporalType.TIMESTAMP) Date updateTime; @PrePersist protected void onCreate() { updateTime = new Date(); } @PreUpdate protected void onUpdate() { updateTime = new Date(); } public static MLExample getInstanceForArtifact(Artifact artifact, String experimentgroup) { String hql = "from MLExample where relatedArtifact = "+ artifact.getArtifactId() + " and corpusName = '"+ experimentgroup+"'"; List<MLExample> example_objects = getExamplesList(hql); MLExample example_obj; if(example_objects.size()==0) { example_obj = new MLExample(); example_obj.setCorpusName(experimentgroup); example_obj.setRelatedArtifact(artifact); if(ConfigurationUtil.SaveInGetInstance) saveExample(example_obj); }else { example_obj = example_objects.get(0); } return example_obj; } public void calculateFeatures( List<IFeatureCalculator> featureCalculators) throws Exception { for(IFeatureCalculator feature_calculator : featureCalculators) { Date before = new Date(); feature_calculator.calculateFeatures(this); Date after = new Date(); FileUtil.logLine(null, feature_calculator.getClass().toString()+" "+(after.getTime()-before.getTime())); } } public static MLExample getInstanceForLink(PhraseLink phrase_link, String experimentgroup) { String hql = "from MLExample where relatedPhraseLink = "+ phrase_link.getPhraseLinkId() + " and corpusName = '"+ experimentgroup+"'"; List<MLExample> example_objects = getExamplesList(hql); MLExample example_obj; if(example_objects.size()==0) { example_obj = new MLExample(); example_obj.setCorpusName(experimentgroup); example_obj.setRelatedPhraseLink(phrase_link); if(phrase_link.getFromPhrase().getStartArtifact()!=null) example_obj.setAssociatedFilePath(phrase_link.getFromPhrase().getStartArtifact().getAssociatedFilePath()); if(ConfigurationUtil.SaveInGetInstance) saveExample(example_obj); }else { example_obj = example_objects.get(0); } return example_obj; } public static void saveExample(MLExample example) { if(hibernateSession == null) hibernateSession = HibernateUtil.loaderSession; HibernateUtil.save(example, hibernateSession); } static List<MLExample> getExamplesList(String hql, Integer limit) { List<MLExample> examples; if(hibernateSession == null) hibernateSession = HibernateUtil.loaderSession; if(!hibernateSession.isOpen()) hibernateSession = HibernateUtil.sessionFactory.openSession(); examples = (List<MLExample>) HibernateUtil.executeReader(hql, null, limit, hibernateSession); return examples; } static List<MLExample> getExamplesList(String hql, HashMap<String, Object> params) { List<MLExample> examples; if(hibernateSession == null) hibernateSession = HibernateUtil.loaderSession; examples = (List<MLExample>) HibernateUtil.executeReader(hql, params, null, hibernateSession); return examples; } public static List<MLExample> getAllExamples(String experimentgroup, boolean for_train, Integer limit) { return getAllExamples(experimentgroup, for_train, "exampleId", limit); } public static List<MLExample> getAllExamples(String experimentgroup, boolean for_train) { return getAllExamples(experimentgroup, for_train, "exampleId", null); } public static List<MLExample> getAllExamples(String experimentgroup, boolean for_train, String orderByPhrase, Integer limit) { String hql = "from MLExample where corpusName = '"+ experimentgroup+"' and forTrain="+(for_train?1:0) +" order by "+orderByPhrase; return getExamplesList(hql, limit); } public static List<MLExample> getAllExamples(boolean for_train, Integer limit) { String hql = "from MLExample where forTrain="+(for_train?1:0) +" order by exampleId"; return getExamplesList(hql, limit); } public static List<MLExample> getAllExamples(boolean for_train) { String hql = "from MLExample where forTrain="+(for_train?1:0) +" order by exampleId"; return getExamplesList(hql); } private static List<MLExample> getExamplesList(String hql) { return getExamplesList(hql, Integer.MAX_VALUE); } public static List<MLExample> getExampleById(int example_id, String experimentgroup) { String hql = "from MLExample where corpusName = '"+ experimentgroup+"' and exampleId="+example_id +" order by exampleId"; return getExamplesList(hql); } public static MLExample getExampleById(int example_id) { String hql = "from MLExample where exampleId="+example_id; List<MLExample> example_objects = (List<MLExample>) HibernateUtil.executeReader(hql); MLExample example_obj=null; if(example_objects.size()!=0) { example_obj = example_objects.get(0); } return example_obj; } public static List<MLExample> getAllExamples(String experimentgroup, boolean for_train, int limit) { String hql = "from MLExample where corpusName = '"+ experimentgroup+"' and forTrain="+(for_train?1:0)+" order by exampleId"; return getExamplesList(hql); } public static List<MLExample> getLastExamples(String experimentgroup, boolean for_train, int limit) { String hql = "from MLExample where corpusName = '"+ experimentgroup+"' and forTrain="+(for_train?1:0)+ "order by exampleId desc"; return getExamplesList(hql); } public static List<MLExample> getExampleByExpectedClass(String experimentgroup,boolean for_train, int expectedClass) { String hql = "from MLExample where corpusName = '"+ experimentgroup+"' and expectedClass="+expectedClass +" and forTrain="+(for_train?1:0)+" order by exampleId"; return getExamplesList(hql); } public static List<MLExample> getExamplesInDocument(String experimentgroup, String doc_path) { String hql = "FROM MLExample " + "where corpusName =:corpusName " + " and associatedFilePath = '" + doc_path + "' " + "order by exampleId desc"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("corpusName", experimentgroup); return getExamplesList(hql, params); } /** * Get examples in document with the given expectedClass * @param experimentgroup * @param doc_path * @return */ public static List<MLExample> getExamplesInDocument(String experimentgroup, String doc_path,Integer expectedClass) { String hql = "FROM MLExample " + "where expectedClass = :expectedClass and corpusName =:corpusName " + " and associatedFilePath = '" + doc_path + "' " + "order by exampleId desc"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("corpusName", experimentgroup); params.put("expectedClass", expectedClass); return getExamplesList(hql, params); } public static List<MLExample> getExamplesByDocument(String experimentgroup, boolean for_train, int num_of_documents) { List<Artifact> docs = Artifact.listByType(Type.Document, for_train); if(docs.size()<num_of_documents) num_of_documents = docs.size(); String docPaths = ""; for(int i=0;i<num_of_documents;i++) docPaths = docPaths.concat(", '"+docs.get(i).getAssociatedFilePath()+"'"); docPaths = docPaths.replaceFirst(",", ""); String hql = "FROM MLExample " + "where corpusName =:corpusName " + " and forTrain="+(for_train?1:0) +" and associatedFilePath in (" + docPaths + ") " + "order by associatedFilePath desc"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("corpusName", experimentgroup); return getExamplesList(hql, params); } public static List<MLExample> getExamplesByEventTypeByDocument(String experimentgroup, boolean for_train, int num_of_documents, String type1, String type2,String order) { List<Artifact> docs = Artifact.listByType(Type.Document,for_train); if(docs.size()<num_of_documents) num_of_documents = docs.size(); String docPaths = ""; if (order.equals("top")) { for(int i=0;i<num_of_documents;i++) docPaths = docPaths.concat(", '"+docs.get(i).getAssociatedFilePath()+"'"); docPaths = docPaths.replaceFirst(",", ""); } else if(order.equals("last")) { for(int i=docs.size()-1;i>docs.size()-num_of_documents-1;i--) docPaths = docPaths.concat(", '"+docs.get(i).getAssociatedFilePath()+"'"); docPaths = docPaths.replaceFirst(",", ""); } Integer type1_from_fvpIds = FeatureValuePair.getRelatedFromEventTypeFValuePairIds(type1); Integer type1_to_fvpIds = FeatureValuePair.getRelatedToEventTypeFValuePairIds(type1); Integer type2_from_fvpIds = FeatureValuePair.getRelatedFromEventTypeFValuePairIds(type2); Integer type2_to_fvpIds = FeatureValuePair.getRelatedToEventTypeFValuePairIds(type2); // String from_fvpIds = ""; // for(Integer id: fromFeatureValuePairIds) // { // from_fvpIds = from_fvpIds.concat(", '"+id+"'"); // } // from_fvpIds = from_fvpIds.replaceFirst(",", ""); // // // String to_fvpIds = ""; // for(Integer id: toTeatureValuePairIds) // { // to_fvpIds = to_fvpIds.concat(", '"+id+"'"); // } // to_fvpIds = to_fvpIds.replaceFirst(",", ""); String hql = " FROM MLExample m " + "where (( exists (from MLExampleFeature f where m.exampleId =f.relatedExample and featureValuePair in ("+type1_from_fvpIds+"))" + " and exists (from MLExampleFeature f where m.exampleId =f.relatedExample and featureValuePair in ("+type2_to_fvpIds+"))) or" + " (exists (from MLExampleFeature f where m.exampleId =f.relatedExample and featureValuePair in ("+type2_from_fvpIds+")) and " + "exists (from MLExampleFeature f where m.exampleId =f.relatedExample and featureValuePair in ("+type1_to_fvpIds+")))) " + " and corpusName =:corpusName " + " and forTrain="+(for_train?1:0) +" and " + "associatedFilePath in (" + docPaths + ") " + "order by associatedFilePath desc"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("corpusName", experimentgroup); return getExamplesList(hql, params); } public static List<MLExample> getLastExamplesByDocument(String experimentgroup, boolean for_train, int num_of_documents) { List<Artifact> docs = Artifact.listByType(Type.Document,for_train); if(docs.size()<num_of_documents) num_of_documents = docs.size(); String docPaths = ""; for(int i=docs.size()-1;i>docs.size()-num_of_documents-1;i--) docPaths = docPaths.concat(", '"+docs.get(i).getAssociatedFilePath()+"'"); docPaths = docPaths.replaceFirst(",", ""); String hql = "FROM MLExample " + "where corpusName =:corpusName " + " and forTrain="+(for_train?1:0) +" and associatedFilePath in (" + docPaths + ") " + "order by associatedFilePath desc"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("corpusName", experimentgroup); return getExamplesList(hql, params); } @Override public MLExample clone() { if(relatedArtifact!=null) return getInstanceForArtifact(relatedArtifact, corpusName); else return getInstanceForLink(relatedPhraseLink, corpusName); } public static void resetExamplesPredicted(String experimentgroup, boolean for_train) { String hql = "update MLExample set predictedClass = -1 where corpusName = '"+ experimentgroup+"' and forTrain="+(for_train?1:0); HibernateUtil.executeNonReader(hql); } public static void setExamplePredictedClass(int example_id, int predicted) { String hql = "update MLExample set predictedClass = "+predicted+" where exampleId="+example_id; HibernateUtil.executeNonReader(hql); } public static void resetExamplesPredictedToDefault(String experimentgroup, boolean for_train, int default_predicted) { String hql = "update MLExample set predictedClass = "+default_predicted+" where corpusName = '"+ experimentgroup+"' and forTrain="+(for_train?1:0); HibernateUtil.executeNonReader(hql); } public void setAssociatedFilePath(String associatedFilePath) { this.associatedFilePath = associatedFilePath; } public String getAssociatedFilePath() { return associatedFilePath; } public static void updateAssociatedFilePath() { String hql = "from MLExample "; Session tempSession = HibernateUtil.sessionFactory.openSession(); List<MLExample> examples = (List<MLExample>) HibernateUtil.executeReader(hql, null,null, tempSession); for (MLExample example: examples) { PhraseLink related_phrase_link = example.getRelatedPhraseLink(); Phrase from_phrase = related_phrase_link.getFromPhrase(); Artifact start_artifact = from_phrase.getStartArtifact(); String file_path = start_artifact.getAssociatedFilePath(); example.setAssociatedFilePath(file_path); HibernateUtil.save(example, tempSession); } tempSession.clear(); tempSession.close(); } public static MLExample findInstance(PhraseLink phrase_link, String experimentgroup) { String hql = "from MLExample where relatedPhraseLink = "+ phrase_link.getPhraseLinkId() + " and corpusName = '"+ experimentgroup+"'"; List<MLExample> example_objects = getExamplesList(hql); MLExample example_obj=null; if(example_objects.size()!=0) { example_obj = example_objects.get(0); } return example_obj; } public static MLExample findInstance(PhraseLink phrase_link) { String hql = "from MLExample where relatedPhraseLink = "+ phrase_link.getPhraseLinkId(); List<MLExample> example_objects = getExamplesList(hql); MLExample example_obj=null; if(example_objects.size()!=0) { example_obj = example_objects.get(0); } return example_obj; } public static List<MLExample> getDecidedExamplesForGraph(Artifact p_sentence) { String hql = "from MLExample where predictedClass <> -1 " + "and relatedPhraseLink.fromPhrase.startArtifact.parentArtifact = "+ p_sentence.getArtifactId()+" and relatedPhraseLink.toPhrase.startArtifact.parentArtifact = "+ p_sentence.getArtifactId(); List<MLExample> example_objects = getExamplesList(hql); return example_objects; } /** * Get Artifact-RelatedConcept example. * This example is suitable to classify artifacts into unknown set of classes, use linkedConcept to store class * For example finding whether a sentence is evidence for an specific gene, pass the sentence artifact and the gene as linkedConcept * @param artifact * @param linkedConcept * @param experimentGroup * @return */ public static MLExample getInstance(Artifact artifact, String linkedConcept, String experimentGroup) { String hql = "from MLExample where relatedArtifact = "+ artifact.getArtifactId() + " and relatedConcept = :relatedConcept " + " and corpusName = :corpusName"; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("relatedConcept", linkedConcept); params.put("corpusName", experimentGroup); List<MLExample> example_objects = getExamplesList(hql, params); MLExample example_obj; if(example_objects.size()==0) { example_obj = new MLExample(); example_obj.setCorpusName(experimentGroup); example_obj.setRelatedArtifact(artifact); example_obj.setRelatedConcept(linkedConcept); example_obj.setAssociatedFilePath(artifact.getAssociatedFilePath()); if(ConfigurationUtil.SaveInGetInstance) saveExample(example_obj); }else { example_obj = example_objects.get(0); } return example_obj; } public void setPredictionWeight(double predictionWeight) { this.predictionWeight = predictionWeight; } public double getPredictionWeight() { return predictionWeight; } public void setExpectedReal(int expectedReal) { this.expectedReal = expectedReal; } public int getExpectedReal() { return expectedReal; } public void setExpectedClosure(int expectedClosure) { this.expectedClosure = expectedClosure; } public int getExpectedClosure() { return expectedClosure; } public void setExpectedIntegrated(int expectedIntegrated) { this.expectedIntegrated = expectedIntegrated; } public int getExpectedIntegrated() { return expectedIntegrated; } public String getRelatedConcept() { return relatedConcept; } public void setRelatedConcept(String relatedConcept) { this.relatedConcept = relatedConcept; } public static void deleteAllExamples(String experimentgroup) { String hql = "delete from MLExample where corpusName = '"+ experimentgroup+"'"; HibernateUtil.executeNonReader(hql, true); } public static MLExample findInstance(Artifact artifact, String linkedConcept) { String hql = "from MLExample where relatedArtifact = "+ artifact.getArtifactId() + " and relatedConcept = :relatedConcept "; HashMap<String, Object> params = new HashMap<String, Object>(); params.put("relatedConcept", linkedConcept); List<MLExample> example_objects = getExamplesList(hql, params); MLExample example_obj = null; if(example_objects.size()!=0) { example_obj = example_objects.get(0); } return example_obj; } public void setExpectedClassOptionalCategory( String expectedClassOptionalCategory) { this.expectedClassOptionalCategory = expectedClassOptionalCategory; } public String getExpectedClassOptionalCategory() { return expectedClassOptionalCategory; } public void setPredictedClassOptionalCategory( String predictedClassOptionalCategory) { this.predictedClassOptionalCategory = predictedClassOptionalCategory; } public String getPredictedClassOptionalCategory() { return predictedClassOptionalCategory; } public static List<MLExample> getAllExamples(String pathLike) { String hql = "from MLExample where associatedFilePath like '%"+ pathLike+"%' order by exampleId"; return getExamplesList(hql); } @Transient public Double getNumericExpectedClass() { return Double.parseDouble(expectedClass); } @Transient public Double getNumericPredictedClass() { return Double.parseDouble(predictedClass); } }