package plugins.harmonization;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.molgenis.pheno.Measurement;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import plugins.HarmonizationComponent.LevenshteinDistanceModel;
import plugins.HarmonizationComponent.MappingList;
import uk.ac.ebi.ontocat.OntologyService.SearchOptions;
import uk.ac.ebi.ontocat.OntologyServiceException;
import uk.ac.ebi.ontocat.bioportal.BioportalOntologyService;
public class StringMatchingJob implements Job
{
@SuppressWarnings("unchecked")
@Override
public void execute(JobExecutionContext context) throws JobExecutionException
{
try
{
PredictorInfo predictor = (PredictorInfo) context.getJobDetail().getJobDataMap().get("predictor");
BioportalOntologyService os = new BioportalOntologyService();
List<Measurement> measurements = (List<Measurement>) context.getJobDetail().getJobDataMap()
.get("measurements");
HarmonizationModel model = (HarmonizationModel) context.getJobDetail().getJobDataMap().get("model");
LevenshteinDistanceModel matchingModel = (LevenshteinDistanceModel) context.getJobDetail().getJobDataMap()
.get("matchingModel");
MappingList mappings = new MappingList();
for (String eachBlock : predictor.getBuildingBlocks())
{
predictor.getExpandedQuery().addAll(expandQueryByDefinedBlocks(eachBlock.split(","), model, os));
}
if (!predictor.getExpandedQuery().contains(predictor.getLabel()))
{
predictor.getExpandedQuery().add(predictor.getLabel());
}
predictor.getExpandedQuery().addAll(expandByPotentialBuildingBlocks(predictor.getLabel(), model, os));
predictor.setExpandedQuery(uniqueList(predictor.getExpandedQuery()));
model.setTotalNumber(model.getTotalNumber() + predictor.getExpandedQuery().size());
for (String eachQuery : predictor.getExpandedQuery())
{
executeMapping(matchingModel, eachQuery, mappings, measurements);
model.setFinishedNumber(model.getFinishedNumber() + 1);
}
model.setFinishedJobs(model.getFinishedJobs() + 1);
predictor.setMappings(mappings);
}
catch (Exception e)
{
e.printStackTrace();
}
}
private void executeMapping(LevenshteinDistanceModel model, String eachQuery, MappingList mappings,
List<Measurement> measurementsInStudy) throws Exception
{
List<String> tokens = model.createNGrams(eachQuery.toLowerCase().trim(), true);
for (Measurement m : measurementsInStudy)
{
List<String> fields = new ArrayList<String>();
if (m.getDescription() != null && !StringUtils.isEmpty(m.getDescription()))
{
fields.add(m.getDescription());
StringBuilder combinedString = new StringBuilder();
if (m.getCategories_Name().size() > 0)
{
for (String categoryName : m.getCategories_Name())
{
combinedString.delete(0, combinedString.length());
combinedString.append(categoryName.replaceAll(m.getInvestigation_Name(), "")).append(" ")
.append(m.getDescription());
fields.add(combinedString.toString().replaceAll("_", " "));
}
}
}
for (String question : fields)
{
List<String> dataItemTokens = model.createNGrams(question.toLowerCase().trim(), true);
double similarity = model.calculateScore(dataItemTokens, tokens);
mappings.add(eachQuery, (m.getDescription() == null ? m.getName() : m.getDescription()), similarity,
m.getName());
}
}
}
private List<String> expandByPotentialBuildingBlocks(String predictorLabel, HarmonizationModel model,
BioportalOntologyService os) throws OntologyServiceException
{
List<String> expandedQueries = new ArrayList<String>();
ArrayList<List<String>> potentialBlocks = Terms.getTermsLists(Arrays.asList(predictorLabel.split(" ")));
HashMap<String, List<String>> mapForBlocks = new HashMap<String, List<String>>();
boolean possibleBlocks = false;
for (List<String> eachSetOfBlocks : potentialBlocks)
{
for (String eachBlock : eachSetOfBlocks)
{
mapForBlocks.put(eachBlock, collectInfoFromOntology(eachBlock.toLowerCase().trim(), model, os));
if (mapForBlocks.get(eachBlock).size() > 1)
{
possibleBlocks = true;
}
if (!mapForBlocks.get(eachBlock).contains(eachBlock.toLowerCase().trim()))
{
mapForBlocks.get(eachBlock).add(eachBlock.toLowerCase().trim());
}
}
if (possibleBlocks == true)
{
List<String> combinedList = mapForBlocks.get(eachSetOfBlocks.get(0));
if (eachSetOfBlocks.size() > 1)
{
for (int i = 1; i < eachSetOfBlocks.size(); i++)
{
combinedList = combineLists(combinedList, mapForBlocks.get(eachSetOfBlocks.get(i)));
}
}
expandedQueries.addAll(combinedList);
}
mapForBlocks.clear();
possibleBlocks = false;
}
return expandedQueries;
}
private List<String> expandQueryByDefinedBlocks(String[] buildingBlocksArray, HarmonizationModel model,
BioportalOntologyService os) throws OntologyServiceException
{
List<String> expandedQueries = new ArrayList<String>();
HashMap<String, List<String>> mapForBlocks = new HashMap<String, List<String>>();
List<String> buildingBlocks = new ArrayList<String>(Arrays.asList(buildingBlocksArray));
for (String eachBlock : buildingBlocks)
{
mapForBlocks.put(eachBlock, collectInfoFromOntology(eachBlock.toLowerCase().trim(), model, os));
if (!mapForBlocks.get(eachBlock).contains(eachBlock.toLowerCase().trim()))
{
mapForBlocks.get(eachBlock).add(eachBlock.toLowerCase().trim());
}
}
String previousBlock = buildingBlocksArray[0];
List<String> combinedList = mapForBlocks.get(previousBlock);
if (buildingBlocksArray.length > 1)
{
for (int j = 1; j < buildingBlocksArray.length; j++)
{
String nextBlock = buildingBlocksArray[j];
combinedList = combineLists(combinedList, mapForBlocks.get(nextBlock));
}
}
expandedQueries.addAll(combinedList);
return uniqueList(expandedQueries);
}
public List<String> collectInfoFromOntology(String queryToExpand, HarmonizationModel model,
BioportalOntologyService os) throws OntologyServiceException
{
List<String> expandedQueries = new ArrayList<String>();
for (uk.ac.ebi.ontocat.OntologyTerm ot : os.searchAll(queryToExpand, SearchOptions.EXACT))
{
if (model.getOntologyAccessions().contains(ot.getOntologyAccession()))
{
expandedQueries.add(ot.getLabel());
for (String synonym : os.getSynonyms(ot))
{
expandedQueries.add(synonym);
}
try
{
for (uk.ac.ebi.ontocat.OntologyTerm childOt : os.getChildren(ot))
{
expandedQueries.add(childOt.getLabel());
// for (String synonymChild : os.getSynonyms(childOt))
// {
// expandedQueries.add(synonymChild);
// }
}
}
catch (Exception e)
{
System.out.println("The ontology term " + ot.getLabel() + " doesn not have any children!");
}
}
}
return uniqueList(expandedQueries);
}
public List<String> uniqueList(List<String> uncleanedList)
{
List<String> uniqueList = new ArrayList<String>();
for (String eachString : uncleanedList)
{
if (!uniqueList.contains(eachString.toLowerCase().trim()))
{
uniqueList.add(eachString.toLowerCase().trim());
}
}
return uniqueList;
}
public List<String> combineLists(List<String> listOne, List<String> listTwo)
{
List<String> combinedList = new ArrayList<String>();
StringBuilder combinedString = new StringBuilder();
for (String first : listOne)
{
for (String second : listTwo)
{
combinedString.delete(0, combinedString.length());
combinedString.append(first).append(" ").append(second);
if (!combinedList.contains(combinedString.toString()))
{
combinedList.add(combinedString.toString());
}
}
}
return combinedList;
}
}