/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package uk.ac.ebi.ep.parser.parsers;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.ep.centralservice.helper.Relationship;
import uk.ac.ebi.ep.data.entry.Summary;
import uk.ac.ebi.ep.data.repositories.EnzymePortalCompoundRepository;
import uk.ac.ebi.ep.data.repositories.EnzymePortalSummaryRepository;
import uk.ac.ebi.ep.data.search.model.Compound;
import uk.ac.ebi.ep.data.service.EnzymePortalParserService;
/**
*
* @author joseph <joseph@ebi.ac.uk>
*/
public class Cofactors extends CompoundParser {
private List<LiteCompound> compounds = null;
private static final String COMMENT_TYPE = "COFACTORS";
private static final String NAME = "Name=([^\\s]+)";
private static final String XREF = "Xref=ChEBI:([^\\s]+)";
private static final String NOTE = "Note=([^\\*]+)";
public Cofactors(ChebiWebServiceClient chebiWsClient, EnzymePortalCompoundRepository compoundRepository, EnzymePortalSummaryRepository enzymeSummaryRepository, EnzymePortalParserService parserService) {
super(chebiWsClient, compoundRepository, enzymeSummaryRepository, parserService);
compounds = new ArrayList<>();
}
/**
* parse cofactor comments from uniprot and validates compound names in
* chebi before storing them to enzyme portal database
*/
@Override
public void loadCofactors() {
List<Summary> enzymeSummary = enzymeSummaryRepository.findSummariesByCommentType(COMMENT_TYPE);
LOGGER.info("Number of Regulation Text from EnzymeSummary Table to parse for cofactors " + enzymeSummary.size());
parseCofactorText(enzymeSummary);
}
private void computeSpecialCases(String text, Summary summary, String note) {
final Pattern xrefPattern = Pattern.compile(XREF);
final Matcher xrefMatcher = xrefPattern.matcher(text);
while (xrefMatcher.find()) {
String xref = xrefMatcher.group(1).replaceAll(";", "");
if (xref != null) {
Optional<LiteCompound> liteCompound = Optional.empty();
try {
LOGGER.info("Special case : xref search in CHEBI " + xref);
liteCompound = Optional.ofNullable(findByChEBIiD(xref));
} catch (Exception e) {
LOGGER.error("Chebi webservice error while searching " + xref, e);
}
if (liteCompound.isPresent()) {
LiteCompound compound = liteCompound.get();
String compoundId = compound.getCompoundId();
String compoundName = compound.getCompoundName();
String compoundSource = compound.getCompoundSource();
String relationship = Relationship.is_cofactor_of.name();
String compoundRole = Compound.Role.COFACTOR.name();
String url = compound.getUrl();
compound.setCompoundId(compoundId);
compound.setCompoundName(compoundName);
compound.setCompoundSource(compoundSource);
compound.setRelationship(relationship);
compound.setUniprotAccession(summary.getAccession());
compound.setUrl(url);
compound.setCompoundRole(compoundRole);
compound.setNote(note);
compounds.add(compound);
LOGGER.info("added compound for special case " + compound.getCompoundId() + " <> " + compound.getCompoundName());
}
}
}
}
private void parseCofactorText(List<Summary> enzymeSummary) {
//parallel processing
// Stream<Summary> existingStream = enzymeSummary.stream();
// Stream<List<Summary>> partitioned = partition(existingStream, 100, 1);
// //AtomicInteger count = new AtomicInteger(1);
// partitioned.parallel().forEach((chunk) -> {
// chunk.stream().forEach((summary) -> {
// processCofactors(summary);
// });
// });
enzymeSummary.forEach(summary -> {
processCofactors(summary);
});
//save compounds
LOGGER.warn("Writing to Enzyme Portal database... Number of cofactors to write : " + compounds.size());
compounds.stream().filter((compound) -> (compound != null)).forEach((compound) -> {
parserService.createCompound(compound.getCompoundId(), compound.getCompoundName(), compound.getCompoundSource(), compound.getRelationship(), compound.getUniprotAccession(), compound.getUrl(), compound.getCompoundRole(), compound.getNote());
});
LOGGER.warn("-------- Done populating the database with cofactors ---------------");
compounds.clear();
}
private void processCofactors(Summary summary) {
String cofactorText = summary.getCommentText();
String note = "";
final Pattern notePattern = Pattern.compile(NOTE);
final Matcher noteMatcher = notePattern.matcher(cofactorText);
while (noteMatcher.find()) {
note = noteMatcher.group(1);
}
final Pattern namePattern = Pattern.compile(NAME);
final Matcher nameMatcher = namePattern.matcher(cofactorText);
while (nameMatcher.find()) {
String cofactorName = nameMatcher.group(1).replaceAll(";", "");
if (cofactorName != null) {
LOGGER.info("cofactor name search in CHEBI " + cofactorName);
Optional<LiteCompound> liteCompound = Optional.ofNullable(findByCompoundName(cofactorName));
if (liteCompound.isPresent()) {
LiteCompound compound = liteCompound.get();
String compoundId = compound.getCompoundId();
String compoundName = compound.getCompoundName();
String compoundSource = compound.getCompoundSource();
String relationship = Relationship.is_cofactor_of.name();
String compoundRole = Compound.Role.COFACTOR.name();
String url = compound.getUrl();
compound.setCompoundId(compoundId);
compound.setCompoundName(compoundName);
compound.setCompoundSource(compoundSource);
compound.setRelationship(relationship);
compound.setUniprotAccession(summary.getAccession());
compound.setUrl(url);
compound.setCompoundRole(compoundRole);
compound.setNote(note);
compounds.add(compound);
}
if (!liteCompound.isPresent()) {
computeSpecialCases(cofactorText, summary, note);
}
}
}
}
}