/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package uk.ac.ebi.ep.parser.parsers;
//import java.util.ArrayList;
//import java.util.LinkedHashMap;
//import java.util.LinkedList;
//import java.util.List;
//import java.util.Map;
//import java.util.Set;
//import java.util.concurrent.atomic.AtomicInteger;
//import java.util.regex.Matcher;
//import java.util.regex.Pattern;
//import java.util.stream.Stream;
//import org.apache.log4j.Logger;
//import org.springframework.util.StringUtils;
//import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.ChebiWebServiceFault_Exception;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
//import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
//import uk.ac.ebi.ep.centralservice.helper.CompoundUtil;
//import uk.ac.ebi.ep.centralservice.helper.EPUtil;
//import uk.ac.ebi.ep.centralservice.helper.MmDatabase;
//import uk.ac.ebi.ep.centralservice.helper.Relationship;
//import uk.ac.ebi.ep.data.domain.EnzymePortalCompound;
//import uk.ac.ebi.ep.data.domain.EnzymePortalSummary;
//import uk.ac.ebi.ep.data.domain.UniprotEntry;
//import uk.ac.ebi.ep.data.entry.Summary;
//import uk.ac.ebi.ep.data.repositories.EnzymePortalCompoundRepository;
//import uk.ac.ebi.ep.data.repositories.EnzymePortalSummaryRepository;
//import static uk.ac.ebi.ep.parser.inbatch.PartitioningSpliterator.partition;
/**
 * Placeholder for the ChEBI compound loader. The whole implementation is
 * currently commented out, so this class declares no fields or methods.
 *
 * <p>The disabled code below reads enzyme summaries of comment type
 * {@code "REGULATION"}, extracts inhibitor and activator names from the
 * comment text via {@code EPUtil}, looks each name up through the ChEBI web
 * service client and saves the resulting compounds through the compound
 * repository.
 *
 * @author joseph
 * @deprecated the implementation is disabled; per the original note it might
 * be used again after the fixes by curators.
 */
@Deprecated// might be used after the fixes by curators
public class ChEBICompounds {
    /*
     * NOTE(review): points to resolve before re-enabling the code below.
     *  - java.util.Arrays (used to build blackList) is not among the
     *    commented-out imports at the top of the file.
     *  - compounds is declared as List<LiteCompound> but is filled with the
     *    EnzymePortalCompound values returned by searchMoleculeInChEBI.
     *  - computeAndLoadChEBICompounds fills the LinkedHashMap fields
     *    inhibitors and activators from inside a parallel stream;
     *    LinkedHashMap is not synchronized.
     *  - In that same loop inhibitors is keyed by summary.getAccession()
     *    while activators is keyed by summary.getUniprotAccession();
     *    confirm which one is intended.
     *  - The combined removeIf repeats the three single removeIf calls that
     *    precede it, so it appears to be redundant.
     *  - The AtomicInteger count is only referenced by a commented-out println.
     *  - searchMoleculeInChEBI consults blackList only after it has already
     *    called the ChEBI web service for the name.
     */
// // use this when parsing the molecule name - we don't want a molecule with parentheses eg sucrose (DTTI)
// private static final Pattern COMPOUND_NAME_PATTERN
// = Pattern.compile("(.*?)(?: \\((.*?)\\))?");
//
// private final Logger LOGGER = Logger.getLogger(ChEBICompounds.class);
// private final ChebiWebServiceClient chebiWsClient;
//
// Map<UniprotEntry, Set<String>> inhibitors = new LinkedHashMap<>();
// Map<UniprotEntry, Set<String>> activators = new LinkedHashMap<>();
//
// List<LiteCompound> compounds = new LinkedList<>();
//
// //private static final String COMMENT_TYPE = "ENZYME_REGULATION";
// private static final String COMMENT_TYPE = "REGULATION";
//
// private final EnzymePortalCompoundRepository compoundRepository;
//
// private final EnzymePortalSummaryRepository enzymeSummaryRepository;
//
// public static final String[] BLACKLISTED_COMPOUNDS = {"ACID", "acid", "H(2)O", "H(+)", "ACID", "WATER", "water", "ion", "ION", "", " "};
// List<String> blackList = Arrays.asList(BLACKLISTED_COMPOUNDS);
//
// public ChEBICompounds(EnzymePortalSummaryRepository enzymeSummaryRepository, EnzymePortalCompoundRepository repository) {
// this.compoundRepository = repository;
// this.enzymeSummaryRepository = enzymeSummaryRepository;
// chebiWsClient = new ChebiWebServiceClient();
//
// }
//
// public void computeAndLoadChEBICompounds() {
//
// List<Summary> enzymeSummary = enzymeSummaryRepository.findSummariesByCommentType(COMMENT_TYPE);
// //LOGGER.warn("Number of Regulation Text from EnzymeSummary Table " + enzymeSummary.size());
//
// //String text = "Activated by cell stresses such as DNA damage, heat shock, osmotic shock, anisomycin and sodium arsenite, as well as pro-inflammatory stimuli such as bacterial lipopolysaccharide (LPS) and interleukin-1. Activation occurs through dual phosphorylation of Thr-180 and Tyr-182 by either of two dual specificity kinases, MAP2K3/MKK3 or MAP2K6/MKK6, and potentially also MAP2K4/MKK4, as well as by TAB1-mediated autophosphorylation. MAPK14 phosphorylated on both Thr-180 and Tyr-182 is 10-20-fold more active than MAPK14 phosphorylated only on Thr-180, whereas MAPK14 phosphorylated on Tyr-182 alone is inactive. whereas Thr-180 is necessary for catalysis, Tyr-182 may be required for auto-activation and substrate recognition. Phosphorylated at Tyr-323 by ZAP70 in an alternative activation pathway in response to TCR signaling in T-cells. This alternative pathway is inhibited by GADD45A. Inhibited by dual specificity phosphatases, such as DUSP1, DUSP10, and DUSP16. Specifically inhibited by the binding of pyridinyl-imidazole compounds, which are cytokine-suppressive anti-inflammatory drugs (CSAID). Isoform Mxi2 is 100-fold less sensitive to these agents than the other isoforms and is not inhibited by DUSP1. Isoform Exip is not activated by MAP2K6. SB203580 is an inhibitor of MAPK14.";
// //Java 7 and before only. uncomment if Java 8 is not available in your env
//// for (EnzymePortalSummary summary : enzymeSummary) {
//// String enzyme_regulation_text = summary.getCommentText();
////
////
//// inhibitors.put(summary.getUniprotAccession(), EPUtil.parseTextForInhibitors(enzyme_regulation_text));
//// activators.put(summary.getUniprotAccession(), EPUtil.parseTextForActivators(enzyme_regulation_text));
//// }
////
////
//// for (Map.Entry<UniprotEntry, Set<String>> map : inhibitors.entrySet()) {
//// UniprotEntry key = map.getKey();
//// for (String inhibitor : map.getValue()) {
//// EnzymePortalCompound inhibitor_from_chebi = searchMoleculeInChEBI(inhibitor);
////
//// if (inhibitor_from_chebi != null) {
////
//// inhibitor_from_chebi.setRelationship(Relationship.is_inhibitor_of.name());
//// inhibitor_from_chebi.setUniprotAccession(key);
//// compounds.add(inhibitor_from_chebi);
//// }
//// }
////
//// }
////
//// for (Map.Entry<UniprotEntry, Set<String>> map : activators.entrySet()) {
//// UniprotEntry key = map.getKey();
//// for (String activator : map.getValue()) {
//// EnzymePortalCompound activator_from_chebi = searchMoleculeInChEBI(activator);
//// if (activator_from_chebi != null) {
////
//// activator_from_chebi.setRelationship(Relationship.is_activator_of.name());
//// activator_from_chebi.setUniprotAccession(key);
//// compounds.add(activator_from_chebi);
//// }
//// }
////
//// }
// //Java 8 specifics - comment out and uncomment above if java 8 is not found in env
//// enzymeSummary.stream().forEach((summary) -> {
//// String enzyme_regulation_text = summary.getCommentText();
//// inhibitors.put(summary.getUniprotAccession(), EPUtil.parseTextForInhibitors(enzyme_regulation_text));
//// activators.put(summary.getUniprotAccession(), EPUtil.parseTextForActivators(enzyme_regulation_text));
//// });
// Stream<Summary> existingStream = enzymeSummary.stream();
// Stream<List<Summary>> partitioned = partition(existingStream, 500, 1);
// AtomicInteger count = new AtomicInteger(1);
// partitioned.parallel().forEach((chunk) -> {
// //System.out.println(count.getAndIncrement() + " BATCH SIZE" + chunk.size());
// chunk.stream().forEach((summary) -> {
// String enzyme_regulation_text = summary.getCommentText();
//
// inhibitors.put(summary.getAccession(), EPUtil.parseTextForInhibitors(enzyme_regulation_text));
// activators.put(summary.getUniprotAccession(), EPUtil.parseTextForActivators(enzyme_regulation_text));
//
// });
// });
//
// LOGGER.debug("number of inhibitors and activators to process are : " + inhibitors.size() + ": " + activators.size());
// inhibitors.entrySet().stream().forEach((map) -> {
// map.getValue().stream().map((inhibitor) -> searchMoleculeInChEBI(inhibitor)).filter((inhibitor_from_chebi) -> (inhibitor_from_chebi != null)).map((inhibitor_from_chebi) -> {
// inhibitor_from_chebi.setRelationship(Relationship.is_inhibitor_of.name());
// inhibitor_from_chebi = CompoundUtil.computeRole(inhibitor_from_chebi, inhibitor_from_chebi.getRelationship());
// return inhibitor_from_chebi;
// }).map((inhibitor_from_chebi) -> {
// inhibitor_from_chebi.setUniprotAccession(map.getKey());
// return inhibitor_from_chebi;
// }).forEach((inhibitor_from_chebi) -> {
// compounds.add(inhibitor_from_chebi);
// });
// });
//
// activators.entrySet().stream().forEach((map) -> {
// map.getValue().stream().map((activator) -> searchMoleculeInChEBI(activator)).filter((activator_from_chebi) -> (activator_from_chebi != null)).map((activator_from_chebi) -> {
// activator_from_chebi.setRelationship(Relationship.is_activator_of.name());
// activator_from_chebi = CompoundUtil.computeRole(activator_from_chebi, activator_from_chebi.getRelationship());
// return activator_from_chebi;
// }).map((activator_from_chebi) -> {
// activator_from_chebi.setUniprotAccession(map.getKey());
// return activator_from_chebi;
// }).forEach((activator_from_chebi) -> {
// compounds.add(activator_from_chebi);
// });
// });
//
// LOGGER.warn("Number of compounds before first filtering : " + compounds.size());
//
// compounds.removeIf(c -> c.getCompoundId().equalsIgnoreCase("CHEBI:338412") && c.getUniprotAccession().getAccession().equalsIgnoreCase("Q16539"));
// compounds.removeIf(c -> c.getCompoundId().equalsIgnoreCase("CHEBI:16412") && c.getUniprotAccession().getAccession().equalsIgnoreCase("Q16539"));
// compounds.removeIf(c -> c.getCompoundId().equalsIgnoreCase("CHEBI:29678") && c.getUniprotAccession().getAccession().equalsIgnoreCase("Q16539"));
// LOGGER.warn("Number of compounds before second filtering : " + compounds.size());
// compounds.removeIf(c
// -> (c.getCompoundId().equalsIgnoreCase("CHEBI:338412")
// || c.getCompoundId().equalsIgnoreCase("CHEBI:16412")
// || c.getCompoundId().equalsIgnoreCase("CHEBI:29678"))
// && c.getUniprotAccession().getAccession().equalsIgnoreCase("Q16539"));
//
// LOGGER.warn("Writing to Enzyme Portal database... Number of compounds to write : " + compounds.size());
//
// compoundRepository.save(compounds);
//
// inhibitors.clear();
// activators.clear();
// compounds.clear();
// }
//
// /**
// * Searches a compound name in ChEBI. Please note that if the name does not
// * match <i>exactly</i> any names/synonyms returned by ChEBI, the result
// * will be <code>null</code>.
// *
// * @param moleculeName the compound name.
// * @return an entry with a ChEBI ID, or <code>null</code> if not found.
// */
// protected EnzymePortalCompound searchMoleculeInChEBI(String moleculeName) {
//
// EnzymePortalCompound entry = null;
// // Sometimes moleculeName comes as "moleculeName (ACRONYM)"
// // sometimes as "moleculeName (concentration)":
// Matcher m = COMPOUND_NAME_PATTERN.matcher(moleculeName);
// m.matches(); // always
// String[] nameAcronym = {m.group(1), m.group(2)};
// // first name, then acronym (if any):
// nameLoop:
// for (String name : nameAcronym) {
// if (name == null) {
// continue; // acronym, usually
// }
// try {
// LiteEntityList lites = chebiWsClient.getLiteEntity(
// name, SearchCategory.ALL_NAMES, 25, StarsCategory.ALL);
// String chebiId = null;
//
// if (lites != null) {
// liteLoop:
// for (LiteEntity lite : lites.getListElement()) {
// Entity completeEntity = chebiWsClient
// .getCompleteEntity(lite.getChebiId());
// List<String> synonyms = new ArrayList<>();
// for (DataItem dataItem : completeEntity.getSynonyms()) {
// synonyms.add(dataItem.getData().toLowerCase());
// }
// List<String> formulae = new ArrayList<>();
// for (DataItem formula : completeEntity.getFormulae()) {
// formulae.add(formula.getData());
// }
// if (completeEntity.getChebiAsciiName()
// .equalsIgnoreCase(name)
// || synonyms.contains(name.toLowerCase())
// || formulae.contains(name)) {
// chebiId = completeEntity.getChebiId();
// }
// if (chebiId != null) {
// break liteLoop;
// }
// }
// }
//
// if ((chebiId == null || blackList.contains(name)) || StringUtils.isEmpty(name)) {
// LOGGER.warn("Not found in ChEBI: " + name);
// } else {
// entry = new EnzymePortalCompound();
// entry.setCompoundSource(MmDatabase.ChEBI.name());
// entry.setCompoundId(chebiId);
// entry.setCompoundName(name);
// break;
// }
// } catch (ChebiWebServiceFault_Exception e) {
// LOGGER.error("Searching for " + name, e);
// }
// }
// return entry;
// }
}