package uk.ac.ebi.ep.xml.generator;
import java.io.FileNotFoundException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.bind.JAXBException;
import org.springframework.util.StringUtils;
import uk.ac.ebi.ep.data.domain.IntenzEnzymes;
import uk.ac.ebi.ep.data.domain.UniprotEntry;
import uk.ac.ebi.ep.data.service.EnzymePortalXmlService;
import uk.ac.ebi.ep.xml.config.XmlConfigParams;
import uk.ac.ebi.ep.xml.model.AdditionalFields;
import uk.ac.ebi.ep.xml.model.CrossReferences;
import uk.ac.ebi.ep.xml.model.Database;
import uk.ac.ebi.ep.xml.model.Entries;
import uk.ac.ebi.ep.xml.model.Entry;
import uk.ac.ebi.ep.xml.model.Field;
import uk.ac.ebi.ep.xml.model.Ref;
import uk.ac.ebi.ep.xml.util.DatabaseName;
import uk.ac.ebi.ep.xml.validator.EnzymePortalXmlValidator;
/**
*
* @author Joseph <joseph@ebi.ac.uk>
*/
public class EnzymeCentric extends XmlGenerator {
private final XmlConfigParams xmlConfigParams;
private ForkJoinPool forkJoinPool;
private static final int SEQUENTIAL_THRESHOLD = 10_000;
public EnzymeCentric(EnzymePortalXmlService enzymePortalXmlService, XmlConfigParams xmlConfigParams) {
super(enzymePortalXmlService, xmlConfigParams);
this.xmlConfigParams = xmlConfigParams;
}
@Override
public void validateXML() {
String ebeyeXSDs = xmlConfigParams.getEbeyeXSDs();
String enzymeCentricXmlDir = xmlConfigParams.getEnzymeCentricXmlDir();
if (ebeyeXSDs == null || enzymeCentricXmlDir == null) {
try {
String msg
= "Xsd files or XML directory cannot be Null. Please ensure that ep-xml-config.properties is in"
+ " the classpath.";
throw new FileNotFoundException(msg);
} catch (FileNotFoundException ex) {
logger.error(ex.getMessage(), ex);
}
} else {
String[] xsdFiles = ebeyeXSDs.split(",");
EnzymePortalXmlValidator.validateXml(enzymeCentricXmlDir, xsdFiles);
}
}
@Override
public void generateXmL() throws JAXBException {
generateXmL(xmlConfigParams.getEnzymeCentricXmlDir());
}
@Override
public void generateXmL(String xmlFileLocation) throws JAXBException {
// List<IntenzEnzymes> enzymes
// = enzymePortalXmlService.findAllIntenzEnzymes().stream().sorted().collect(Collectors.toList());
List<IntenzEnzymes> enzymes
= enzymePortalXmlService.findNonTransferredEnzymes().stream().sorted().collect(Collectors.toList());
int entryCount = enzymes.size();
logger.warn("Number of Intenz enzymes ready to be processed : " + entryCount);
Database database = buildDatabaseInfo(entryCount);
List<Entry> entryList = enzymes.stream().map(enzyme -> {
List<UniprotEntry> entries = enzymePortalXmlService.findEnzymesByEcNumberNativeQuery(enzyme.getEcNumber());
Entry processedEntry;
if (entries.size() <= SEQUENTIAL_THRESHOLD) {
Stream<UniprotEntry> sequentialUniProtEntryStream = entries.stream();
Set<Field> seqFields = new HashSet<>();
Set<Ref> seqRefs = new HashSet<>();
processedEntry = processEntries(sequentialUniProtEntryStream, enzyme, seqFields, seqRefs);
} else {
Stream<UniprotEntry> parallelUniProtEntryStream = entries.stream().parallel();
Set<Field> parallelFields = Collections.synchronizedSet(new HashSet<>());
Set<Ref> parallelRefs = Collections.synchronizedSet(new HashSet<>());
processedEntry = getForkJoinPool()
.submit(() -> processEntries(parallelUniProtEntryStream, enzyme, parallelFields, parallelRefs))
.join();
}
return processedEntry;
}).collect(Collectors.toList());
Entries entries = new Entries();
entries.setEntry(entryList);
database.setEntries(entries);
String xmlDirectory = xmlConfigParams.getXmlDir();
writeXml(database, xmlDirectory, xmlFileLocation);
}
private Entry processEntries(Stream<UniprotEntry> entries, IntenzEnzymes enzyme, Set<Field> fields, Set<Ref> refs) {
Entry entry = new Entry();
entry.setId(enzyme.getEcNumber());
entry.setName(enzyme.getEnzymeName());
entry.setDescription(enzyme.getCatalyticActivity());
addEnzymeFamilyField(enzyme.getEcNumber(), fields);
entries.forEach((uniprotEntry) -> {
addUniprotIdFields(uniprotEntry, fields);
addProteinNameFields(uniprotEntry, fields);
addScientificNameFields(uniprotEntry, fields);
addCommonNameFields(uniprotEntry, fields);
addGeneNameFields(uniprotEntry, fields);
addSynonymFields(uniprotEntry, fields);
addSource(enzyme, refs);
addAccessionXrefs(uniprotEntry, refs);
addTaxonomyXrefs(uniprotEntry, refs);
addCompoundFieldsAndXrefs(uniprotEntry, fields, refs);
addDiseaseFieldsAndXrefs(uniprotEntry, fields, refs);
addPathwaysXrefs(uniprotEntry, refs);
});
AdditionalFields additionalFields = new AdditionalFields();
additionalFields.setField(fields);
entry.setAdditionalFields(additionalFields);
CrossReferences cr = new CrossReferences();
cr.setRef(refs);
entry.setCrossReferences(cr);
return entry;
}
private void addSource(IntenzEnzymes enzyme, Set<Ref> refs) {
if (!StringUtils.isEmpty(enzyme.getEcNumber())) {
Ref xref = new Ref(enzyme.getEcNumber(), DatabaseName.INTENZ.getDbName());
refs.add(xref);
}
}
private ForkJoinPool getForkJoinPool() {
if (forkJoinPool == null) {
forkJoinPool = new ForkJoinPool();
}
return forkJoinPool;
}
}