package uk.ac.ebi.ep.analysis.service;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import uk.ac.ebi.biobabel.util.StringUtil;
import uk.ac.ebi.ep.analysis.config.ServiceUrl;
import static uk.ac.ebi.ep.data.batch.PartitioningSpliterator.partition;
import uk.ac.ebi.ep.data.domain.SpEnzymeEvidence;
import uk.ac.ebi.ep.data.service.AnalysisService;
import uk.ac.ebi.ep.data.service.EnzymePortalService;
/**
*
* @author Joseph <joseph@ebi.ac.uk>
*/
@Service
public class DataAnalyzer {
private final Logger logger = Logger.getLogger(DataAnalyzer.class);
@Autowired
private ServiceUrl serviceUrl;
@Autowired
private AnalysisService analysisService;
@Autowired
private EnzymePortalService enzymePortalService;
/**
*
* @param file resource location (http://www.uniprot.org/uniprot/)
* @return evidences
*/
private List<String> downloadAccessionList(String file) {
List<String> accessionList = new CopyOnWriteArrayList<>();
try (InputStream is = file.startsWith("http://")
? new URL(file).openStream()
: new FileInputStream(file)) {
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
logger.info("Parsing start");
String line;
while ((line = br.readLine()) != null) {
accessionList.add(line);
}
} catch (IOException e) {
logger.error("Error During parsing", e);
}
return accessionList;
}
/**
* write enzymes (with experimental evidence) to enzyme portal database
*/
public void populateEnzymesWithEvidences() {
try {
List<SpEnzymeEvidence> enzymeEvidences = computeAccessionsWithEvidences();
logger.warn("num evidences written to enzyme portal database " + enzymeEvidences.size());
analysisService.populateEvidences(enzymeEvidences);
logger.warn("finished updating the evidence table. Starting to update UniprotEntry table with evidence Flag");
enzymePortalService.updateExpEvidenceFlag();
} catch (InterruptedException | ExecutionException ex) {
logger.error("InterruptedException | ExecutionException ", ex);
}
}
/**
* This writes the evidence analysis to a file. if no directory is
* specified, file will be generated at the user home. if no filename is
* specified, evidence.tsv will be used as filename
*
* @param fileDir the directory for the file to be written
* @param filename filename
* @param deleteFile true if file clean up is required
*/
public void writeToFile(String fileDir, String filename, Boolean deleteFile) {
try {
List<SpEnzymeEvidence> enzymeEvidences = computeAccessionsWithEvidences();
String fileName = filename;
if (StringUtil.isNullOrEmpty(filename)) {
fileName = "evidence.tsv";
}
logger.warn("num evidences written to file [" + fileName + "] " + enzymeEvidences.size());
writeToFile(enzymeEvidences, fileDir, fileName, deleteFile);
} catch (InterruptedException | ExecutionException ex) {
logger.error("InterruptedException | ExecutionException ", ex);
}
}
/**
* This writes the evidence analysis to a file. if no directory is
* specified, file will be generated at the user home. if no filename is
* specified, evidence.tsv will be used as filename
*
* @param enzymeEvidences SwissProt enzymes with evidences code 269
* @param fileDir the directory for the file to be written
* @param filename filename
* @param deleteFile true if file clean up is required
*/
public void writeToFile(List<SpEnzymeEvidence> enzymeEvidences, String fileDir, String filename, Boolean deleteFile) {
List<String> dataList = new CopyOnWriteArrayList<>();
enzymeEvidences.stream().map(ev -> {
String acc = ev.getAccession();
String evidence = ev.getEvidenceLine();
return "Accession : " + acc + " : EvidenType : " + evidence;
}).forEach(data -> dataList.add(data));
if (!StringUtil.isNullOrEmpty(fileDir)) {
createDirAndFile(dataList, fileDir, filename, deleteFile);
} else {
createFile(dataList, filename, deleteFile);
}
}
/**
* This writes file to ${user.home}
*
* @param dataList data to write
* @param filename filename
* @param deleteFile set to true if needs to delete file after creation
*/
private void createFile(List<String> dataList, String filename, Boolean deleteFile) {
try {
String userHome = System.getProperty("user.home");
String filePath = String.format("%s/%s", userHome, filename);
bufferedWrite(dataList, filePath);
if (deleteFile) {
Path path = Paths.get(filePath);
Files.deleteIfExists(path);
}
} catch (IOException ex) {
logger.error(ex);
}
}
/**
*
* @param dataList data
* @param fileLocation where file will be writen
* @param filename filename
* @param deleteFile true if file is to be deleted (use afterwards)
*/
private void createDirAndFile(List<String> dataList, String fileLocation, String filename, Boolean deleteFile) {
try {
String fileDir = fileLocation;
Set<PosixFilePermission> perms
= PosixFilePermissions.fromString("rwxr-x---");
FileAttribute<Set<PosixFilePermission>> attr
= PosixFilePermissions.asFileAttribute(perms);
Files.createDirectories(Paths.get(fileDir), attr);
String filePath = String.format("%s/%s", fileLocation, filename);
bufferedWrite(dataList, filePath);
//bufferedStream(dataList, filePath);
if (deleteFile) {
Path path = Paths.get(filePath);
Files.deleteIfExists(path);
}
} catch (IOException ex) {
logger.error(ex);
}
}
void bufferedStream(List<String> content, String filePath) {
Path p = Paths.get(filePath);
try (OutputStream out = new BufferedOutputStream(
Files.newOutputStream(p, CREATE, TRUNCATE_EXISTING))) {
for (String element : content) {
out.write(element.getBytes(), 0, element.length());
}
} catch (IOException ex) {
logger.error(ex);
}
}
/**
* <note>adaptation from Diego code</note>
* 048 Write a big list of Strings to a file - Use a BufferedWriter 049
*
* @param content
* @param filePath
*/
private void bufferedWrite(List<String> content, String filePath) throws IOException {
Path fileP = Paths.get(filePath);
Charset charset = Charset.forName("utf-8");
try (BufferedWriter writer = Files.newBufferedWriter(fileP, charset)) {
for (String line : content) {
writer.write(line, 0, line.length());
writer.newLine();
}
} catch (IOException ex) {
logger.error(ex.getMessage(), ex);
}
}
/**
*
* @param url uniprot website
* @param evidenceType type of evidence (see EvidenceType Class)
* @param enzymes enzymes from enzyme portal database
* @return enzymes with evidence tags
*/
private List<SpEnzymeEvidence> tagEvidences(String url, String evidenceType, List<String> enzymes) {
List<String> accessions = downloadAccessionList(url);
return splitOperation(accessions, evidenceType, enzymes);
}
private SpEnzymeEvidence createSpEnzymeEvidence(String accession, String evidenceType) {
SpEnzymeEvidence evidence = new SpEnzymeEvidence();
evidence.setAccession(accession);
evidence.setEvidenceLine(evidenceType);
return evidence;
}
private List<SpEnzymeEvidence> splitOperation(List<String> accessions, String evidenceType, List<String> enzymes) {
List<SpEnzymeEvidence> evidences = new CopyOnWriteArrayList<>();
Stream<String> existingStream = accessions.stream();
Stream<List<String>> partitioned = partition(existingStream, 100, 1);
partitioned.parallel().forEach(chunk -> {
chunk.stream()
.filter(accession -> enzymes.contains(accession))
.map(accession -> createSpEnzymeEvidence(accession, evidenceType))
.forEach(evidence -> evidences.add(evidence));
});
return evidences;
}
private List<SpEnzymeEvidence> computeAccessionsWithEvidences() throws InterruptedException, ExecutionException {
List<String> enzymes = enzymePortalService.findAllSwissProtAccessions();
logger.info("num swissprot enzymes from enzyme portal database " + enzymes.size());
CompletableFuture<List<SpEnzymeEvidence>> functionFuture = CompletableFuture
.supplyAsync(() -> tagEvidences(serviceUrl.getFunctionUrl(), EvidenceType.FUNCTION.getEvidenceName(), enzymes));
CompletableFuture<List<SpEnzymeEvidence>> cofactorFuture = CompletableFuture
.supplyAsync(() -> tagEvidences(serviceUrl.getCofactorUrl(), EvidenceType.COFACTOR.getEvidenceName(), enzymes));
CompletableFuture<List<SpEnzymeEvidence>> activityFuture = CompletableFuture
.supplyAsync(() -> tagEvidences(serviceUrl.getActivityUrl(), EvidenceType.CATALYTIC_ACTIVITY.getEvidenceName(), enzymes));
CompletableFuture<List<SpEnzymeEvidence>> regulationFuture = CompletableFuture
.supplyAsync(() -> tagEvidences(serviceUrl.getRegulationUrl(), EvidenceType.ENZYME_REGULATION.getEvidenceName(), enzymes));
CompletableFuture<List<SpEnzymeEvidence>> biophysioFuture = CompletableFuture
.supplyAsync(() -> tagEvidences(serviceUrl.getBioPhysioUrl(), EvidenceType.BIOPHYSICOCHEMICAL_PROPERTIES.getEvidenceName(), enzymes));
CompletableFuture<List<SpEnzymeEvidence>> futures = functionFuture
.thenCombineAsync(cofactorFuture, (functions, cofactors) -> combineList(true, functions, cofactors))
.thenCombineAsync(activityFuture, (entries, activity) -> combineList(true, entries, activity))
.thenCombineAsync(regulationFuture, (entries, regulation) -> combineList(true, entries, regulation))
.thenCombineAsync(biophysioFuture, (entries, bio) -> combineList(true, entries, bio));
List<SpEnzymeEvidence> evidences = futures.get().stream().collect(Collectors.toList());
logger.info("Number of Accessions with Evidences found :: " + evidences.size());
return evidences;
}
private List<SpEnzymeEvidence> combineList(Boolean allowDuplicate, List<SpEnzymeEvidence>... parts) {
List<SpEnzymeEvidence> data = new CopyOnWriteArrayList<>();
for (List<SpEnzymeEvidence> part : parts) {
data.addAll(part);
}
if (!allowDuplicate) {
return data.stream().distinct().collect(Collectors.toList());
}
return data;
}
List<String> combineString(List<String> part1, List<String> part2, Boolean allowDuplicate) {
List<String> data = new CopyOnWriteArrayList<>();
data.addAll(part1);
data.addAll(part2);
if (!allowDuplicate) {
return data.stream().distinct().collect(Collectors.toList());
}
return data;
}
List<SpEnzymeEvidence> combine(List<SpEnzymeEvidence> part1, List<SpEnzymeEvidence> part2, Boolean allowDuplicate) {
List<SpEnzymeEvidence> data = new CopyOnWriteArrayList<>();
data.addAll(part1);
data.addAll(part2);
if (!allowDuplicate) {
return data.stream().distinct().collect(Collectors.toList());
}
return data;
}
}