package org.nextprot.api.core.utils.annot;
import com.nextprot.api.annotation.builder.statement.dao.StatementDao;
import org.apache.log4j.Logger;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.service.MasterIdentifierService;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.service.EntryBuilderService;
import org.nextprot.api.core.service.fluent.EntryConfig;
import org.nextprot.commons.statements.StatementField;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Collects per-category statistics about the annotations of a set of entries and
 * renders them as a tab-separated table.
 *
 * <p>Each annotation is sorted into one of three buckets (see
 * {@link NpBedMergingStats.AnnotType}) depending on its annotation hash and
 * unique name. For every {@link AnnotationCategory} encountered, the export
 * contains the size of each bucket plus one example annotation per bucket.
 *
 * <p>The statistics are kept in instance state and are reset on every export, so
 * a single instance should not be used for concurrent exports.
 */
public class AnnotationExporter {

    private static final Logger LOGGER = Logger.getLogger(AnnotationExporter.class);

    private final EntryBuilderService entryBuilderService;
    private final StatementDao statementDao;
    private final MasterIdentifierService masterIdentifierService;
    // NOTE(review): only config.fields (the header row) is read by the code in this
    // class; config.categories is never consulted here - confirm whether the
    // statistics were meant to be restricted to the configured categories.
    private final Config config;
    private final Map<AnnotationCategory, NpBedMergingStats> statisticsMap = new EnumMap<>(AnnotationCategory.class);

    /**
     * Creates an exporter using the default {@link Config}.
     *
     * @param entryBuilderService     service used to build entries with their annotations
     * @param statementDao            DAO used to list all distinct gene names
     * @param masterIdentifierService service used to resolve gene names to entry accessions
     */
    public AnnotationExporter(EntryBuilderService entryBuilderService, StatementDao statementDao, MasterIdentifierService masterIdentifierService) {
        this(entryBuilderService, statementDao, masterIdentifierService, new Config());
    }

    /**
     * Creates an exporter using the given configuration.
     *
     * @param entryBuilderService     service used to build entries with their annotations
     * @param statementDao            DAO used to list all distinct gene names
     * @param masterIdentifierService service used to resolve gene names to entry accessions
     * @param config                  export configuration (provides the header fields)
     */
    public AnnotationExporter(EntryBuilderService entryBuilderService, StatementDao statementDao, MasterIdentifierService masterIdentifierService,
                              Config config) {
        this.entryBuilderService = entryBuilderService;
        this.statementDao = statementDao;
        this.masterIdentifierService = masterIdentifierService;
        this.config = config;
    }

    /**
     * Exports the annotation statistics for every distinct gene name known to the
     * statement DAO.
     *
     * @return the statistics table as a TSV string (header row included)
     */
    public String exportAllGeneStatementsAsTsvString() {
        List<String> geneNames = statementDao.findAllDistinctValuesforField(StatementField.GENE_NAME);
        return exportAnnotationStatsAsTsvString(geneNames);
    }

    /**
     * Recomputes the statistics for the given genes and renders them as TSV.
     *
     * <p>The first line is the header built from the configured fields; it is
     * followed by one line per annotation category found. Gene names that cannot
     * be resolved to an entry accession are logged and skipped.
     *
     * @param geneNames gene names whose entries should be analysed
     * @return the statistics table as a TSV string, every line terminated by {@code \n}
     */
    public String exportAnnotationStatsAsTsvString(List<String> geneNames) {
        StringBuilder sb = new StringBuilder();

        // header row
        sb.append(config.getFields().stream().collect(Collectors.joining("\t"))).append("\n");

        calcAnnotationStatsFromGeneNames(geneNames);

        // one row per category
        for (Map.Entry<AnnotationCategory, NpBedMergingStats> entry : statisticsMap.entrySet()) {
            appendStatsRow(sb, entry.getKey(), entry.getValue());
        }
        return sb.toString();
    }

    /** Appends one TSV row: category, the three bucket sizes, then one example per bucket. */
    private void appendStatsRow(StringBuilder sb, AnnotationCategory category, NpBedMergingStats stats) {
        List<Annotation> merged = stats.getAnnots(NpBedMergingStats.AnnotType.MERGED);
        List<Annotation> unmergedBed = stats.getAnnots(NpBedMergingStats.AnnotType.UNMERGED_BED);
        List<Annotation> unmergedNp = stats.getAnnots(NpBedMergingStats.AnnotType.UNMERGED_NP);

        sb.append(category)
                .append("\t").append(merged.size())
                .append("\t").append(unmergedBed.size())
                .append("\t").append(unmergedNp.size())
                // the merged example shows both identifiers, the others only the unique name
                .append("\t").append(merged.isEmpty() ? "" : toAnnotationString(merged.get(0)))
                .append("\t").append(firstUniqueNameOrEmpty(unmergedBed))
                .append("\t").append(firstUniqueNameOrEmpty(unmergedNp))
                .append("\n");
    }

    /** Returns the unique name of the first annotation, or an empty string for an empty list. */
    private static String firstUniqueNameOrEmpty(List<Annotation> annotations) {
        return annotations.isEmpty() ? "" : annotations.get(0).getUniqueName();
    }

    /** Formats an annotation as {@code "<uniqueName> | <annotationHash>"}. */
    private String toAnnotationString(Annotation annotation) {
        return annotation.getUniqueName() + " | " + annotation.getAnnotationHash();
    }

    /**
     * Resolves each gene name to an entry accession and computes the statistics
     * for the resolved accessions. Only the first accession returned for a gene
     * is used.
     */
    private void calcAnnotationStatsFromGeneNames(List<String> geneNames) {
        List<String> accessions = new ArrayList<>();

        for (String geneName : geneNames) {
            Set<String> found = masterIdentifierService.findEntryAccessionByGeneName(geneName, false);

            // Skip unresolved genes: calling iterator().next() on an empty set would
            // throw NoSuchElementException and abort the whole export.
            if (found == null || found.isEmpty()) {
                LOGGER.warn("could not find " + geneName);
                continue;
            }
            accessions.add(found.iterator().next());
        }
        calcAnnotationStatsFromEntryAccessions(accessions);
    }

    /**
     * Clears the statistics and rebuilds them from the annotations of the given
     * entries.
     *
     * <p>Classification of an annotation:
     * <ul>
     *   <li>annotation hash is {@code null}: {@code UNMERGED_NP};</li>
     *   <li>annotation hash equals the unique name: {@code UNMERGED_BED};</li>
     *   <li>otherwise: {@code MERGED}.</li>
     * </ul>
     */
    private void calcAnnotationStatsFromEntryAccessions(List<String> entryAccessions) {
        statisticsMap.clear();

        for (String entryAccession : entryAccessions) {
            Entry entry = entryBuilderService.build(EntryConfig.newConfig(entryAccession).withAnnotations());

            for (Annotation annotation : entry.getAnnotations()) {
                NpBedMergingStats stats = statisticsMap.computeIfAbsent(
                        annotation.getAPICategory(), category -> new NpBedMergingStats());

                stats.addAnnot(classify(annotation), annotation);
            }
        }
    }

    /** Determines the bucket of an annotation from its annotation hash and unique name. */
    private static NpBedMergingStats.AnnotType classify(Annotation annotation) {
        String annotationHash = annotation.getAnnotationHash();

        // no hash at all
        if (annotationHash == null) {
            return NpBedMergingStats.AnnotType.UNMERGED_NP;
        }
        // hash present: identical to the unique name means "not merged"
        if (annotation.getUniqueName().equals(annotationHash)) {
            return NpBedMergingStats.AnnotType.UNMERGED_BED;
        }
        return NpBedMergingStats.AnnotType.MERGED;
    }

    /**
     * Returns the statistics computed by the last export.
     *
     * @return the live internal map (not a copy), keyed by annotation category
     */
    public Map<AnnotationCategory, NpBedMergingStats> getStatisticsMap() {
        return statisticsMap;
    }

    /**
     * Writes {@code content} to the file {@code <directory>/<fileName>.tsv}.
     *
     * <p>The file is written with the platform default charset, as done by
     * {@link PrintWriter#PrintWriter(String)}.
     *
     * @param directory target directory
     * @param fileName  file name without the {@code .tsv} extension
     * @param content   text to write
     * @throws FileNotFoundException if the file cannot be created or opened for writing
     */
    public void exportAsTsvFile(String directory, String fileName, String content) throws FileNotFoundException {
        String filename = directory + File.separator + fileName + ".tsv";

        // try-with-resources guarantees the writer is closed even if writing fails
        try (PrintWriter pw = new PrintWriter(filename)) {
            pw.append(content);
        }
    }

    /**
     * Export configuration: a set of annotation categories and the fixed list of
     * column names used as the TSV header.
     */
    public static class Config {

        private final Set<AnnotationCategory> categories;
        private final List<String> fields = Arrays.asList("category", "merged#", "unmerged_bed#", "unmerged_np1#", "merged_ex", "unmerged_bed_ex", "unmerged_np1_ex");

        /** Creates a configuration with the categories {@code VARIANT} and {@code MUTAGENESIS}. */
        public Config() {
            this(EnumSet.of(AnnotationCategory.VARIANT, AnnotationCategory.MUTAGENESIS));
        }

        /**
         * Creates a configuration from database annotation type names.
         *
         * @param categories names resolved through
         *                   {@code AnnotationCategory.getByDbAnnotationTypeName}
         */
        public Config(String... categories) {
            this.categories = new HashSet<>(categories.length);

            for (String category : categories) {
                this.categories.add(AnnotationCategory.getByDbAnnotationTypeName(category));
            }
        }

        /**
         * Creates a configuration from the given categories.
         *
         * @param categories the category set; it is stored as is, without copying
         */
        public Config(Set<AnnotationCategory> categories) {
            this.categories = categories;
        }

        /** @return the configured categories */
        public Set<AnnotationCategory> getCategories() {
            return categories;
        }

        /** @return the column names of the TSV header, in output order */
        public List<String> getFields() {
            return fields;
        }

        @Override
        public String toString() {
            return "Config{" +
                    "categories=" + categories +
                    ", fields=" + fields +
                    '}';
        }
    }

    /**
     * Annotations of a single category, grouped by {@link AnnotType}.
     */
    public static class NpBedMergingStats {

        /** Bucket an annotation falls into. */
        public enum AnnotType {
            MERGED, UNMERGED_BED, UNMERGED_NP
        }

        private final Map<AnnotType, List<Annotation>> annots = new EnumMap<>(AnnotType.class);

        /** Creates the statistics with an empty list for every {@link AnnotType}. */
        NpBedMergingStats() {
            for (AnnotType type : AnnotType.values()) {
                annots.put(type, new ArrayList<>());
            }
        }

        /**
         * Adds an annotation to the given bucket.
         *
         * @param type       bucket to add to
         * @param annotation annotation to record
         */
        public void addAnnot(AnnotType type, Annotation annotation) {
            annots.get(type).add(annotation);
        }

        /**
         * @param type bucket to inspect
         * @return number of annotations recorded in that bucket
         */
        public int countAnnots(AnnotType type) {
            return annots.get(type).size();
        }

        /**
         * @param type bucket to inspect
         * @return the live internal list of that bucket, in insertion order
         */
        public List<Annotation> getAnnots(AnnotType type) {
            return annots.get(type);
        }
    }
}