package org.nextprot.api.core.utils.annot.merge.impl;

import com.google.common.base.Preconditions;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.utils.annot.merge.AnnotationCluster;
import org.nextprot.api.core.utils.annot.merge.AnnotationListMerger;
import org.nextprot.api.core.utils.annot.merge.AnnotationMerger;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * Merge two lists of annotations together.
 * <p>
 * Merging is done in two steps:
 *
 * <ol>
 * <li>a mapping step where groups of annotations are made by similarity</li>
 * <li>a reducing (merging) step where each group is reduced to a merged annotation</li>
 * </ol>
 * <p>
 * When one of the two lists is {@code null} or empty, no merging takes place and the
 * other list is handed back as-is (see {@link #merge(List, List)}).
 */
public class AnnotationListMapReduceMerger implements AnnotationListMerger {

    /** Strategy used to reduce a cluster of two annotations into a single one. */
    private final AnnotationMerger annotationMerger;

    /**
     * Create a merger that reduces clusters with a new {@link AnnotationUpdater}.
     */
    public AnnotationListMapReduceMerger() {

        this(new AnnotationUpdater());
    }

    /**
     * Create a merger that reduces clusters with the given strategy.
     *
     * @param annotationMerger the strategy merging two annotations into one
     * @throws NullPointerException if {@code annotationMerger} is {@code null}
     */
    public AnnotationListMapReduceMerger(AnnotationMerger annotationMerger) {

        Preconditions.checkNotNull(annotationMerger);

        this.annotationMerger = annotationMerger;
    }

    /**
     * Merge the annotations of both lists.
     * <p>
     * If {@code annotations1} is {@code null} or empty, {@code annotations2} itself is returned
     * (or a new empty list when {@code annotations2} is {@code null}). Otherwise, if
     * {@code annotations2} is {@code null} or empty, {@code annotations1} itself is returned.
     * In every other case a newly collected list is returned.
     *
     * @param annotations1 first annotation list (may be {@code null})
     * @param annotations2 second annotation list (may be {@code null})
     * @return merged annotations, never {@code null}
     * @throws NextProtException if a cluster ends up with more than 2 annotations, or if an
     *                           annotation cannot be added to the cluster found for it
     */
    public List<Annotation> merge(List<Annotation> annotations1, List<Annotation> annotations2) {

        if (annotations1 == null || annotations1.isEmpty()) {
            // nothing to merge from the first list; never hand a null list back to the caller
            return (annotations2 != null) ? annotations2 : new ArrayList<>();
        }

        if (annotations2 == null || annotations2.isEmpty()) {
            return annotations1;
        }

        List<AnnotationCluster> clusters = clusterSimilarAnnotations(annotations1, annotations2);

        // Reduce clusters into merged annotations
        return clusters.stream()
                .map(this::doMerge)
                .collect(Collectors.toList());
    }

    /**
     * Map similar annotations in cluster
     *
     * @param annotationList1 first annotation list
     * @param annotationList2 second annotation list
     * @return a list of clusters
     */
    private List<AnnotationCluster> clusterSimilarAnnotations(List<Annotation> annotationList1, List<Annotation> annotationList2) {

        // clusters are seeded from the second list
        // NOTE(review): presumably one cluster per annotation of the second list - confirm in AnnotationCluster.valueOfClusters
        List<AnnotationCluster> annotationClusters = AnnotationCluster.valueOfClusters(annotationList2);

        // then each annotation of the first list joins a matching cluster or opens a new one
        clusterAnnotations(annotationList1, annotationClusters);

        return annotationClusters;
    }

    /**
     * Dispatch each annotation into the cluster found for it by an {@link AnnotationClusterFinder},
     * or into a new cluster appended to {@code annotationClusters} when none is found.
     *
     * @param annotations the annotations to dispatch
     * @param annotationClusters the clusters to search and to grow (modified in place)
     * @throws NextProtException wrapping the {@link AnnotationCluster.InvalidAnnotationClusterCategoryException}
     *                           raised when the annotation cannot be added to the found cluster
     */
    private void clusterAnnotations(List<Annotation> annotations, List<AnnotationCluster> annotationClusters) {

        AnnotationClusterFinder finder = new AnnotationClusterFinder();

        for (Annotation annotation : annotations) {

            Optional<AnnotationCluster> foundAnnotationCluster = finder.find(annotation, annotationClusters);

            if (!foundAnnotationCluster.isPresent()) {
                // no matching cluster: the annotation starts a cluster of its own
                annotationClusters.add(AnnotationCluster.valueOf(annotation));
                continue;
            }

            try {
                foundAnnotationCluster.get().add(annotation);
            } catch (AnnotationCluster.InvalidAnnotationClusterCategoryException e) {
                // surface the checked exception as the project's unchecked one, keeping the cause
                throw new NextProtException(e);
            }
        }
    }

    /**
     * Reduce a cluster to a single annotation.
     *
     * @param cluster the cluster to reduce
     * @return the only annotation of a singleton cluster, or the result of merging both
     *         annotations of a two-element cluster
     * @throws IllegalStateException if the cluster is empty
     * @throws NextProtException if the cluster contains more than 2 annotations
     */
    private Annotation doMerge(AnnotationCluster cluster) {

        if (cluster.size() == 0) {
            throw new IllegalStateException("cluster "+ cluster.getCategory()+" should not be empty");
        }

        if (cluster.size() == 1) {
            return cluster.getAnnotations().get(0);
        }

        if (cluster.size() == 2) {
            // the first annotation is the original one
            // NOTE(review): presumably the one coming from the second list, since clusters are
            // seeded from it - confirm that AnnotationCluster.add appends
            return annotationMerger.merge(cluster.getAnnotations().get(0), cluster.getAnnotations().get(1));
        }

        throw new NextProtException("cannot merge more than 2 annotations from cluster "+cluster.getAnnotations());
    }
}