package nl.tudelft.lifetiles.sequence.model;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.ToIntBiFunction;

import nl.tudelft.lifetiles.sequence.Mutation;

/**
 * Contains a partial sequence.
 *
 * <p>
 * Equality and hashing are based solely on the segment identifier, whereas
 * {@link #compareTo(SequenceSegment)} orders segments by position, content
 * and sources. The natural ordering is therefore not consistent with
 * {@link #equals(Object)}: two segments with different identifiers may
 * compare as 0.
 *
 * @author Rutger van den Berg
 */
public class SequenceSegment implements Comparable<SequenceSegment> {
    /**
     * Source of fresh identifiers; incremented once for every segment built
     * through the primary constructor.
     */
    private static final AtomicInteger NEXT_ID = new AtomicInteger();

    /**
     * Comparison steps used by compareTo, applied in order until one of them
     * yields a non-zero result: unified start, start, unified end, end,
     * textual content and finally the number of sources.
     */
    private static final List<ToIntBiFunction<SequenceSegment, SequenceSegment>> COMPARATORS =
            Arrays.asList(
                    (left, right) -> Long.compare(left.getUnifiedStart(),
                            right.getUnifiedStart()),
                    (left, right) -> Long.compare(left.getStart(),
                            right.getStart()),
                    (left, right) -> Long.compare(left.getUnifiedEnd(),
                            right.getUnifiedEnd()),
                    (left, right) -> Long.compare(left.getEnd(),
                            right.getEnd()),
                    (left, right) -> left.getContent().toString()
                            .compareTo(right.getContent().toString()),
                    (left, right) -> Integer.compare(left.getSources().size(),
                            right.getSources().size()));

    /**
     * Identifier for this segment. Copies made with the copy constructor
     * share the identifier of their original.
     */
    private final int identifier;

    /**
     * The content of this segment.
     */
    private SegmentContent content;

    /**
     * Contains the sources containing this segment.
     */
    private Set<Sequence> sources;

    /**
     * The start position for this segment.
     */
    private final long start;

    /**
     * The end position for this segment.
     */
    private final long end;

    /**
     * The unified start position for this segment. Defaults to 1.
     */
    private long unifiedStart = 1;

    /**
     * The unified end position for this segment. Defaults to
     * {@link Long#MAX_VALUE}.
     */
    private long unifiedEnd = Long.MAX_VALUE;

    /**
     * The start position in comparison with the reference. Defaults to 1.
     */
    private long referenceStart = 1;

    /**
     * The end position in comparison with the reference. Defaults to
     * {@link Long#MAX_VALUE}.
     */
    private long referenceEnd = Long.MAX_VALUE;

    /**
     * The mutation annotation of this segment.
     */
    private Mutation mutation;

    /**
     * Creates a segment with a freshly generated identifier. The given set
     * of sources is stored as-is (no copy is made).
     *
     * @param sources
     *            The sources containing this segment.
     * @param startPosition
     *            The start position for this segment.
     * @param endPosition
     *            The end position for this segment.
     * @param content
     *            The content for this segment.
     */
    public SequenceSegment(final Set<Sequence> sources,
            final long startPosition, final long endPosition,
            final SegmentContent content) {
        this.sources = sources;
        this.start = startPosition;
        this.end = endPosition;
        this.content = content;
        this.identifier = NEXT_ID.incrementAndGet();
    }

    /**
     * Copy constructor. The copy gets its own set of sources (holding the
     * same elements) but keeps the identifier of the original, so the copy
     * and the original are equal according to {@link #equals(Object)}.
     *
     * @param original
     *            The SequenceSegment to base the new one on.
     */
    public SequenceSegment(final SequenceSegment original) {
        this.identifier = original.getIdentifier();
        this.sources = new HashSet<>(original.getSources());
        this.content = original.getContent();
        this.mutation = original.getMutation();
        this.start = original.getStart();
        this.end = original.getEnd();
        this.unifiedStart = original.getUnifiedStart();
        this.unifiedEnd = original.getUnifiedEnd();
        this.referenceStart = original.getReferenceStart();
        this.referenceEnd = original.getReferenceEnd();
    }

    /**
     * Change the content of the segment.
     *
     * @param content
     *            new content.
     */
    public void setContent(final SegmentContent content) {
        this.content = content;
    }

    /**
     * @return the content
     */
    public SegmentContent getContent() {
        return content;
    }

    /**
     * @return the end position
     */
    public long getEnd() {
        return end;
    }

    /**
     * @return the sources
     */
    public Set<Sequence> getSources() {
        return sources;
    }

    /**
     * Change the current sources to the new sources.
     *
     * @param set
     *            new sources
     */
    public void setSources(final Set<Sequence> set) {
        sources = set;
    }

    /**
     * @return the start position
     */
    public long getStart() {
        return start;
    }

    /**
     * @return the unified start position
     */
    public long getUnifiedStart() {
        return unifiedStart;
    }

    /**
     * @param unifiedStart
     *            unified start position of this sequence segment.
     */
    public void setUnifiedStart(final long unifiedStart) {
        this.unifiedStart = unifiedStart;
    }

    /**
     * @return the unified end position
     */
    public long getUnifiedEnd() {
        return unifiedEnd;
    }

    /**
     * @param unifiedEnd
     *            unified end position of this sequence segment.
     */
    public void setUnifiedEnd(final long unifiedEnd) {
        this.unifiedEnd = unifiedEnd;
    }

    /**
     * @return mutation annotation of sequence segment.
     */
    public Mutation getMutation() {
        return mutation;
    }

    /**
     * @param mutation
     *            Mutation which is annotated onto the sequence segment.
     */
    public void setMutation(final Mutation mutation) {
        this.mutation = mutation;
    }

    /**
     * Returns the distance between this sequence segment and another
     * (non-Euclidean distance): the other segment's unified start minus this
     * segment's unified end, minus one. The result is 0 when the other
     * segment starts directly after this one and negative when it starts at
     * or before this segment's unified end.
     *
     * @param other
     *            Sequence segment which needs to be compared.
     * @return Distance between this sequence and other sequence.
     */
    public long distanceTo(final SequenceSegment other) {
        return other.getUnifiedStart() - getUnifiedEnd() - 1;
    }

    /**
     * Compares two segments, first by start positions, then end positions,
     * then content, then sources.
     *
     * <p>
     * More precisely, the following keys are compared in order: unified
     * start, start, unified end, end, the string form of the content, the
     * number of sources and finally the source identifiers. The source
     * identifiers are compared in sorted order so that the outcome does not
     * depend on the iteration order of the underlying sets.
     *
     * <p>
     * The segment identifier does not take part in the comparison, so this
     * ordering is not consistent with {@link #equals(Object)}.
     *
     * @param other
     *            Sequence segment which needs to be compared.
     * @return a negative value, zero or a positive value when this segment
     *         sorts before, equal to or after the other segment.
     */
    @Override
    public int compareTo(final SequenceSegment other) {
        for (ToIntBiFunction<SequenceSegment, SequenceSegment> comp : COMPARATORS) {
            int result = comp.applyAsInt(this, other);
            if (result != 0) {
                return result;
            }
        }
        // Both source sets have the same size at this point; walk them in
        // identifier order so equal sets always compare as equal.
        Iterator<Sequence> thisIt = sortedByIdentifier(getSources()).iterator();
        Iterator<Sequence> otherIt = sortedByIdentifier(other.getSources())
                .iterator();
        while (thisIt.hasNext() && otherIt.hasNext()) {
            int result = thisIt.next().getIdentifier()
                    .compareTo(otherIt.next().getIdentifier());
            if (result != 0) {
                return result;
            }
        }
        return 0;
    }

    /**
     * Copies the given sequences into a list ordered by their identifiers.
     *
     * @param sequences
     *            the sequences to order; not modified.
     * @return a new list holding the sequences sorted by identifier.
     */
    private static List<Sequence> sortedByIdentifier(
            final Set<Sequence> sequences) {
        List<Sequence> sorted = new ArrayList<>(sequences);
        sorted.sort((left, right) -> left.getIdentifier()
                .compareTo(right.getIdentifier()));
        return sorted;
    }

    /**
     * Returns the reference start position.
     *
     * @return reference start position.
     */
    public long getReferenceStart() {
        return referenceStart;
    }

    /**
     * Returns the reference end position.
     *
     * @return reference end position.
     */
    public long getReferenceEnd() {
        return referenceEnd;
    }

    /**
     * Sets the reference start position.
     *
     * @param referenceStart
     *            Reference start position.
     */
    public void setReferenceStart(final long referenceStart) {
        this.referenceStart = referenceStart;
    }

    /**
     * Sets the reference end position.
     *
     * @param referenceEnd
     *            Reference end position.
     */
    public void setReferenceEnd(final long referenceEnd) {
        this.referenceEnd = referenceEnd;
    }

    /**
     * Hash code derived from the identifier only, matching
     * {@link #equals(Object)}.
     *
     * @return the hash code of this segment.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        return prime + identifier;
    }

    /**
     * Two segments are equal when they carry the same identifier; positions,
     * content and sources are not taken into account.
     *
     * @param obj
     *            the object to compare with.
     * @return true if obj is a SequenceSegment with the same identifier.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so no separate null check is needed.
        if (!(obj instanceof SequenceSegment)) {
            return false;
        }
        return identifier == ((SequenceSegment) obj).getIdentifier();
    }

    /**
     * @return the identifier
     */
    public int getIdentifier() {
        return identifier;
    }

    /**
     * Calculates the mutation for this sequence segment given that this
     * segment is not part of the reference sequence. Empty content is a
     * deletion; otherwise a reference start beyond the reference end is an
     * insertion, and anything else is a polymorphism. The stored mutation
     * annotation is not modified by this method.
     *
     * @return calculated mutation type of this segment.
     */
    public Mutation determineMutation() {
        if (content.isEmpty()) {
            return Mutation.DELETION;
        }
        if (referenceStart > referenceEnd) {
            return Mutation.INSERTION;
        }
        return Mutation.POLYMORPHISM;
    }

    /**
     * Calculate the contextless interestingness of this sequence. This is
     * effectively the fraction between length and number of sources, where
     * unknown nucleotides ("N") weigh half as much as known ones.
     *
     * <p>
     * NOTE(review): there is no guard for an empty set of sources; the
     * division is done in floating point, so the result is then not a
     * finite number rather than an exception.
     *
     * @return the interestingness score, 0 for empty content.
     */
    public double interestingness() {
        if (content.isEmpty()) {
            return 0;
        }
        final String uninteresting = "N";
        final String rawContent = content.toString();
        final String cleanContent = rawContent.replace(uninteresting, "");
        // Averaging raw and cleaned length counts every "N" as half a
        // nucleotide.
        final double contentScore = (rawContent.length()
                + cleanContent.length()) / 2.0;
        return contentScore / sources.size();
    }
}