//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.uima; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Generic span type that can be sorted. * * Comparing will order by start (earliest first), then end (earliest first) then value (as per String compare). * * * */ public class ComparableTextSpan implements Comparable<ComparableTextSpan> { private final int start; private final int end; private final String value; /** New instance with a start and end point. * @param start offset in the text * @param end offset in the text */ public ComparableTextSpan(int start, int end) { this(start, end, null); } /** * @param start * @param end * @param value */ public ComparableTextSpan(int start, int end, String value) { this.start = start; this.end = end; this.value = value; } @Override public int compareTo(ComparableTextSpan s) { if (s.start > this.start) { return -1; } else if (s.start < this.start) { return 1; } else if (s.end > this.end) { return -1; } else if (s.end < this.end) { return 1; } else if(value == null && s.value == null) { return 0; } else if(value != null && s.value == null) { return 1; } else if(value == null && s.value != null) { return -1; } else { return value.compareTo(s.value); } } /** Get the start offset of this span. * @return the start offset */ public int getStart() { return start; } /** Get the end offset of this span. * @return the end offset */ public int getEnd() { return end; } /** Get the value associated with this value. * * This may not be the text within the span! * * @return the value (perhaps null) */ public String getValue() { return value; } /** Check is this span has a non-null value. * @return boolean if value is non-null */ public boolean hasValue() { return value != null; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + end; result = prime * result + start; result = prime * result + ((value == null) ? 0 : value.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; ComparableTextSpan other = (ComparableTextSpan) obj; if (end != other.end) return false; if (start != other.start) return false; if (value == null) { if (other.value != null) return false; } else if (!value.equals(other.value)) return false; return true; } @Override public String toString() { return String.format("%d:%d[%s]", start, end, value); } /** Build a set of comparable spans from the text using regex. * * @param text the text to extract spans from * @param pattern the regex pattern to use to create extract terms * @return a list (non-null, but possible empty) of matches. With regex gorup as value */ public static List<ComparableTextSpan> buildSpans(String text, Pattern pattern) { Matcher m = pattern.matcher(text); List<ComparableTextSpan> spans = new LinkedList<>(); while(m.find()){ ComparableTextSpan cs = new ComparableTextSpan(m.start(), m.end(), m.group()); spans.add(cs); } return spans; } }