//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.patterns.data; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import uk.gov.dstl.baleen.types.language.WordToken; import uk.gov.dstl.baleen.types.semantic.Entity; /** * A java bean corresponding to an extracted Pattern. * * A pattern is a range of text between two entities. */ public final class PatternExtract { /** The start. */ private final int start; /** The end. */ private final int end; /** The from. */ private final Entity from; /** The to. */ private final Entity to; /** The words. */ private List<WordToken> words; /** * Instantiates a new pattern extract. * * @param from * the first entity (start of the pattern) * @param to * the second entity (end of the pattern) * @param start * the start index * @param end * the end index */ public PatternExtract(final Entity from, final Entity to, final int start, final int end) { this.from = from; this.to = to; this.start = start; this.end = end; } /** * Get the first entity. * * @return entity */ public Entity getFrom() { return from; } /** * Gets the second entitys * * @return entity */ public Entity getTo() { return to; } /** * Gets the start. * * @return the start */ public int getStart() { return start; } /** * Gets the end. * * @return the end */ public int getEnd() { return end; } /** * Sets the word tokens (which form the pattern, and are beneath the start-end range). * * @param words * the new word tokens */ public void setWordTokens(final List<WordToken> words) { this.words = words; } /** * Gets the word tokens (must have been previously set) * * @return the word tokens */ public List<WordToken> getWordTokens() { return words; } /** * Determine if any of the needles are contained in this covering document text. * * @param documentText * the document text * @param needles * the needles * @return true, if successful */ public boolean contains(final String documentText, final String... needles) { final String text = getCoveredText(documentText); return Arrays.stream(needles).anyMatch(text::contains); } /** * Gets the covered text. * * @param documentText * the document text * @return the covered text */ public String getCoveredText(final String documentText) { return documentText.substring(start, end); } /** * Gets the text formed of the concatenated word tokens. * * Hence this a 'sanitised text' rather than the covered text. * * @return the text */ public String getText() { if (words == null) { return ""; } return words.stream() .map(w -> w.getCoveredText()).collect(Collectors.joining(" ")); } /** * Checks if is empty, based on the word tokens (not range, start/end, etc) * * @return true, if is empty */ public boolean isEmpty() { return words == null || words.isEmpty(); } }