/* * eXist Open Source Native XML Database * Copyright (C) 2001-2015 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ package org.exist.dom.persistent; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.function.Predicate; import org.exist.numbering.NodeId; /** * Used to track matches throughout the query. * <p/> * Index may add a match object to every {@link org.exist.dom.persistent.NodeProxy} * that triggered a match for every term matched. The * Match object contains the nodeId of the text node that triggered the * match, the string value of the matching term and a frequency count, * indicating the frequency of the matching term string within the corresponding * single text node. * <p/> * All path operations copy existing match objects, i.e. the match objects * are copied to the selected descendant or child nodes. This means that * every NodeProxy being the direct or indirect result of a * selection will have one or more match objects, indicating which text nodes * among its descendant nodes contained a match. * * @author wolf */ public abstract class Match implements Comparable<Match> { public static final class Offset implements Comparable<Offset> { private final int offset; private final int length; public Offset(final int offset, final int length) { this.offset = offset; this.length = length; } public int getOffset() { return offset; } public int getLength() { return length; } @Override public int compareTo(final Offset other) { return this.offset - other.offset; } public boolean overlaps(final Offset other) { return (other.offset >= offset && other.offset < offset + length) || (offset >= other.offset && offset < other.offset + other.length); } } private final int context; protected final NodeId nodeId; private final String matchTerm; private int[] offsets; private int[] lengths; private int currentOffset = 0; protected Match nextMatch = null; protected Match(final int contextId, final NodeId nodeId, final String matchTerm) { this(contextId, nodeId, matchTerm, 1); } protected Match(final int contextId, final NodeId nodeId, final String matchTerm, final int frequency) { this.context = contextId; this.nodeId = nodeId; this.matchTerm = matchTerm; this.offsets = new int[frequency]; this.lengths = new int[frequency]; } protected Match(final Match match) { this.context = match.context; this.nodeId = match.nodeId; this.matchTerm = match.matchTerm; this.offsets = match.offsets; this.lengths = match.lengths; this.currentOffset = match.currentOffset; } public NodeId getNodeId() { return nodeId; } public int getFrequency() { return currentOffset; } public int getContextId() { return context; } public abstract Match createInstance(final int contextId, final NodeId nodeId, final String matchTerm); public abstract Match newCopy(); public abstract String getIndexId(); public void addOffset(final int offset, final int length) { if(currentOffset == offsets.length) { final int noffsets[] = new int[currentOffset + 1]; System.arraycopy(offsets, 0, noffsets, 0, currentOffset); offsets = noffsets; final int nlengths[] = new int[currentOffset + 1]; System.arraycopy(lengths, 0, nlengths, 0, currentOffset); lengths = nlengths; } offsets[currentOffset] = offset; lengths[currentOffset++] = length; } private void addOffset(final Offset offset) { addOffset(offset.offset, offset.length); } private void addOffsets(final Collection<Offset> offsets) { offsets.forEach(this::addOffset); } public Offset getOffset(final int pos) { return new Offset(offsets[pos], lengths[pos]); } public List<Offset> getOffsets() { final List<Offset> result = new ArrayList<>(currentOffset); for(int i = 0; i < currentOffset; i++) { result.add(getOffset(i)); } return result; } /** * Constructs a match starting with this match and continued by the other match if possible * * @param other a match continuing this match * @return a match starting with this match and continued by the other match * if such a match exists or null if no continuous match found */ public Match continuedBy(final Match other) { return followedBy(other, 0, 0); } /** * Constructs a match starting with this match and followed by the other match if possible * * @param other a match following this match * @param minDistance the minimum distance between this and the other match * @param maxDistance the maximum distance between this and the other match * @return a match starting with this match and followed by * the other match in the specified distance range if such * a match exists or null if no such match found */ public Match followedBy(final Match other, final int minDistance, final int maxDistance) { final List<Offset> newMatchOffsets = new LinkedList<>(); for(int i = 0; i < currentOffset; i++) { for(int j = 0; j < other.currentOffset; j++) { final int distance = other.offsets[j] - (offsets[i] + lengths[i]); if(distance >= minDistance && distance <= maxDistance) { newMatchOffsets.add(new Offset(offsets[i], lengths[i] + distance + other.lengths[j])); } } } if(newMatchOffsets.isEmpty()) { return null; } final int wildCardSize = newMatchOffsets.get(0).length - matchTerm.length() - other.matchTerm.length(); final StringBuilder matched = new StringBuilder(matchTerm); for(int ii = 0; ii < wildCardSize; ii++) { matched.append('?'); } matched.append(other.matchTerm); final Match result = createInstance(context, nodeId, matched.toString()); result.addOffsets(newMatchOffsets); return result; } /** * Expand the match backwards by at least minExpand up to maxExpand characters. * The match is expanded as much as possible. * * @param minExpand The minimum number of characters to expand this match by * @param maxExpand The maximum number of characters to expand this match by * @return The expanded match if possible, or null if no offset is far enough from the start. */ public Match expandBackward(final int minExpand, final int maxExpand) { Match result = null; for(int i = 0; i < currentOffset; i++) { if(offsets[i] - minExpand >= 0) { if(result == null) { final StringBuilder matched = new StringBuilder(); for(int ii = 0; ii < minExpand; ii++) { matched.append('?'); } matched.append(matchTerm); result = createInstance(context, nodeId, matched.toString()); } final int expand = Math.min(offsets[i], maxExpand); result.addOffset(offsets[i] - expand, lengths[i] + expand); } } return result; } /** * Expand the match forward by at least minExpand up to maxExpand characters. * The match is expanded as much as possible. * * @param minExpand The minimum number of characters to expand this match by * @param maxExpand The maximum number of characters to expand this match by * @param dataLength The length of the valued of the node, limiting the expansion * @return The expanded match if possible, or null if no offset is far enough from the end. */ public Match expandForward(final int minExpand, final int maxExpand, final int dataLength) { Match result = null; for(int i = 0; i < currentOffset; i++) { if(offsets[i] + lengths[i] + minExpand <= dataLength) { final int expand = Math.min(dataLength - offsets[i] - lengths[i], maxExpand); if(result == null) { final StringBuilder matched = new StringBuilder(matchTerm); for(int ii = 0; ii < expand; ii++) { matched.append('?'); } result = createInstance(context, nodeId, matched.toString()); } result.addOffset(offsets[i], lengths[i] + expand); } } return result; } private Match filterOffsets(final Predicate<Offset> predicate) { final Match result = createInstance(context, nodeId, matchTerm); getOffsets().stream().filter(predicate).forEach(result::addOffset); if(result.currentOffset == 0) { return null; } else { return result; } } /** * Creates a match containing only those offsets starting at the given position. * * @param pos Required offset * @return a match containing only offsets starting at the given position, * or null if no such offset exists. */ public Match filterOffsetsStartingAt(final int pos) { return filterOffsets(offset -> offset.offset == pos); } /** * Creates a match containing only those offsets ending at the given position. * * @param pos Required position of the end of the matches * @return A match containing only offsets ending at the given position, * or null if no such offset exists. */ public Match filterOffsetsEndingAt(final int pos) { return filterOffsets(offset -> offset.offset + offset.length == pos); } /** * Creates a match containing only non-overlapping offsets, * preferring longer matches, and then matches from left to right. * * @return a match containing only non-overlapping offsets */ public Match filterOutOverlappingOffsets() { if(currentOffset == 0) { return newCopy(); } final List<Offset> newMatchOffsets = getOffsets(); Collections.sort(newMatchOffsets, (o1, o2) -> { final int lengthDiff = o2.length - o1.length; if(lengthDiff != 0) { return lengthDiff; } else { return o1.offset - o2.offset; } }); final List<Offset> nonOverlappingMatchOffsets = new LinkedList<>(); nonOverlappingMatchOffsets.add(newMatchOffsets.remove(0)); for(final Offset o : newMatchOffsets) { boolean overlapsExistingOffset = false; for(final Offset eo : nonOverlappingMatchOffsets) { if(eo.overlaps(o)) { overlapsExistingOffset = true; break; } } if(!overlapsExistingOffset) { nonOverlappingMatchOffsets.add(o); } } final Match result = createInstance(context, nodeId, matchTerm); result.addOffsets(nonOverlappingMatchOffsets); return result; } /** * Return true if there's a match starting at the given * character position. * * @param pos the position * @return true if a match starts at the given position */ public boolean hasMatchAt(final int pos) { for(int i = 0; i < currentOffset; i++) { if(offsets[i] == pos) { return true; } } return false; } /** * Returns true if the given position is within a match. * * @param pos the position * @return true if the given position is within a match */ public boolean hasMatchAround(final int pos) { for(int i = 0; i < currentOffset; i++) { if(offsets[i] + lengths[i] >= pos) { return true; } } return false; } public void mergeOffsets(final Match other) { for(int i = 0; i < other.currentOffset; i++) { if(!hasMatchAt(other.offsets[i])) { addOffset(other.offsets[i], other.lengths[i]); } } } public Match getNextMatch() { return nextMatch; } public static boolean matchListEquals(final Match m1, final Match m2) { Match n1 = m1; Match n2 = m2; while(n1 != null) { if(n2 == null || n1 != n2) { return false; } n1 = n1.nextMatch; n2 = n2.nextMatch; } return true; } @Override public boolean equals(final Object other) { if(!(other instanceof Match)) { return false; } final Match om = (Match) other; return om.matchTerm != null && om.matchTerm.equals(matchTerm) && om.nodeId.equals(nodeId); } public boolean matchEquals(final Match other) { if(this == other) { return true; } return (nodeId == other.nodeId || nodeId.equals(other.nodeId)) && matchTerm.equals(other.matchTerm); } /** * Used to sort matches. Terms are compared by their string * length to have the longest string first. * * @see java.lang.Comparable#compareTo(java.lang.Object) */ @Override public int compareTo(final Match other) { return matchTerm.compareTo(other.matchTerm); } @Override public String toString() { final StringBuilder buf = new StringBuilder(); if(matchTerm != null) { buf.append(matchTerm); } for(int i = 0; i < currentOffset; i++) { buf.append(" ["); buf.append(offsets[i]).append(':').append(lengths[i]); buf.append("]"); } if(nextMatch != null) { buf.append(' ').append(nextMatch.toString()); } return buf.toString(); } }