/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package jdk.nashorn.internal.runtime.regexp.joni; import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; @SuppressWarnings("javadoc") public abstract class Matcher extends IntHolder { protected final Regex regex; protected final char[] chars; protected final int str; protected final int end; protected int msaStart; protected int msaOptions; protected final Region msaRegion; protected int msaBestLen; protected int msaBestS; protected int msaBegin; protected int msaEnd; public Matcher(final Regex regex, final char[] chars) { this(regex, chars, 0, chars.length); } public Matcher(final Regex regex, final char[] chars, final int p, final int end) { this.regex = regex; this.chars = chars; this.str = p; this.end = end; this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); } // main matching method protected abstract int matchAt(int range, int sstart, int sprev); public final Region getRegion() { return msaRegion; } public final int getBegin() { return msaBegin; } public final int getEnd() { return msaEnd; } protected final void msaInit(final int option, final int start) { msaOptions = option; msaStart = start; if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { msaBestLen = -1; } } public final int match(final int at, final int range, final int option) { msaInit(option, at); final int prev = EncodingHelper.prevCharHead(str, at); if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { return matchAt(end /*range*/, at, prev); } return matchAt(range /*range*/, at, prev); } int low, high; // these are the return values private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) { int pprev = -1; int p = s; if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range: "+ "str: " + string + ", end: " + e + ", s: " + s + ", range: " + range); } if (regex.dMin > 0) { p += regex.dMin; } retry:while (true) { p = regex.searchAlgorithm.search(regex, ch, p, e, range); if (p != -1 && p < range) { if (p - regex.dMin < s) { // retry_gate: pprev = p; p++; continue retry; } if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != string) { final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); if (!EncodingHelper.isNewLine(ch, prev, e)) { // goto retry_gate; pprev = p; p++; continue retry; } } break; case AnchorType.END_LINE: if (p == e) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) { // goto retry_gate; pprev = p; p++; continue retry; } } } else if (!EncodingHelper.isNewLine(ch, p, e)) { //if () break; // goto retry_gate; pprev = p; p++; continue retry; } break; default: break; } // switch } if (regex.dMax == 0) { low = p; if (lowPrev != null) { // ??? // remove null checks if (low > s) { lowPrev.value = EncodingHelper.prevCharHead(s, p); } else { lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); } } } else { if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; if (low > s) { low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); if (lowPrev != null && lowPrev.value == -1) { lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); } } else { if (lowPrev != null) { lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low); } } } } /* no needs to adjust *high, *high is used as range check only */ high = p - regex.dMin; if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range success: "+ "low: " + (low - string) + ", high: " + (high - string) + ", dmin: " + regex.dMin + ", dmax: " + regex.dMax); } return true; /* success */ } return false; /* fail */ } //while } // low, high private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) { int r = range; r += regex.dMin; int p = s; retry:while (true) { p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r); if (p != -1) { if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != string) { final int prev = EncodingHelper.prevCharHead(string, p); if (!EncodingHelper.isNewLine(ch, prev, e)) { p = prev; continue retry; } } break; case AnchorType.END_LINE: if (p == e) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { final int prev = EncodingHelper.prevCharHead(adjrange, p); if (prev == -1) { return false; } if (EncodingHelper.isNewLine(ch, prev, e)) { p = prev; continue retry; } } } else if (!EncodingHelper.isNewLine(ch, p, e)) { p = EncodingHelper.prevCharHead(adjrange, p); if (p == -1) { return false; } continue retry; } break; default: break; } // switch } /* no needs to adjust *high, *high is used as range check only */ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; high = p - regex.dMin; } if (Config.DEBUG_SEARCH) { Config.log.println("backward_search_range: "+ "low: " + (low - string) + ", high: " + (high - string)); } return true; } if (Config.DEBUG_SEARCH) { Config.log.println("backward_search_range: fail."); } return false; } // while } // MATCH_AND_RETURN_CHECK private boolean matchCheck(final int upperRange, final int s, final int prev) { if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { //range = upperRange; if (matchAt(upperRange, s, prev) != -1) { if (!isFindLongest(regex.options)) { return true; } } } else { //range = upperRange; if (matchAt(upperRange, s, prev) != -1) { return true; } } } else { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (matchAt(end, s, prev) != -1) { //range = upperRange; if (!isFindLongest(regex.options)) { return true; } } } else { //range = upperRange; if (matchAt(end, s, prev) != -1) { return true; } } } return false; } public final int search(final int startp, final int rangep, final int option) { int start = startp, range = rangep; int s, prev; int origStart = start; final int origRange = range; if (Config.DEBUG_SEARCH) { Config.log.println("onig_search (entry point): "+ "str: " + str + ", end: " + (end - str) + ", start: " + (start - str) + ", range " + (range - str)); } if (start > end || start < str) { return -1; } /* anchor optimize: resume search range */ if (regex.anchor != 0 && str < end) { int minSemiEnd, maxSemiEnd; if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { /* search start-position only */ // !begin_position:! if (range > start) { range = start + 1; } else { range = start; } } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { /* search str-position only */ if (range > start) { if (start != str) { return -1; // mismatch_no_msa; } range = str + 1; } else { if (range <= str) { start = str; range = str; } else { return -1; // mismatch_no_msa; } } } else if ((regex.anchor & AnchorType.END_BUF) != 0) { minSemiEnd = maxSemiEnd = end; // !end_buf:! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) { return -1; // mismatch_no_msa; } } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { final int preEnd = EncodingHelper.stepBack(str, end, 1); maxSemiEnd = end; if (EncodingHelper.isNewLine(chars, preEnd, end)) { minSemiEnd = preEnd; if (minSemiEnd > str && start <= minSemiEnd) { // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) { return -1; // mismatch_no_msa; } } } else { minSemiEnd = end; // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) { return -1; // mismatch_no_msa; } } } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { // goto !begin_position;! if (range > start) { range = start + 1; } else { range = start; } } } else if (str == end) { /* empty string */ // empty address ? if (Config.DEBUG_SEARCH) { Config.log.println("onig_search: empty string."); } if (regex.thresholdLength == 0) { s = start = str; prev = -1; msaInit(option, start); if (matchCheck(end, s, prev)) { return match(s); } return mismatch(); } return -1; // goto mismatch_no_msa; } if (Config.DEBUG_SEARCH) { Config.log.println("onig_search(apply anchor): " + "end: " + (end - str) + ", start " + (start - str) + ", range " + (range - str)); } msaInit(option, origStart); s = start; if (range > start) { /* forward search */ if (s > str) { prev = EncodingHelper.prevCharHead(str, s); } else { prev = 0; // -1 } if (regex.searchAlgorithm != SearchAlgorithm.NONE) { int schRange = range; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schRange = end; } else { schRange += regex.dMax; if (schRange > end) { schRange = end; } } } if ((end - start) < regex.thresholdLength) { return mismatch(); } if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { do { if (!forwardSearchRange(chars, str, end, s, schRange, this)) { return mismatch(); // low, high, lowPrev } if (s < low) { s = low; prev = value; } while (s <= high) { if (matchCheck(origRange, s, prev)) { return match(s); // ??? } prev = s; s++; } } while (s < range); } /* check only. */ if (!forwardSearchRange(chars, str, end, s, schRange, null)) { return mismatch(); } if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { do { if (matchCheck(origRange, s, prev)) { return match(s); } prev = s; s++; } while (s < range); return mismatch(); } } do { if (matchCheck(origRange, s, prev)) { return match(s); } prev = s; s++; } while (s < range); if (s == range) { /* because empty match with /$/. */ if (matchCheck(origRange, s, prev)) { return match(s); } } } else { /* backward search */ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (origStart < end) { origStart++; // /* is upper range */ } } if (regex.searchAlgorithm != SearchAlgorithm.NONE) { int adjrange; if (range < end) { adjrange = range; } else { adjrange = end; } if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { do { int schStart = s + regex.dMax; if (schStart > end) { schStart = end; } if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) { return mismatch(); // low, high } if (s > high) { s = high; } while (s != -1 && s >= low) { prev = EncodingHelper.prevCharHead(str, s); if (matchCheck(origStart, s, prev)) { return match(s); } s = prev; } } while (s >= range); return mismatch(); } if ((end - range) < regex.thresholdLength) { return mismatch(); } int schStart = s; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schStart = end; } else { schStart += regex.dMax; if (schStart > end) { schStart = end; } } } if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) { return mismatch(); } } do { prev = EncodingHelper.prevCharHead(str, s); if (matchCheck(origStart, s, prev)) { return match(s); } s = prev; } while (s >= range); } return mismatch(); } private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) { int start = startp; int range = rangep; if ((maxSemiEnd - str) < regex.anchorDmin) { return true; // mismatch_no_msa; } if (range > start) { if ((minSemiEnd - start) > regex.anchorDmax) { start = minSemiEnd - regex.anchorDmax; if (start >= end) { /* match with empty at end */ start = EncodingHelper.prevCharHead(str, end); } } if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { range = maxSemiEnd - regex.anchorDmin + 1; } if (start >= range) { return true; // mismatch_no_msa; } } else { if ((minSemiEnd - range) > regex.anchorDmax) { range = minSemiEnd - regex.anchorDmax; } if ((maxSemiEnd - start) < regex.anchorDmin) { start = maxSemiEnd - regex.anchorDmin; } if (range > start) { return true; // mismatch_no_msa; } } return false; } private int match(final int s) { return s - str; // sstart ??? } private int mismatch() { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (msaBestLen >= 0) { final int s = msaBestS; return match(s); } } // falls through finish: return -1; } }