/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package jdk.nashorn.internal.runtime.regexp.joni; import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; @SuppressWarnings("javadoc") public final class Regex implements RegexState { int[] code; /* compiled pattern */ int codeLength; boolean stackNeeded; Object[] operands; /* e.g. shared CClassNode */ int operandLength; int numMem; /* used memory(...) num counted from 1 */ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ int captureHistory; /* (?@...) flag (1-31) */ int btMemStart; /* need backtrack flag */ int btMemEnd; /* need backtrack flag */ int stackPopLevel; int[] repeatRangeLo; int[] repeatRangeHi; WarnCallback warnings; MatcherFactory factory; protected Analyser analyser; int options; final int caseFoldFlag; /* optimization info (string search, char-map and anchors) */ SearchAlgorithm searchAlgorithm; /* optimize flag */ int thresholdLength; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ int anchorDmin; /* (SEMI_)END_BUF anchor distance */ int anchorDmax; /* (SEMI_)END_BUF anchor distance */ int subAnchor; /* start-anchor for exact or map */ char[] exact; int exactP; int exactEnd; byte[] map; /* used as BM skip or char-map */ int[] intMap; /* BM skip for exact_len > 255 */ int[] intMapBackward; /* BM skip for backward search */ int dMin; /* min-distance of exact or map */ int dMax; /* max-distance of exact or map */ char[][] templates; int templateNum; public Regex(final CharSequence cs) { this(cs.toString()); } public Regex(final String str) { this(str.toCharArray(), 0, str.length(), 0); } public Regex(final char[] chars) { this(chars, 0, chars.length, 0); } public Regex(final char[] chars, final int p, final int end) { this(chars, p, end, 0); } public Regex(final char[] chars, final int p, final int end, final int option) { this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT); } // onig_new public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) { this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT); } public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) { this(chars, p, end, option, Syntax.RUBY, warnings); } // onig_new public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) { this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings); } // onig_alloc_init public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) { int option = optionp; if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); } if ((option & Option.NEGATE_SINGLELINE) != 0) { option |= syntax.options; option &= ~Option.SINGLELINE; } else { option |= syntax.options; } this.options = option; this.caseFoldFlag = caseFoldFlag; this.warnings = warnings; this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end); this.analyser.compile(); this.warnings = null; } public synchronized MatcherFactory compile() { if (factory == null && analyser != null) { new ArrayCompiler(analyser).compile(); analyser = null; // only do this once } assert factory != null; return factory; } public Matcher matcher(final char[] chars) { return matcher(chars, 0, chars.length); } public Matcher matcher(final char[] chars, final int p, final int end) { MatcherFactory matcherFactory = factory; if (matcherFactory == null) { matcherFactory = compile(); } return matcherFactory.create(this, chars, p, end); } public WarnCallback getWarnings() { return warnings; } public int numberOfCaptures() { return numMem; } /* set skip map for Boyer-Moor search */ void setupBMSkipMap() { final char[] chars = exact; final int p = exactP; final int end = exactEnd; final int len = end - p; if (len < Config.CHAR_TABLE_SIZE) { // map/skip if (map == null) { map = new byte[Config.CHAR_TABLE_SIZE]; } for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { map[i] = (byte)len; } for (int i=0; i<len-1; i++) { map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ?? } } else { if (intMap == null) { intMap = new int[Config.CHAR_TABLE_SIZE]; } for (int i=0; i<len-1; i++) { intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ?? } } } void setExactInfo(final OptExactInfo e) { if (e.length == 0) { return; } // shall we copy that ? exact = e.chars; exactP = 0; exactEnd = e.length; if (e.ignoreCase) { searchAlgorithm = new SearchAlgorithm.SLOW_IC(this); } else { if (e.length >= 2) { setupBMSkipMap(); searchAlgorithm = SearchAlgorithm.BM; } else { searchAlgorithm = SearchAlgorithm.SLOW; } } dMin = e.mmd.min; dMax = e.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + (exactEnd - exactP); } } void setOptimizeMapInfo(final OptMapInfo m) { map = m.map; searchAlgorithm = SearchAlgorithm.MAP; dMin = m.mmd.min; dMax = m.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + 1; } } void setSubAnchor(final OptAnchorInfo anc) { subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; subAnchor |= anc.rightAnchor & AnchorType.END_LINE; } void clearOptimizeInfo() { searchAlgorithm = SearchAlgorithm.NONE; anchor = 0; anchorDmax = 0; anchorDmin = 0; subAnchor = 0; exact = null; exactP = exactEnd = 0; } public String optimizeInfoToString() { final StringBuilder s = new StringBuilder(); s.append("optimize: ").append(searchAlgorithm.getName()).append("\n"); s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor)); if ((anchor & AnchorType.END_BUF_MASK) != 0) { s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax)); } s.append("\n"); if (searchAlgorithm != SearchAlgorithm.NONE) { s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n"); } s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n"); s.append("threshold length: ").append(thresholdLength).append("\n"); if (exact != null) { s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n"); } else if (searchAlgorithm == SearchAlgorithm.MAP) { int n=0; for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { if (map[i] != 0) { n++; } } s.append("map: n = ").append(n).append("\n"); if (n > 0) { int c=0; s.append("["); for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { if (map[i] != 0) { if (c > 0) { s.append(", "); } c++; // TODO if (enc.isPrint(i) s.append((char)i); } } s.append("]\n"); } } return s.toString(); } public int getOptions() { return options; } public String dumpTree() { return analyser == null ? null : analyser.root.toString(); } public String dumpByteCode() { compile(); return new ByteCodePrinter(this).byteCodeListToString(); } }