/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package jdk.nashorn.internal.runtime.regexp.joni.ast; import jdk.nashorn.internal.runtime.regexp.joni.BitSet; import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer; import jdk.nashorn.internal.runtime.regexp.joni.Config; import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper; import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment; import jdk.nashorn.internal.runtime.regexp.joni.Syntax; import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; @SuppressWarnings("javadoc") public final class CClassNode extends Node { private static final int FLAG_NCCLASS_NOT = 1<<0; private static final int FLAG_NCCLASS_SHARE = 1<<1; int flags; public final BitSet bs = new BitSet(); // conditional creation ? public CodeRangeBuffer mbuf; /* multi-byte info or NULL */ private int ctype; // for hashing purposes private final static short AsciiCtypeTable[] = { 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; // node_new_cclass public CClassNode() {} public void clear() { bs.clear(); flags = 0; mbuf = null; } @Override public int getType() { return CCLASS; } @Override public String getName() { return "Character Class"; } @Override public boolean equals(final Object other) { if (!(other instanceof CClassNode)) { return false; } final CClassNode cc = (CClassNode)other; return ctype == cc.ctype && isNot() == cc.isNot(); } @Override public int hashCode() { if (Config.USE_SHARED_CCLASS_TABLE) { int hash = 0; hash += ctype; if (isNot()) { hash++; } return hash + (hash >> 5); } return super.hashCode(); } @Override public String toString(final int level) { final StringBuilder value = new StringBuilder(); value.append("\n flags: ").append(flagsToString()); value.append("\n bs: ").append(pad(bs, level + 1)); value.append("\n mbuf: ").append(pad(mbuf, level + 1)); return value.toString(); } public String flagsToString() { final StringBuilder f = new StringBuilder(); if (isNot()) { f.append("NOT "); } if (isShare()) { f.append("SHARE "); } return f.toString(); } public boolean isEmpty() { return mbuf == null && bs.isEmpty(); } public void addCodeRangeToBuf(final int from, final int to) { mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to); } public void addCodeRange(final ScanEnvironment env, final int from, final int to) { mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to); } public void addAllMultiByteRange() { mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf); } public void clearNotFlag() { if (isNot()) { bs.invert(); mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf); clearNot(); } } // and_cclass public void and(final CClassNode other) { final boolean not1 = isNot(); BitSet bsr1 = bs; final CodeRangeBuffer buf1 = mbuf; final boolean not2 = other.isNot(); BitSet bsr2 = other.bs; final CodeRangeBuffer buf2 = other.mbuf; if (not1) { final BitSet bs1 = new BitSet(); bsr1.invertTo(bs1); bsr1 = bs1; } if (not2) { final BitSet bs2 = new BitSet(); bsr2.invertTo(bs2); bsr2 = bs2; } bsr1.and(bsr2); if (bsr1 != bs) { bs.copy(bsr1); bsr1 = bs; } if (not1) { bs.invert(); } CodeRangeBuffer pbuf = null; if (not1 && not2) { pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false); } else { pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2); if (not1) { pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); } } mbuf = pbuf; } // or_cclass public void or(final CClassNode other) { final boolean not1 = isNot(); BitSet bsr1 = bs; final CodeRangeBuffer buf1 = mbuf; final boolean not2 = other.isNot(); BitSet bsr2 = other.bs; final CodeRangeBuffer buf2 = other.mbuf; if (not1) { final BitSet bs1 = new BitSet(); bsr1.invertTo(bs1); bsr1 = bs1; } if (not2) { final BitSet bs2 = new BitSet(); bsr2.invertTo(bs2); bsr2 = bs2; } bsr1.or(bsr2); if (bsr1 != bs) { bs.copy(bsr1); bsr1 = bs; } if (not1) { bs.invert(); } CodeRangeBuffer pbuf = null; if (not1 && not2) { pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false); } else { pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2); if (not1) { pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); } } mbuf = pbuf; } // add_ctype_to_cc_by_range // Encoding out! public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) { final int n = mbr[0]; if (!not) { for (int i=0; i<n; i++) { for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) { if (j >= sbOut) { if (Config.VANILLA) { if (j == mbr[i * 2 + 2]) { i++; } else if (j > mbr[i * 2 + 1]) { addCodeRangeToBuf(j, mbr[i * 2 + 2]); i++; } } else { if (j >= mbr[i * 2 + 1]) { addCodeRangeToBuf(j, mbr[i * 2 + 2]); i++; } } // !goto sb_end!, remove duplication! for (; i<n; i++) { addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); } return; } bs.set(j); } } // !sb_end:! for (int i=0; i<n; i++) { addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); } } else { int prev = 0; for (int i=0; i<n; i++) { for (int j=prev; j < mbr[2 * i + 1]; j++) { if (j >= sbOut) { // !goto sb_end2!, remove duplication prev = sbOut; for (i=0; i<n; i++) { if (prev < mbr[2 * i + 1]) { addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); } prev = mbr[i * 2 + 2] + 1; } if (prev < 0x7fffffff/*!!!*/) { addCodeRangeToBuf(prev, 0x7fffffff); } return; } bs.set(j); } prev = mbr[2 * i + 2] + 1; } for (int j=prev; j<sbOut; j++) { bs.set(j); } // !sb_end2:! prev = sbOut; for (int i=0; i<n; i++) { if (prev < mbr[2 * i + 1]) { addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); } prev = mbr[i * 2 + 2] + 1; } if (prev < 0x7fffffff/*!!!*/) { addCodeRangeToBuf(prev, 0x7fffffff); } } } public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) { int ct = ctp; if (Config.NON_UNICODE_SDW) { switch (ct) { case CharacterType.D: case CharacterType.S: case CharacterType.W: ct ^= CharacterType.SPECIAL_MASK; if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) { // \s in JavaScript includes unicode characters. break; } if (not) { for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); if ((AsciiCtypeTable[c] & (1 << ct)) == 0) { bs.set(c); } } addAllMultiByteRange(); } else { for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); if ((AsciiCtypeTable[c] & (1 << ct)) != 0) { bs.set(c); } } } return; default: break; } } final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut); if (ranges != null) { addCTypeByRange(ct, not, sbOut.value, ranges); return; } switch(ct) { case CharacterType.ALPHA: case CharacterType.BLANK: case CharacterType.CNTRL: case CharacterType.DIGIT: case CharacterType.LOWER: case CharacterType.PUNCT: case CharacterType.SPACE: case CharacterType.UPPER: case CharacterType.XDIGIT: case CharacterType.ASCII: case CharacterType.ALNUM: if (not) { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (!EncodingHelper.isCodeCType(c, ct)) { bs.set(c); } } addAllMultiByteRange(); } else { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (EncodingHelper.isCodeCType(c, ct)) { bs.set(c); } } } break; case CharacterType.GRAPH: case CharacterType.PRINT: if (not) { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (!EncodingHelper.isCodeCType(c, ct)) { bs.set(c); } } } else { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (EncodingHelper.isCodeCType(c, ct)) { bs.set(c); } } addAllMultiByteRange(); } break; case CharacterType.WORD: if (!not) { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (EncodingHelper.isWord(c)) { bs.set(c); } } addAllMultiByteRange(); } else { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (!EncodingHelper.isWord(c)) { bs.set(c); } } } break; default: throw new InternalException(ErrorMessages.ERR_PARSER_BUG); } // switch } public static final class CCStateArg { public int v; public int vs; public boolean vsIsRaw; public boolean vIsRaw; public CCVALTYPE inType; public CCVALTYPE type; public CCSTATE state; } public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) { if (arg.state == CCSTATE.RANGE) { throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE); } if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) { if (arg.type == CCVALTYPE.SB) { bs.set(arg.vs); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.vs, arg.vs); } } arg.state = CCSTATE.VALUE; arg.type = CCVALTYPE.CLASS; } public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) { switch(arg.state) { case VALUE: if (arg.type == CCVALTYPE.SB) { if (arg.vs > 0xff) { throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); } bs.set(arg.vs); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.vs, arg.vs); } break; case RANGE: if (arg.inType == arg.type) { if (arg.inType == CCVALTYPE.SB) { if (arg.vs > 0xff || arg.v > 0xff) { throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); } if (arg.vs > arg.v) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); } bs.setRange(arg.vs, arg.v); } else { addCodeRange(env, arg.vs, arg.v); } } else { if (arg.vs > arg.v) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); } bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff); addCodeRange(env, arg.vs, arg.v); } // ccs_range_end: arg.state = CCSTATE.COMPLETE; break; case COMPLETE: case START: arg.state = CCSTATE.VALUE; break; default: break; } // switch arg.vsIsRaw = arg.vIsRaw; arg.vs = arg.v; arg.type = arg.inType; } // onig_is_code_in_cc_len public boolean isCodeInCCLength(final int code) { boolean found; if (code > 0xff) { found = mbuf != null && mbuf.isInCodeRange(code); } else { found = bs.at(code); } if (isNot()) { return !found; } return found; } // onig_is_code_in_cc public boolean isCodeInCC(final int code) { return isCodeInCCLength(code); } public void setNot() { flags |= FLAG_NCCLASS_NOT; } public void clearNot() { flags &= ~FLAG_NCCLASS_NOT; } public boolean isNot() { return (flags & FLAG_NCCLASS_NOT) != 0; } public void setShare() { flags |= FLAG_NCCLASS_SHARE; } public void clearShare() { flags &= ~FLAG_NCCLASS_SHARE; } public boolean isShare() { return (flags & FLAG_NCCLASS_SHARE) != 0; } }