/**
* Copyright (c) 2012-2016 André Bargull
* Alle Rechte vorbehalten / All Rights Reserved. Use is subject to license terms.
*
* <https://github.com/anba/es6draft>
*/
package com.github.anba.es6draft.regexp;
import org.jcodings.ApplyAllCaseFoldFunction;
import org.jcodings.CaseFoldCodeItem;
import org.jcodings.IntHolder;
import org.jcodings.constants.CharacterType;
import org.jcodings.unicode.UnicodeEncoding;
import com.github.anba.es6draft.parser.Characters;
/**
*
*/
abstract class UEncoding extends UnicodeEncoding {
protected UEncoding(String name, int minLength, int maxLength) {
super(name, minLength, maxLength, null);
}
/**
* Encodes the charsequence into a byte array using this encoding.
*
* @param cs
* the charsequence
* @return the encoded byte array
*/
public abstract byte[] toBytes(CharSequence cs);
/**
* Encodes the string into a byte array using this encoding.
*
* @param s
* the string
* @return the encoded byte array
*/
public abstract byte[] toBytes(String s);
/**
* Returns the string length when reading {@code count} bytes from the substring
* {@code cs.subSequence(start, cs.length)}.
*
* @param cs
* the charsequence
* @param start
* the string start index
* @param count
* the number of bytes to read
* @return the string length
*/
public abstract int strLength(CharSequence cs, int start, int count);
/**
* Returns the number of bytes required to represent the charsequence in this encoding.
*
* @param cs
* the charsequence
* @return the byte length
*/
public abstract int length(CharSequence cs);
/**
* Returns {@code length(cs.subSequence(start, end))}.
*
* @param cs
* the charsequence
* @param start
* the string start index
* @param end
* the string end index
* @return the byte length
*/
public abstract int length(CharSequence cs, int start, int end);
/**
* Returns the length in bytes of the character at the byte index {@code byteIndex}.
*
* @param cs
* the charsequence
* @param byteIndex
* the byte index
* @return the character byte length
*/
public abstract int length(CharSequence cs, int byteIndex);
@Override
public int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to) {
int p = pp.value;
pp.value += length(bytes, p, end);
int codePoint = mbcToCode(bytes, p, end);
int caseFold = CaseFoldData.caseFold(codePoint);
return codeToMbc(caseFold >= 0 ? caseFold : codePoint, to, 0);
}
@Override
public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
int[] unfoldFrom = CaseFoldData.caseUnfoldFrom();
int[][] unfoldTo = CaseFoldData.caseUnfoldTo();
applyAllCaseFold(fun, arg, unfoldFrom, unfoldTo);
}
protected final void applyAllCaseFold(ApplyAllCaseFoldFunction f, Object arg, int[] unfoldFrom, int[][] unfoldTo) {
int[] code = new int[1];
for (int i = 0; i < unfoldFrom.length; ++i) {
int from = unfoldFrom[i];
int[] to = unfoldTo[i];
for (int j = 0; j < to.length; ++j) {
int codePoint = to[j];
code[0] = from;
f.apply(codePoint, code, 1, arg);
code[0] = codePoint;
f.apply(from, code, 1, arg);
for (int k = 0; k < j; k++) {
int otherCodePoint = to[k];
code[0] = otherCodePoint;
f.apply(codePoint, code, 1, arg);
code[0] = codePoint;
f.apply(otherCodePoint, code, 1, arg);
}
}
}
}
@Override
public CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end) {
int codePoint = mbcToCode(bytes, p, end);
int length = length(bytes, p, end);
int caseFold = CaseFoldData.caseFold(codePoint);
if (caseFold >= 0) {
int[] to = CaseFoldData.caseUnfold(caseFold);
if (to != null) {
return caseFoldCodesByString(codePoint, length, caseFold, to);
}
return new CaseFoldCodeItem[] { new CaseFoldCodeItem(length, 1, new int[] { caseFold }) };
}
int[] to = CaseFoldData.caseUnfold(codePoint);
if (to != null) {
return caseFoldCodesByString(codePoint, length, to);
}
return EMPTY_FOLD_CODES;
}
protected final CaseFoldCodeItem[] caseFoldCodesByString(int codePoint, int length, int caseFold, int[] to) {
int n = 1;
for (int i = 0; i < to.length; ++i) {
if (to[i] != codePoint) {
n += 1;
}
}
int k = 0;
CaseFoldCodeItem[] items = new CaseFoldCodeItem[n];
items[k++] = new CaseFoldCodeItem(length, 1, new int[] { caseFold });
for (int i = 0; i < to.length; ++i) {
if (to[i] != codePoint) {
items[k++] = new CaseFoldCodeItem(length, 1, new int[] { to[i] });
}
}
return items;
}
protected final CaseFoldCodeItem[] caseFoldCodesByString(int codePoint, int length, int[] to) {
CaseFoldCodeItem[] items = new CaseFoldCodeItem[to.length];
for (int i = 0; i < to.length; ++i) {
items[i] = new CaseFoldCodeItem(length, 1, new int[] { to[i] });
}
return items;
}
@Override
public final boolean isNewLine(byte[] bytes, int p, int end) {
int codePoint = mbcToCode(bytes, p, end);
switch (codePoint) {
case 0x000A:
case 0x000D:
case 0x2028:
case 0x2029:
return true;
default:
return false;
}
}
@Override
public final int propertyNameToCType(byte[] bytes, int p, int end) {
return super.propertyNameToCType(bytes, p, end);
}
@Override
public final boolean isCodeCType(int code, int ctype) {
switch (ctype) {
case CharacterType.DIGIT:
return Characters.isDecimalDigit(code);
case CharacterType.SPACE:
return Characters.isWhitespaceOrLineTerminator(code);
case CharacterType.UPPER:
// needs to be implemented to parse hexadecimal digits
return 'A' <= code && code <= 'Z';
case CharacterType.XDIGIT:
// needs to be implemented to parse hexadecimal digits
return Characters.isHexDigit(code);
case CharacterType.WORD:
return Characters.isASCIIAlphaNumericUnderscore(code);
default:
assert false : "unreachable: " + ctype;
return super.isCodeCType(code, ctype);
}
}
private static final int[] codeRangeDigit, codeRangeWord, codeRangeSpace;
static {
codeRangeDigit = new int[] { 1, '0', '9' };
codeRangeWord = new int[] { 4, '0', '9', 'A', 'Z', '_', '_', 'a', 'z' };
codeRangeSpace = new int[] { 10, 0x0009, 0x000d, 0x0020, 0x0020, 0x00a0, 0x00a0, 0x1680, 0x1680, 0x2000, 0x200a,
0x2028, 0x2029, 0x202f, 0x202f, 0x205f, 0x205f, 0x3000, 0x3000, 0xfeff, 0xfeff };
}
@Override
public final int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
sbOut.value = 0x00; // ?
switch (ctype) {
case CharacterType.DIGIT:
return codeRangeDigit;
case CharacterType.WORD:
return codeRangeWord;
case CharacterType.SPACE:
return codeRangeSpace;
default:
assert false : "unreachable";
return super.ctypeCodeRange(ctype);
}
}
@Override
public final boolean isReverseMatchAllowed(byte[] bytes, int p, int end) {
return false;
}
}