package folioxml.lucene.analysis.folio; public class FolioEnuTokenizer extends LookAroundCharTokenizer { /* alphanum = 0-9A-Za-z + 138, 140, 154, 156, 159, 192�214, 216�246, 248�255 single quotes, commas, minus sign, periods, and forward slashes are permitted in special contexts ' Must come between two alpha characters. , Must come between two numeric characters, or immediately precede a numeric character - Must come between two numeric characters, or precede or follow a numeric character. - Must come between two between alpha characters or precede by alpha characters with a numeric . Must come between two alpha-numeric characters or immediately precede a numeric character. / Must come between two numeric characters. */ public FolioEnuTokenizer() { super(); } @Override protected boolean isTokenChar(int p, int c, int n) { if (isAlphaNumeric(c)) return true; if (c == '\'' && isAlpha(p) && isAlpha(n)) return true; if (c == ',' && isNumeric(n)) return true; if (c == '-' && (isNumeric(n) || isNumeric(p))) return true; if (c == '-' && (isAlpha(n) && isNumeric(p))) return true; if (c == '/' && isNumeric(n) && isNumeric(p)) return true; if (c == '.' && (isNumeric(n) || (isAlphaNumeric(p) && isAlphaNumeric(n)))) return true; return false; } protected boolean isAlphaNumeric(int c) { return isAlpha(c) || isNumeric(c); } protected boolean isNumeric(int c) { return (c >= '0' && c <= '9'); } protected boolean isAlpha(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == 138 || c == 140 || c == 154 || c == 156 || c == 159 || (c >= 192 && c <= 214) || (c >= 216 && c <= 246) || (c >= 248 && c <= 255); } @Override protected int normalize(int c) { return Character.toLowerCase(c); } }