/******************************************************************************* * Copyright 2013 SAP AG * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package com.sap.core.odata.core.uri.expression; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.sap.core.odata.api.edm.EdmLiteral; import com.sap.core.odata.api.edm.EdmLiteralException; import com.sap.core.odata.api.edm.EdmSimpleTypeFacade; import com.sap.core.odata.api.edm.EdmSimpleTypeKind; import com.sap.core.odata.api.uri.expression.ExpressionParserException; import com.sap.core.odata.core.edm.EdmSimpleTypeFacadeImpl; /** * Expression tokenizer * @author SAP AG */ public class Tokenizer { //Pattern OTHER_LIT = Pattern.compile("^([[A-Za-z0-9]._~%!$&*+;:@-]+)"); private static final Pattern OTHER_LIT = Pattern.compile("(?:\\p{L}|\\p{Digit}|[-._~%!$&*+;:@])+"); private static final Pattern FUNK = Pattern.compile("^(startswith|endswith|substring|substring|substringof|indexof|replace|tolower|toupper|trim|concat|length|year|mounth|day|hour|minute|second|round|ceiling|floor)( *)\\("); private static final Pattern AND_SUB1 = Pattern.compile("^(add|sub|mul|div|mod|not) "); private static final Pattern AND_SUB = Pattern.compile("^(and|or|eq|ne|lt|gt|le|ge) "); private static final Pattern prefix = Pattern.compile("^(X|binary|guid|datetime|datetimeoffset|time)'"); private boolean flagIncludeWhitespace = false; private EdmSimpleTypeFacade typeDectector = null; int curPosition; final String expression; final int expressionLength; TokenList tokens; public Tokenizer(final String expression) { typeDectector = new EdmSimpleTypeFacadeImpl(); this.expression = expression; expressionLength = expression.length(); tokens = new TokenList(); } /** * Inform the Tokenizer whether extra tokens for whitespace characters should be added to the token list or not. * @param flagIncludeWhitespace True -> Whitespace token will be added to token list; False otherwise * @return this */ public Tokenizer setFlagWhiteSpace(final Boolean flagIncludeWhitespace) { this.flagIncludeWhitespace = flagIncludeWhitespace; return this; } /** * Tokenizes an expression as defined per OData specification * @return Token list */ public TokenList tokenize() throws TokenizerException, ExpressionParserException { curPosition = 0; int oldPosition; char curCharacter; String token = ""; while (curPosition < expressionLength) { oldPosition = curPosition; curCharacter = expression.charAt(curPosition); switch (curCharacter) { case ' ': //count whitespace and move pointer to next non-whitespace char eatWhiteSpaces(curPosition, curCharacter); break; case '(': tokens.appendToken(curPosition, TokenKind.OPENPAREN, curCharacter); curPosition = curPosition + 1; break; case ')': tokens.appendToken(curPosition, TokenKind.CLOSEPAREN, curCharacter); curPosition = curPosition + 1; break; case '\'': token = ""; readLiteral(curCharacter); break; case ',': tokens.appendToken(oldPosition, TokenKind.COMMA, curCharacter); curPosition = curPosition + 1; break; case '=': case '/': case '?': case '.': case '*': curPosition = curPosition + 1; tokens.appendToken(oldPosition, TokenKind.SYMBOL, curCharacter); break; default: String rem_expr = expression.substring(curPosition); //remaining expression boolean isBinary = checkForBinary(oldPosition, rem_expr); if (isBinary) { break; } //check for prefixes like X, binary, guid, datetime boolean isPrefix = checkForPrefix(rem_expr); if (isPrefix) { break; } //check for math boolean isMath = checkForMath(oldPosition, rem_expr); if (isMath) { break; } //check for function boolean isFunction = checkForMethod(oldPosition, rem_expr); if (isFunction) { break; } boolean isBoolean = checkForBoolean(oldPosition, rem_expr); if (isBoolean) { break; } boolean isLiteral = checkForLiteral(oldPosition, curCharacter, rem_expr); if (isLiteral) { break; } token = new Character(curCharacter).toString(); throw TokenizerException.createUNKNOWN_CHARACTER(oldPosition, token, expression); } } return tokens; } private boolean checkForLiteral(final int oldPosition, final char curCharacter, final String rem_expr) { final Matcher matcher = OTHER_LIT.matcher(rem_expr); boolean isLiteral = false; if (matcher.lookingAt()) { String token = matcher.group(); try { EdmLiteral edmLiteral = typeDectector.parseUriLiteral(token); curPosition = curPosition + token.length(); // It is a simple type. tokens.appendEdmTypedToken(oldPosition, TokenKind.SIMPLE_TYPE, token, edmLiteral); isLiteral = true; } catch (EdmLiteralException e) { // We treat it as normal untyped literal. // The '-' is checked here (and not in the switch statement) because it may be // part of a negative number. if (curCharacter == '-') { curPosition = curPosition + 1; tokens.appendToken(oldPosition, TokenKind.SYMBOL, curCharacter); isLiteral = true; } else { curPosition = curPosition + token.length(); tokens.appendToken(oldPosition, TokenKind.LITERAL, token); isLiteral = true; } } } return isLiteral; } private boolean checkForBoolean(final int oldPosition, final String rem_expr) { boolean isBoolean = false; if (rem_expr.equals("true") || rem_expr.equals("false")) { curPosition = curPosition + rem_expr.length(); tokens.appendEdmTypedToken(oldPosition, TokenKind.SIMPLE_TYPE, rem_expr, new EdmLiteral(EdmSimpleTypeFacadeImpl.getEdmSimpleType(EdmSimpleTypeKind.Boolean), rem_expr)); isBoolean = true; } return isBoolean; } private void eatWhiteSpaces(final int oldPosition, char curCharacter) { int lv_token_len; String expression_sub; while ((curCharacter == ' ') && (curPosition < expressionLength)) { curPosition = curPosition + 1; if (curPosition < expressionLength) { curCharacter = expression.charAt(curPosition); } } lv_token_len = curPosition - oldPosition; if (flagIncludeWhitespace == true) { expression_sub = expression.substring(oldPosition, oldPosition + lv_token_len); tokens.appendEdmTypedToken(oldPosition, TokenKind.WHITESPACE, expression_sub, null); } } private boolean checkForMethod(final int oldPosition, final String rem_expr) { boolean isMethod = false; Matcher matcher = FUNK.matcher(rem_expr); if (matcher.find()) { String token = matcher.group(1); curPosition = curPosition + token.length(); tokens.appendToken(oldPosition, TokenKind.LITERAL, token); isMethod = true; } return isMethod; } private boolean checkForMath(final int oldPosition, final String rem_expr) { boolean isMath = false; Matcher matcher1 = AND_SUB1.matcher(rem_expr); if (matcher1.find()) { String token = matcher1.group(1); curPosition = curPosition + token.length(); tokens.appendToken(oldPosition, TokenKind.LITERAL, token); isMath = true; } return isMath; } private boolean checkForBinary(final int oldPosition, final String rem_expr) { boolean isBinary = false; Matcher matcher1 = AND_SUB.matcher(rem_expr); if (matcher1.find()) { String token = matcher1.group(1); curPosition = curPosition + token.length(); tokens.appendToken(oldPosition, TokenKind.LITERAL, token); isBinary = true; } return isBinary; } private boolean checkForPrefix(final String rem_expr) throws ExpressionParserException, TokenizerException { boolean isPrefix = false; Matcher matcher = prefix.matcher(rem_expr); String token = ""; char curCharacter; if (matcher.find()) { token = matcher.group(1); curPosition = curPosition + token.length(); curCharacter = expression.charAt(curPosition); //"should be ' readLiteral(curCharacter, token); isPrefix = true; } return isPrefix; } private void readLiteral(final char curCharacter) throws ExpressionParserException, TokenizerException { readLiteral(curCharacter, ""); } /** * Read up to single ' and move pointer to the following char and tries a type detection * @param curCharacter * @param token * @throws ExpressionParserException * @throws TokenizerException */ private void readLiteral(char curCharacter, String token) throws ExpressionParserException, TokenizerException { int offsetPos = -token.length(); int oldPosition = curPosition; token = token + Character.toString(curCharacter); curPosition = curPosition + 1; boolean wasApostroph = false; //leading ' does not count while (curPosition < expressionLength) { curCharacter = expression.charAt(curPosition); if (curCharacter != '\'') { if (wasApostroph == true) { break; } token = token + curCharacter; wasApostroph = false; } else { if (wasApostroph) { wasApostroph = false; //a double ' is a normal character ' } else { wasApostroph = true; token = token + curCharacter; } } curPosition = curPosition + 1; } if (!wasApostroph) { //Exception tested within TestPMparseFilterString throw FilterParserExceptionImpl.createTOKEN_UNDETERMINATED_STRING(oldPosition, expression); } try { EdmLiteral edmLiteral = typeDectector.parseUriLiteral(token); tokens.appendEdmTypedToken(oldPosition + offsetPos, TokenKind.SIMPLE_TYPE, token, edmLiteral); } catch (EdmLiteralException ex) { throw TokenizerException.createTYPEDECTECTION_FAILED_ON_STRING(ex, oldPosition, token); } } }