/*
* DBeaver - Universal Database Manager
* Copyright (C) 2010-2017 Serge Rider (serge@jkiss.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jkiss.dbeaver.model.sql.format.tokenized;
import org.jkiss.dbeaver.model.sql.format.SQLFormatterConfiguration;
import org.jkiss.utils.ArrayUtils;
import org.jkiss.utils.CommonUtils;
import java.util.*;
/**
* SQLTokensParser
* TODO: check comment characters from syntax manager, not constants
*/
class SQLTokensParser {
private static final String[] twoCharacterSymbol = { "<>", "<=", ">=", "||", "()", "!=", ":=", ".*" };
private final SQLFormatterConfiguration configuration;
private final String quoteSymbol;
private String fBefore;
private int fPos;
private char structSeparator;
private String catalogSeparator;
private Set<String> commands = new HashSet<>();
private String[] singleLineComments;
private char[] singleLineCommentStart;
public SQLTokensParser(SQLFormatterConfiguration configuration) {
this.configuration = configuration;
this.structSeparator = configuration.getSyntaxManager().getStructSeparator();
this.catalogSeparator = configuration.getSyntaxManager().getCatalogSeparator();
this.quoteSymbol = configuration.getSyntaxManager().getQuoteSymbol();
this.singleLineComments = configuration.getSyntaxManager().getDialect().getSingleLineComments();
this.singleLineCommentStart = new char[this.singleLineComments.length];
for (int i = 0; i < singleLineComments.length; i++) {
if (singleLineComments[i].isEmpty()) singleLineCommentStart[i] = 0;
else singleLineCommentStart[i] = singleLineComments[i].charAt(0);
}
String delimiterRedefiner = configuration.getSyntaxManager().getDialect().getScriptDelimiterRedefiner();
if (!CommonUtils.isEmpty(delimiterRedefiner)) {
commands.add(delimiterRedefiner.toUpperCase(Locale.ENGLISH));
}
}
public static boolean isSpace(final char argChar) {
return Character.isWhitespace(argChar);
}
public static boolean isLetter(final char argChar) {
return !isSpace(argChar) && !isDigit(argChar) && !isSymbol(argChar);
}
public static boolean isDigit(final char argChar) {
return Character.isDigit(argChar);
}
public static boolean isSymbol(final char argChar) {
switch (argChar) {
case '"': // double quote
case '?': // question mark
case '%': // percent
case '&': // ampersand
case '\'': // quote
case '(': // left paren
case ')': // right paren
case '|': // vertical bar
case '*': // asterisk
case '+': // plus sign
case ',': // comma
case '-': // minus sign
case '.': // period
case '/': // solidus
case ':': // colon
case ';': // semicolon
case '<': // less than operator
case '=': // equals operator
case '>': // greater than operator
case '!': // greater than operator
case '~': // greater than operator
case '`': // apos
return true;
default:
return false;
}
}
FormatterToken nextToken() {
int start_pos = fPos;
if (fPos >= fBefore.length()) {
fPos++;
return new FormatterToken(TokenType.END, "", start_pos);
}
char fChar = fBefore.charAt(fPos);
if (isSpace(fChar)) {
StringBuilder workString = new StringBuilder();
for (;;) {
workString.append(fChar);
fChar = fBefore.charAt(fPos);
if (!isSpace(fChar)) {
return new FormatterToken(TokenType.SPACE, workString.toString(), start_pos);
}
fPos++;
if (fPos >= fBefore.length()) {
return new FormatterToken(TokenType.SPACE, workString.toString(), start_pos);
}
}
} else if (fChar == ';') {
fPos++;
return new FormatterToken(TokenType.SYMBOL, ";", start_pos);
} else if (isDigit(fChar)) {
StringBuilder s = new StringBuilder();
while (isDigit(fChar) || fChar == '.') {
// if (ch == '.') type = Token.REAL;
s.append(fChar);
fPos++;
if (fPos >= fBefore.length()) {
break;
}
fChar = fBefore.charAt(fPos);
}
return new FormatterToken(TokenType.VALUE, s.toString(), start_pos);
}
// single line comment
else if (ArrayUtils.contains(singleLineCommentStart, fChar)) {
fPos++;
String commentString = null;
for (String slc : singleLineComments) {
if (fBefore.length() >= start_pos + slc.length() && slc.equals(fBefore.substring(start_pos, start_pos + slc.length()))) {
commentString = slc;
break;
}
}
if (commentString == null) {
return new FormatterToken(TokenType.SYMBOL, String.valueOf(fChar), start_pos);
}
fPos += commentString.length() - 1;
while (fPos < fBefore.length()) {
fPos++;
if (fBefore.charAt(fPos - 1) == '\n') {
break;
}
}
commentString = fBefore.substring(start_pos, fPos);
return new FormatterToken(TokenType.COMMENT, commentString, start_pos);
}
else if (isLetter(fChar)) {
StringBuilder s = new StringBuilder();
while (isLetter(fChar) || isDigit(fChar) || fChar == '*' || structSeparator == fChar || catalogSeparator.indexOf(fChar) != -1) {
s.append(fChar);
fPos++;
if (fPos >= fBefore.length()) {
break;
}
fChar = fBefore.charAt(fPos);
}
String word = s.toString();
if (commands.contains(word.toUpperCase(Locale.ENGLISH))) {
s.setLength(0);
for (; fPos < fBefore.length(); fPos++) {
fChar = fBefore.charAt(fPos);
if (fChar == '\n' || fChar == '\r') {
break;
} else {
s.append(fChar);
}
}
return new FormatterToken(TokenType.COMMAND, word + s.toString(), start_pos);
}
if (configuration.getSyntaxManager().getDialect().getKeywordType(word) != null) {
return new FormatterToken(TokenType.KEYWORD, word, start_pos);
}
return new FormatterToken(TokenType.NAME, word, start_pos);
}
else if (fChar == '/') {
fPos++;
char ch2 = fBefore.charAt(fPos);
if (ch2 != '*') {
return new FormatterToken(TokenType.SYMBOL, "/", start_pos);
}
StringBuilder s = new StringBuilder("/*");
fPos++;
for (;;) {
int ch0 = fChar;
fChar = fBefore.charAt(fPos);
s.append(fChar);
fPos++;
if (ch0 == '*' && fChar == '/') {
return new FormatterToken(TokenType.COMMENT, s.toString(), start_pos);
}
}
} else {
if (fChar == '\'' || fChar == '\"' || (quoteSymbol != null && !quoteSymbol.isEmpty() && fChar == quoteSymbol.charAt(0))) {
fPos++;
char quoteChar = fChar;
StringBuilder s = new StringBuilder();
s.append(quoteChar);
for (;;) {
fChar = fBefore.charAt(fPos);
s.append(fChar);
fPos++;
char fNextChar = fPos >= fBefore.length() - 1 ? 0 : fBefore.charAt(fPos);
if (fChar == quoteChar && fNextChar == quoteChar) {
// Escaped quote
s.append(fChar);
fPos++;
continue;
}
if (fChar == quoteChar) {
return new FormatterToken(TokenType.VALUE, s.toString(), start_pos);
}
}
}
else if (isSymbol(fChar)) {
String s = String.valueOf(fChar);
fPos++;
if (fPos >= fBefore.length()) {
return new FormatterToken(TokenType.SYMBOL, s, start_pos);
}
char ch2 = fBefore.charAt(fPos);
for (int i = 0; i < twoCharacterSymbol.length; i++) {
if (twoCharacterSymbol[i].charAt(0) == fChar && twoCharacterSymbol[i].charAt(1) == ch2) {
fPos++;
s += ch2;
break;
}
}
return new FormatterToken(TokenType.SYMBOL, s, start_pos);
} else {
fPos++;
return new FormatterToken(TokenType.UNKNOWN, String.valueOf(fChar), start_pos);
}
}
}
public List<FormatterToken> parse(final String argSql) {
fPos = 0;
fBefore = argSql;
final List<FormatterToken> list = new ArrayList<>();
for (;;) {
final FormatterToken token = nextToken();
if (token.getType() == TokenType.END) {
break;
}
list.add(token);
}
return list;
}
}