/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.query.parse;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import java.util.LinkedList;
import org.junit.Before;
import org.junit.Test;
import org.modeshape.common.text.ParsingException;
import org.modeshape.common.text.Position;
import org.modeshape.common.text.TokenStream.CharacterArrayStream;
import org.modeshape.common.text.TokenStream.Tokens;
import org.modeshape.jcr.query.parse.BasicSqlQueryParser.SqlTokenizer;
/**
 * Unit tests for {@link SqlTokenizer}.
 * <p>
 * Each test feeds a string to the tokenizer through a recording {@link Tokens} implementation and then verifies, in order,
 * the {@code (startIndex, endIndex, type)} triple of every token that was reported. The expectations in this class treat
 * {@code endIndex} as exclusive (for example, the four-character word at the start of the input is {@code 0..4}).
 * </p>
 */
public class SqlTokenizerTest {

    /** The tokenizer under test; recreated before every test. */
    private SqlTokenizer tokenizer;

    /** Receives the tokens from the tokenizer and records them into {@link #tokenValues}. */
    private Tokens tokenFactory;

    /** The recorded tokens in the order they were reported, each stored as {@code {startIndex, endIndex, type}}. */
    private LinkedList<int[]> tokenValues;

    /**
     * Creates a fresh tokenizer and a fresh, empty token recorder for each test.
     */
    @Before
    public void beforeEach() {
        // NOTE(review): the 'true' argument presumably makes the tokenizer report comment tokens (the COMMENT
        // expectations below rely on comments being reported) - confirm against SqlTokenizer's constructor.
        tokenizer = new SqlTokenizer(true);
        final LinkedList<int[]> recorded = new LinkedList<int[]>();
        tokenFactory = new Tokens() {
            @Override
            public void addToken( Position position,
                                  int index ) {
                // A single-character token: record it as the range [index, index + 1) with type 0.
                record(index, index + 1, 0);
            }

            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex ) {
                // No type supplied, so the token is recorded with type 0.
                record(startIndex, endIndex, 0);
            }

            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex,
                                  int type ) {
                record(startIndex, endIndex, type);
            }

            /** Appends one token triple to the recording; the position is intentionally not recorded. */
            private void record( int startIndex,
                                 int endIndex,
                                 int type ) {
                recorded.add(new int[] {startIndex, endIndex, type});
            }
        };
        this.tokenValues = recorded;
    }

    /**
     * Runs the tokenizer over the supplied input, recording every reported token.
     *
     * @param input the content to tokenize; may not be null
     */
    protected void tokenize( String input ) {
        tokenizer.tokenize(new CharacterArrayStream(input.toCharArray()), tokenFactory);
    }

    /**
     * Removes the next recorded token and verifies its start index, end index and type.
     * <p>
     * If no recorded token remains, this fails with an assertion error rather than letting
     * {@link LinkedList#removeFirst()} throw a {@link java.util.NoSuchElementException}.
     * </p>
     *
     * @param startIndex the expected start index of the token
     * @param endIndex the expected end index of the token
     * @param type the expected token type
     */
    protected void assertNextTokenIs( int startIndex,
                                      int endIndex,
                                      int type ) {
        assertThat("Expected a token [" + startIndex + "," + endIndex + ") of type " + type + " but no tokens remain",
                   tokenValues.isEmpty(),
                   is(false));
        int[] token = tokenValues.removeFirst();
        assertThat("start index of token", token[0], is(startIndex));
        assertThat("end index of token", token[1], is(endIndex));
        assertThat("type of token", token[2], is(type));
    }

    /**
     * Verifies that every recorded token has already been consumed by {@link #assertNextTokenIs(int, int, int)}.
     */
    protected void assertNoMoreTokens() {
        assertThat("Found " + tokenValues.size() + " unexpected extra token(s)", tokenValues.isEmpty(), is(true));
    }

    @Test
    public void shouldCreateNoTokensForEmptyContent() {
        tokenize("");
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateNoTokensForContentWithOnlyWhitespace() {
        tokenize(" \t \n \r\n \r ");
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForEachSymbolCharacter() {
        String content = "(){}*,:+%?$]!<>|=:";
        int numSymbols = content.length();
        tokenize(content);
        // Every character is expected to be reported as its own one-character SYMBOL token.
        for (int i = 0; i != numSymbols; ++i) {
            assertNextTokenIs(i, i + 1, SqlTokenizer.SYMBOL);
        }
        assertNoMoreTokens();
    }

    @Test
    public void shouldNotConsiderDoubleSlashAsComment() {
        String content = "++//this\n";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 3, SqlTokenizer.SYMBOL);
        assertNextTokenIs(3, 4, SqlTokenizer.SYMBOL);
        assertNextTokenIs(4, content.length() - 1, SqlTokenizer.WORD); // -1 because '\n' is not included
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForEndOfLineCommentUsingDoubleDash() {
        String content = "++--this is a comment\n";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length() - 1, SqlTokenizer.COMMENT); // -1 because '\n' is not included
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineComment() {
        String content = "==/*this is a comment*/-";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length() - 1, SqlTokenizer.COMMENT);
        assertNextTokenIs(content.length() - 1, content.length(), SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentAtEndOfContent() {
        String content = "==/*this is a comment*/";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), SqlTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutTerminatingCharacters() {
        String content = "==/*this is a comment";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), SqlTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutAllTerminatingCharacters() {
        String content = "==/*this is a comment*";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), SqlTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedString() {
        String content = "=='this is a single-quoted \n string'-";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, SqlTokenizer.QUOTED_STRING);
        assertNextTokenIs(36, 37, SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedStringWithEscapedSingleQuoteCharacters() {
        String content = "=='this \"is\" a \\'single-quoted\\' \n string'-";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(41), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 42, SqlTokenizer.QUOTED_STRING);
        assertNextTokenIs(42, 43, SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedStringAtEndOfContent() {
        String content = "=='this is a single-quoted \n string'";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    // Despite the method name, the expected outcome here is a ParsingException for the unterminated string.
    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForSingleQuotedStringWithoutClosingQuote() {
        String content = "=='this is a single-quoted \n string";
        tokenize(content);
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedString() {
        String content = "==\"this is a double-quoted \n string\"-";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, SqlTokenizer.QUOTED_STRING);
        assertNextTokenIs(36, 37, SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedStringWithEscapedDoubleQuoteCharacters() {
        String content = "==\"this 'is' a \\\"double-quoted\\\" \n string\"-";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(41), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 42, SqlTokenizer.QUOTED_STRING);
        assertNextTokenIs(42, 43, SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedStringAtEndOfContent() {
        String content = "==\"this is a double-quoted \n string\"";
        // Sanity-check the fixture so the hard-coded indexes below stay in sync with the content.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    // Despite the method name, the expected outcome here is a ParsingException for the unterminated string.
    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForDoubleQuotedStringWithoutClosingQuote() {
        String content = "==\"this is a double-quoted \n string";
        tokenize(content);
    }

    @Test
    public void shouldCreateTokenForSquareBracketQuotedString() {
        String content = "[/foo/bar/baz]";
        tokenize(content);
        assertNextTokenIs(0, content.length(), SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSquareBracketQuotedStringWithEmbeddedUnquotedSquareBrackets() {
        String content = "[/foo/bar[12]/baz[3]]";
        tokenize(content);
        assertNextTokenIs(0, content.length(), SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSquareBracketQuotedStringWithUnrealisticEmbeddedUnquotedSquareBrackets() {
        String content = "[/foo/bar[12]/baz[[[3]]]]";
        tokenize(content);
        assertNextTokenIs(0, content.length(), SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokensForWordsWithAlphabeticCharacters() {
        String content = "This is a series of words.";
        tokenize(content);
        assertNextTokenIs(0, 4, SqlTokenizer.WORD);
        assertNextTokenIs(5, 7, SqlTokenizer.WORD);
        assertNextTokenIs(8, 9, SqlTokenizer.WORD);
        assertNextTokenIs(10, 16, SqlTokenizer.WORD);
        assertNextTokenIs(17, 19, SqlTokenizer.WORD);
        assertNextTokenIs(20, 25, SqlTokenizer.WORD);
        assertNextTokenIs(25, 26, SqlTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokensForWordsWithNumericCharacters() {
        String content = "1234 4 5353.324";
        tokenize(content);
        assertNextTokenIs(0, 4, SqlTokenizer.WORD);
        assertNextTokenIs(5, 6, SqlTokenizer.WORD);
        assertNextTokenIs(7, 11, SqlTokenizer.WORD);
        assertNextTokenIs(11, 12, SqlTokenizer.SYMBOL);
        assertNextTokenIs(12, 15, SqlTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokensForWordsWithAlphaNumericCharacters() {
        String content = "123a 5353.324e100";
        tokenize(content);
        assertNextTokenIs(0, 4, SqlTokenizer.WORD);
        assertNextTokenIs(5, 9, SqlTokenizer.WORD);
        assertNextTokenIs(9, 10, SqlTokenizer.SYMBOL);
        assertNextTokenIs(10, 17, SqlTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldParseAlphaNumericAndUnderscoreCharactersAsOneWordToken() {
        String content = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
        tokenize(content);
        assertNextTokenIs(0, 63, SqlTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldParseUnquotedNameWithPrefixAsSeparateTokens() {
        String content = "dna:name";
        tokenize(content);
        assertNextTokenIs(0, 3, SqlTokenizer.WORD);
        assertNextTokenIs(3, 4, SqlTokenizer.SYMBOL);
        assertNextTokenIs(4, 8, SqlTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldParseQuotedNameWithPrefixAsOneToken() {
        String content = "'dna:name'";
        tokenize(content);
        assertNextTokenIs(0, 10, SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }

    @Test
    public void shouldParseUnquotedPathAsSeparateTokens() {
        String content = "/a/b/c/dna:name[1]/e/f";
        tokenize(content);
        assertNextTokenIs(0, 1, SqlTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, SqlTokenizer.WORD); // a
        assertNextTokenIs(2, 3, SqlTokenizer.SYMBOL);
        assertNextTokenIs(3, 4, SqlTokenizer.WORD); // b
        assertNextTokenIs(4, 5, SqlTokenizer.SYMBOL);
        assertNextTokenIs(5, 6, SqlTokenizer.WORD); // c
        assertNextTokenIs(6, 7, SqlTokenizer.SYMBOL);
        assertNextTokenIs(7, 10, SqlTokenizer.WORD); // dna
        assertNextTokenIs(10, 11, SqlTokenizer.SYMBOL);
        assertNextTokenIs(11, 15, SqlTokenizer.WORD); // name
        assertNextTokenIs(15, 18, SqlTokenizer.QUOTED_STRING); // [1]
        assertNextTokenIs(18, 19, SqlTokenizer.SYMBOL);
        assertNextTokenIs(19, 20, SqlTokenizer.WORD); // e
        assertNextTokenIs(20, 21, SqlTokenizer.SYMBOL);
        assertNextTokenIs(21, 22, SqlTokenizer.WORD); // f
        assertNoMoreTokens();
    }

    @Test
    public void shouldParseQuotedPathAsOneToken() {
        String content = "'/a/b/c/dna:name[1]/e/f'";
        tokenize(content);
        assertNextTokenIs(0, 24, SqlTokenizer.QUOTED_STRING);
        assertNoMoreTokens();
    }
}