/*
* Copyright (c) 2013, the Dart project authors.
*
* Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.eclipse.org/legal/epl-v10.html
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.dart.engine.html.scanner;
import static com.google.dart.engine.html.scanner.TokenType.COMMENT;
import static com.google.dart.engine.html.scanner.TokenType.DECLARATION;
import static com.google.dart.engine.html.scanner.TokenType.DIRECTIVE;
import static com.google.dart.engine.html.scanner.TokenType.EOF;
import static com.google.dart.engine.html.scanner.TokenType.EQ;
import static com.google.dart.engine.html.scanner.TokenType.GT;
import static com.google.dart.engine.html.scanner.TokenType.LT;
import static com.google.dart.engine.html.scanner.TokenType.LT_SLASH;
import static com.google.dart.engine.html.scanner.TokenType.SLASH_GT;
import static com.google.dart.engine.html.scanner.TokenType.STRING;
import static com.google.dart.engine.html.scanner.TokenType.TAG;
import static com.google.dart.engine.html.scanner.TokenType.TEXT;
import java.util.Arrays;
import junit.framework.TestCase;
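/**
 * Base class for tests of {@link AbstractScanner} implementations. Each test tokenizes an HTML
 * fragment and verifies both the resulting token stream and the recorded line start offsets.
 * Concrete subclasses supply the scanner under test via {@link #newScanner(String)}.
 */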
public abstract class AbstractScannerTest extends TestCase {
public void test_tokenize_attribute() {
tokenize("<html bob=\"one two\">", new Object[] {LT, "html", "bob", EQ, "\"one two\"", GT});
}
public void test_tokenize_comment() {
tokenize("<!-- foo -->", new Object[] {"<!-- foo -->"});
}
public void test_tokenize_comment_incomplete() {
tokenize("<!-- foo", new Object[] {"<!-- foo"});
}
public void test_tokenize_comment_with_gt() {
tokenize("<!-- foo > -> -->", new Object[] {"<!-- foo > -> -->"});
}
public void test_tokenize_declaration() {
tokenize("<! foo ><html>", new Object[] {"<! foo >", LT, "html", GT});
}
public void test_tokenize_declaration_malformed() {
tokenize("<! foo /><html>", new Object[] {"<! foo />", LT, "html", GT});
}
public void test_tokenize_directive_incomplete() {
tokenize("<? \nfoo", new Object[] {"<? \nfoo"}, new int[] {0, 4});
}
public void test_tokenize_directive_xml() {
tokenize(
"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",
new Object[] {"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"});
}
public void test_tokenize_directives_incomplete_with_newline() {
tokenize("<! \nfoo", new Object[] {"<! \nfoo"}, new int[] {0, 4});
}
public void test_tokenize_empty() {
tokenize("", new Object[] {});
}
public void test_tokenize_lt() {
tokenize("<", new Object[] {LT});
}
public void test_tokenize_script_embedded_tags() throws Exception {
tokenize("<script> <p></p></script>", new Object[] {
LT, "script", GT, " <p></p>", LT_SLASH, "script", GT});
}
public void test_tokenize_script_embedded_tags2() throws Exception {
tokenize("<script> <p></p><</script>", new Object[] {
LT, "script", GT, " <p></p><", LT_SLASH, "script", GT});
}
public void test_tokenize_script_embedded_tags3() throws Exception {
tokenize("<script> <p></p></</script>", new Object[] {
LT, "script", GT, " <p></p></", LT_SLASH, "script", GT});
}
public void test_tokenize_script_partial() throws Exception {
tokenize("<script> <p> ", new Object[] {LT, "script", GT, " <p> "});
}
public void test_tokenize_script_partial2() throws Exception {
tokenize("<script> <p> <", new Object[] {LT, "script", GT, " <p> <"});
}
public void test_tokenize_script_partial3() throws Exception {
tokenize("<script> <p> </", new Object[] {LT, "script", GT, " <p> </"});
}
public void test_tokenize_script_ref() throws Exception {
tokenize("<script source='some.dart'/> <p>", new Object[] {
LT, "script", "source", EQ, "'some.dart'", SLASH_GT, " ", LT, "p", GT});
}
public void test_tokenize_script_with_newline() throws Exception {
tokenize("<script> <p>\n </script>", new Object[] {
LT, "script", GT, " <p>\n ", LT_SLASH, "script", GT}, new int[] {0, 13});
}
public void test_tokenize_spaces_and_newlines() {
Token token = tokenize(
" < html \n bob = 'joe\n' >\n <\np > one \r\n two <!-- \rfoo --> </ p > </ html > ",
new Object[] {
" ", LT, "html", "bob", EQ, "'joe\n'", GT, "\n ", LT, "p", GT, " one \r\n two ",
"<!-- \rfoo -->", " ", LT_SLASH, "p", GT, " ", LT_SLASH, "html", GT, " "},
new int[] {0, 9, 21, 25, 28, 38, 49});
token = token.getNext();
assertEquals(1, token.getOffset());
token = token.getNext();
assertEquals(3, token.getOffset());
token = token.getNext();
assertEquals(10, token.getOffset());
}
public void test_tokenize_string() {
tokenize("<p bob=\"foo\">", new Object[] {LT, "p", "bob", EQ, "\"foo\"", GT});
}
public void test_tokenize_string_partial() {
tokenize("<p bob=\"foo", new Object[] {LT, "p", "bob", EQ, "\"foo"});
}
public void test_tokenize_string_single_quote() {
tokenize("<p bob='foo'>", new Object[] {LT, "p", "bob", EQ, "'foo'", GT});
}
public void test_tokenize_string_single_quote_partial() {
tokenize("<p bob='foo", new Object[] {LT, "p", "bob", EQ, "'foo"});
}
public void test_tokenize_tag_begin_end() {
tokenize("<html></html>", new Object[] {LT, "html", GT, LT_SLASH, "html", GT});
}
public void test_tokenize_tag_begin_only() {
Token token = tokenize("<html>", new Object[] {LT, "html", GT});
token = token.getNext();
assertEquals(1, token.getOffset());
}
public void test_tokenize_tag_incomplete_with_special_characters() {
tokenize("<br-a_b", new Object[] {LT, "br-a_b"});
}
public void test_tokenize_tag_self_contained() {
tokenize("<br/>", new Object[] {LT, "br", SLASH_GT});
}
public void test_tokenize_tags_wellformed() {
tokenize("<html><p>one two</p></html>", new Object[] {
LT, "html", GT, LT, "p", GT, "one two", LT_SLASH, "p", GT, LT_SLASH, "html", GT});
}
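/**
 * Answer a new scanner for tokenizing the given input.
 *
 * @param input the HTML source to be scanned
 * @return the scanner under test
 */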
protected abstract AbstractScanner newScanner(String input);
/**
* Given an object representing an expected token, answer the expected token type.
*
* @param count the token count for error reporting
* @param expected the object representing an expected token
* @return the expected token type
*/
private TokenType getExpectedTokenType(int count, Object expected) {
if (expected instanceof TokenType) {
return (TokenType) expected;
}
if (expected instanceof String) {
String lexeme = (String) expected;
if (lexeme.startsWith("\"") || lexeme.startsWith("'")) {
return STRING;
}
if (lexeme.startsWith("<!--")) {
return COMMENT;
}
if (lexeme.startsWith("<!")) {
return DECLARATION;
}
if (lexeme.startsWith("<?")) {
return DIRECTIVE;
}
if (isTag(lexeme)) {
return TAG;
}
return TEXT;
}
fail("Unknown expected token " + count + ": "
+ (expected != null ? expected.getClass() : "null"));
return null;
}
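/**
 * Answer {@code true} if the given lexeme is a valid tag name: a letter followed by letters,
 * digits, hyphens, or underscores.
 *
 * @param lexeme the lexeme being tested
 * @return {@code true} if the lexeme is a tag name
 */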
private boolean isTag(String lexeme) {
if (lexeme.length() == 0 || !Character.isLetter(lexeme.charAt(0))) {
return false;
}
for (int index = 1; index < lexeme.length(); index++) {
char ch = lexeme.charAt(index);
if (!Character.isLetterOrDigit(ch) && ch != '-' && ch != '_') {
return false;
}
}
return true;
}
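/**
 * Scan the given input and assert that the resulting token stream matches the expected tokens,
 * assuming the input is a single line starting at offset {@code 0}.
 *
 * @param input the HTML source to be scanned
 * @param expectedTokens the expected tokens, each either a {@link TokenType} or a lexeme
 * @return the first token in the token stream
 */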
private Token tokenize(String input, Object[] expectedTokens) {
return tokenize(input, expectedTokens, new int[] {0});
}
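/**
 * Scan the given input and assert that both the resulting token stream and the recorded line
 * start offsets match the expected values.
 *
 * @param input the HTML source to be scanned
 * @param expectedTokens the expected tokens, each either a {@link TokenType} or a lexeme
 * @param expectedLineStarts the expected offsets of the first character of each line
 * @return the first token in the token stream
 */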
private Token tokenize(String input, Object[] expectedTokens, int[] expectedLineStarts) {
AbstractScanner scanner = newScanner(input);
scanner.setPassThroughElements(new String[] {"script"});
int count = 0;
Token firstToken = scanner.tokenize();
Token token = firstToken;
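// Verify the EOF token that precedes the first token in the stream.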
Token previousToken = token.getPrevious();
assertSame(EOF, previousToken.getType());
assertSame(previousToken, previousToken.getPrevious());
assertEquals(-1, previousToken.getOffset());
assertSame(token, previousToken.getNext());
assertEquals(0, token.getOffset());
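// Walk the token stream, comparing each token's type and lexeme against the expectations.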
while (token.getType() != EOF) {
if (count == expectedTokens.length) {
fail("too many parsed tokens");
}
Object expected = expectedTokens[count];
TokenType expectedTokenType = getExpectedTokenType(count, expected);
assertSame("token " + count, expectedTokenType, token.getType());
if (expectedTokenType.getLexeme() != null) {
assertEquals("token " + count, expectedTokenType.getLexeme(), token.getLexeme());
} else {
assertEquals("token " + count, expected, token.getLexeme());
}
count++;
previousToken = token;
token = token.getNext();
assertSame(previousToken, token.getPrevious());
}
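// The stream ends with an EOF token positioned just past the end of the input.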
assertSame(token, token.getNext());
assertEquals(input.length(), token.getOffset());
if (count != expectedTokens.length) {
assertTrue("not enough parsed tokens", false);
}
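// Verify the recorded line start offsets.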
int[] lineStarts = scanner.getLineStarts();
boolean success = expectedLineStarts.length == lineStarts.length;
if (success) {
for (int i = 0; i < lineStarts.length; i++) {
if (expectedLineStarts[i] != lineStarts[i]) {
success = false;
break;
}
}
}
if (!success) {
fail("Expected line starts " + Arrays.toString(expectedLineStarts) + " but found "
    + Arrays.toString(lineStarts));
}
return firstToken;
}
}