/******************************************************************************* * Copyright (c) 2008, 2015 Nokia Corporation. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Ed Swartz (Nokia) - initial API and implementation *******************************************************************************/ package org.eclipse.cdt.autotools.ui.tests.autoconf; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; import java.util.List; import org.eclipse.cdt.autotools.ui.editors.parser.AutoconfTokenizer; import org.eclipse.cdt.autotools.ui.editors.parser.ITokenConstants; import org.eclipse.cdt.autotools.ui.editors.parser.ParseException; import org.eclipse.cdt.autotools.ui.editors.parser.Token; import org.eclipse.jface.text.Document; import org.eclipse.jface.text.IDocument; import org.junit.Test; /** * Test autoconf tokenizer. The tokenizer mainly is used to detect boundaries and keywords * and is not a full shell tokenizer. * @author eswartz * */ public class TestTokenizer { private ArrayList<ParseException> tokenizerErrors; protected IDocument createDocument(String text) { return new Document(text); } protected List<Token> tokenize(IDocument document, boolean isM4Mode) { tokenizerErrors = new ArrayList<>(); AutoconfTokenizer tokenizer = new AutoconfTokenizer(document, (ParseException exception) -> { tokenizerErrors.add(exception); }); tokenizer.setM4Context(isM4Mode); return tokenize(tokenizer); } protected List<Token> tokenize(AutoconfTokenizer tokenizer) { List<Token> tokens = new ArrayList<>(); while (true) { Token token = tokenizer.readToken(); if (token.getType() == ITokenConstants.EOF) break; tokens.add(token); } return tokens; } protected void checkNoErrors() { assertEquals(0, tokenizerErrors.size()); } @Test public void testEmpty() { IDocument document = createDocument(""); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(0, tokens.size()); } @Test public void testEOL1() { IDocument document = createDocument("\n"); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(1, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.EOL, "\n"); } @Test public void testEOL2() { IDocument document = createDocument("\r\n"); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(1, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.EOL, "\r\n"); } @Test public void testEOL3() { IDocument document = createDocument("\n\r\n\n"); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.EOL, "\n"); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\r\n"); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testShellText() { // default mode is shell String text = "random\nstuff\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(4, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "random"); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\n"); checkToken(tokens.get(2), document, ITokenConstants.WORD, "stuff"); checkToken(tokens.get(3), document, ITokenConstants.EOL, "\n"); } @Test public void testShellTokens() { // default mode is shell String text = "while true; do ls; done\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(8, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.SH_WHILE, "while"); checkToken(tokens.get(1), document, ITokenConstants.WORD, "true"); checkToken(tokens.get(2), document, ITokenConstants.SEMI, ";"); checkToken(tokens.get(3), document, ITokenConstants.SH_DO, "do"); checkToken(tokens.get(4), document, ITokenConstants.WORD, "ls"); checkToken(tokens.get(5), document, ITokenConstants.SEMI, ";"); checkToken(tokens.get(6), document, ITokenConstants.SH_DONE, "done"); checkToken(tokens.get(7), document, ITokenConstants.EOL, "\n"); } @Test public void testShellTokens2() { // don't misread partial tokens String text = "while_stuff incase"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "while_stuff"); checkToken(tokens.get(1), document, ITokenConstants.WORD, "incase"); } @Test public void testShellTokens3() { // don't interpret m4 strings in shell mode String text = "`foo'"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); assertEquals(1, tokenizerErrors.size()); assertEquals(1, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.SH_STRING_BACKTICK, "foo'", 5); } @Test public void testShellTokens4() { String text = "echo $if $((foo)) $\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(11, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "echo"); checkToken(tokens.get(1), document, ITokenConstants.SH_DOLLAR, "$"); // dollar guards keywords, but the tokenizer doesn't know this checkToken(tokens.get(2), document, ITokenConstants.SH_IF, "if"); checkToken(tokens.get(3), document, ITokenConstants.SH_DOLLAR, "$"); checkToken(tokens.get(4), document, ITokenConstants.LPAREN, "("); checkToken(tokens.get(5), document, ITokenConstants.LPAREN, "("); checkToken(tokens.get(6), document, ITokenConstants.WORD, "foo"); checkToken(tokens.get(7), document, ITokenConstants.RPAREN, ")"); checkToken(tokens.get(8), document, ITokenConstants.RPAREN, ")"); checkToken(tokens.get(9), document, ITokenConstants.SH_DOLLAR, "$"); checkToken(tokens.get(10), document, ITokenConstants.EOL, "\n"); } @Test public void testShellTokens5() { String text = "while do select for until done\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(7, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.SH_WHILE, "while"); checkToken(tokens.get(1), document, ITokenConstants.SH_DO, "do"); checkToken(tokens.get(2), document, ITokenConstants.SH_SELECT, "select"); checkToken(tokens.get(3), document, ITokenConstants.SH_FOR, "for"); checkToken(tokens.get(4), document, ITokenConstants.SH_UNTIL, "until"); checkToken(tokens.get(5), document, ITokenConstants.SH_DONE, "done"); checkToken(tokens.get(6), document, ITokenConstants.EOL, "\n"); } @Test public void testShellComments() { // comments are stripped and ignored in the shell mode String text = "for # while case\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.SH_FOR, "for"); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\n"); } @Test public void testShellComments2() { // comments are stripped and ignored in the shell mode String text = "# while case\n" + "#for x in 3\n" + "\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.EOL, "\n"); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\n"); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Tokens0() { String text = "while_stuff incase"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, true); checkNoErrors(); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "while_stuff"); checkToken(tokens.get(1), document, ITokenConstants.WORD, "incase"); } @Test public void testShellStrings() { String QUOTED = "ls -la \"*.c\""; String text = "echo `" + QUOTED + "`\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "echo"); checkToken(tokens.get(1), document, ITokenConstants.SH_STRING_BACKTICK, QUOTED, QUOTED.length() + 2); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testShellStrings2() { String QUOTED = "ls -la 'space file'"; String text = "echo \"" + QUOTED + "\"\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "echo"); checkToken(tokens.get(1), document, ITokenConstants.SH_STRING_DOUBLE, QUOTED, QUOTED.length() + 2); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testShellStrings3() { String QUOTED = "echo \"*.c\" | sed s/[a-z]/[A-Z]/g"; String text = "echo '" + QUOTED + "'\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, false); checkNoErrors(); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "echo"); checkToken(tokens.get(1), document, ITokenConstants.SH_STRING_SINGLE, QUOTED, QUOTED.length() + 2); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Tokens1() { String text = "define(`hi\', `HI\')\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, true); checkNoErrors(); assertEquals(7, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "define"); checkToken(tokens.get(1), document, ITokenConstants.LPAREN, "("); // strings are unquoted in token text checkToken(tokens.get(2), document, ITokenConstants.M4_STRING, "hi", 4); checkToken(tokens.get(3), document, ITokenConstants.COMMA, ","); checkToken(tokens.get(4), document, ITokenConstants.M4_STRING, "HI", 4); checkToken(tokens.get(5), document, ITokenConstants.RPAREN, ")"); checkToken(tokens.get(6), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Comments() { String text = "dnl # comment\n"; IDocument document = createDocument(text); List<Token> tokens = tokenize(document, true); checkNoErrors(); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "dnl"); checkToken(tokens.get(1), document, ITokenConstants.M4_COMMENT, "# comment\n"); } @Test public void testM4Comments2() { String text = "dnl /* word(`quoted')\n" + "*/\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(true); tokenizer.setM4Comment("/*", "*/"); List<Token> tokens = tokenize(tokenizer); assertEquals(3, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "dnl"); checkToken(tokens.get(1), document, ITokenConstants.M4_COMMENT, "/* word(`quoted')\n*/"); checkToken(tokens.get(2), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Strings1() { // double quotes only removes one level of quotes String text = "``double''\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(true); List<Token> tokens = tokenize(tokenizer); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.M4_STRING, "`double'", 8 + 1 + 1); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Strings2() { String text = "myword(!!boundary==)\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(true); tokenizer.setM4Quote("!!", "=="); List<Token> tokens = tokenize(tokenizer); assertEquals(5, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "myword"); checkToken(tokens.get(1), document, ITokenConstants.LPAREN, "("); checkToken(tokens.get(2), document, ITokenConstants.M4_STRING, "boundary", 8 + 2 + 2); checkToken(tokens.get(3), document, ITokenConstants.RPAREN, ")"); checkToken(tokens.get(4), document, ITokenConstants.EOL, "\n"); } @Test public void testM4Tokens2() { // dollar is not seen in m4 mode (only important when expanding) String text = "define(foo,$1)\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(true); List<Token> tokens = tokenize(tokenizer); assertEquals(8, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.WORD, "define"); checkToken(tokens.get(1), document, ITokenConstants.LPAREN, "("); checkToken(tokens.get(2), document, ITokenConstants.WORD, "foo"); checkToken(tokens.get(3), document, ITokenConstants.COMMA, ","); checkToken(tokens.get(4), document, ITokenConstants.TEXT, "$"); checkToken(tokens.get(5), document, ITokenConstants.TEXT, "1"); checkToken(tokens.get(6), document, ITokenConstants.RPAREN, ")"); checkToken(tokens.get(7), document, ITokenConstants.EOL, "\n"); } @Test public void testM4QuoteNesting() { String quote = "this is `nested\'!"; String text = "`" + quote +"'\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(true); List<Token> tokens = tokenize(tokenizer); assertEquals(2, tokens.size()); checkToken(tokens.get(0), document, ITokenConstants.M4_STRING, quote, quote.length() + 2); checkToken(tokens.get(1), document, ITokenConstants.EOL, "\n"); } @Test public void testMixedContext() { String text = "AM_INIT([arg])if true\n"; IDocument document = createDocument(text); AutoconfTokenizer tokenizer = createTokenizer(document); tokenizer.setM4Context(false); tokenizer.setM4Quote("[", "]"); Token token; token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.WORD, "AM_INIT"); // "hey, that's a macro" tokenizer.setM4Context(true); token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.LPAREN, "("); token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.M4_STRING, "arg", 5); token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.RPAREN, ")"); // "check it's not a dangling paren" // it'll still be an m4 word token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.WORD, "if"); // push back token tokenizer.unreadToken(token); // "done reading macro" tokenizer.setM4Context(false); // "get shell stuff" token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.SH_IF, "if"); token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.WORD, "true"); token = tokenizer.readToken(); checkToken(token, document, ITokenConstants.EOL, "\n"); checkToken(tokenizer.readToken(), document, ITokenConstants.EOF); } private AutoconfTokenizer createTokenizer(IDocument document) { return new AutoconfTokenizer(document, (ParseException exception) -> { fail(exception.toString()); }); } private void checkToken(Token token, IDocument document, int type) { assertEquals(type, token.getType()); assertSame(document, token.getDocument()); assertTrue(token.getOffset() >= 0); assertTrue(token.getType() == ITokenConstants.EOF || token.getLength() > 0); assertEquals(document.get().substring(token.getOffset(), token.getOffset() + token.getLength()), token.getText()); } private void checkToken(Token token, IDocument document, int type, String text) { assertEquals(type, token.getType()); assertSame(document, token.getDocument()); assertTrue(token.getOffset() >= 0); assertEquals(text, token.getText()); assertEquals(text.length(), token.getLength()); } private void checkToken(Token token, IDocument document, int type, String text, int length) { assertEquals(type, token.getType()); assertSame(document, token.getDocument()); assertTrue(token.getOffset() >= 0); assertEquals(text, token.getText()); assertEquals(length, token.getLength()); } }