/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import org.apache.commons.io.IOUtils;
import org.codehaus.groovy.antlr.parser.GroovyLexer;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStream;
import groovyjarjarantlr.TokenStreamException;
/**
* The Grooovy Tokenizer
*/
public class GroovyTokenizer implements Tokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();
GroovyLexer lexer = new GroovyLexer(IOUtils.toInputStream(buffer.toString()));
TokenStream tokenStream = lexer.plumb();
try {
Token token = tokenStream.nextToken();
while (token.getType() != Token.EOF_TYPE) {
TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
tokenEntries.add(tokenEntry);
token = tokenStream.nextToken();
}
} catch (TokenStreamException err) {
// Wrap exceptions of the Groovy tokenizer in a TokenMgrError, so
// they are correctly handled
// when CPD is executed with the '--skipLexicalErrors' command line
// option
throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + lexer.getLine()
+ ", column " + lexer.getColumn() + ". Encountered: " + err.getMessage(),
TokenMgrError.LEXICAL_ERROR);
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}
}