/** * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ package net.sourceforge.pmd.cpd; import java.io.Reader; import java.io.StringReader; import org.apache.commons.io.IOUtils; import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.python.PythonLanguageModule; import net.sourceforge.pmd.lang.python.ast.Token; import net.sourceforge.pmd.util.IOUtil; /** * The Python tokenizer. */ public class PythonTokenizer implements Tokenizer { @Override public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); Reader reader = null; try { LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); reader = new StringReader(buffer.toString()); reader = IOUtil.skipBOM(reader); TokenManager tokenManager = languageVersionHandler .getParser(languageVersionHandler.getDefaultParserOptions()) .getTokenManager(sourceCode.getFileName(), reader); Token currentToken = (Token) tokenManager.getNextToken(); while (currentToken.image.length() > 0) { tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); currentToken = (Token) tokenManager.getNextToken(); } tokenEntries.add(TokenEntry.getEOF()); System.err.println("Added " + sourceCode); } catch (TokenMgrError err) { err.printStackTrace(); System.err.println("Skipping " + sourceCode + " due to parse error"); tokenEntries.add(TokenEntry.getEOF()); } finally { IOUtils.closeQuietly(reader); } } }