/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.io.StringReader;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Properties;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;
public class JavaTokenizer implements Tokenizer {
/**
 * Image of the string literal (surrounding quotes included) that, when it
 * appears inside an annotation, switches CPD suppression on.
 */
public static final String CPD_START = "\"CPD-START\"";
/**
 * Image of the string literal (surrounding quotes included) that, when it
 * appears inside an annotation, switches CPD suppression off again.
 */
public static final String CPD_END = "\"CPD-END\"";
// When true, tokens that belong to annotations are discarded.
private boolean ignoreAnnotations;
// When true, the image of literal tokens is replaced by their token kind.
private boolean ignoreLiterals;
// When true, the image of identifier tokens is replaced by their token kind
// (constructor names are restored by the ConstructorDetector).
private boolean ignoreIdentifiers;
/**
 * Configures this tokenizer from the given properties.
 * <p>
 * The keys {@code IGNORE_ANNOTATIONS}, {@code IGNORE_LITERALS} and
 * {@code IGNORE_IDENTIFIERS} are read; a key that is absent is treated as
 * {@code "false"}.
 * </p>
 *
 * @param properties the properties holding the three ignore flags
 */
public void setProperties(Properties properties) {
    ignoreAnnotations = readFlag(properties, IGNORE_ANNOTATIONS);
    ignoreLiterals = readFlag(properties, IGNORE_LITERALS);
    ignoreIdentifiers = readFlag(properties, IGNORE_IDENTIFIERS);
}

/**
 * Parses the value stored under {@code key} as a boolean, falling back to
 * {@code false} when the key is not set.
 */
private static boolean readFlag(Properties properties, String key) {
    return Boolean.parseBoolean(properties.getProperty(key, "false"));
}
/**
 * Splits the given source into CPD token entries.
 * <p>
 * Tokens are pulled from the Java token manager until a token with an empty
 * image is returned. Every token first updates a {@link TokenDiscarder};
 * tokens it does not discard are handed to {@code processToken}. An EOF
 * entry is appended once the token stream is exhausted.
 * </p>
 *
 * @param sourceCode the source to tokenize; supplies the code buffer and file name
 * @param tokenEntries the collection that receives the produced entries
 */
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder code = sourceCode.getCodeBuffer();

    // Note that Java version is irrelevant for tokenizing
    LanguageVersionHandler handler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
            .getVersion("1.4").getLanguageVersionHandler();
    String fileName = sourceCode.getFileName();
    TokenManager tokenManager = handler.getParser(handler.getDefaultParserOptions())
            .getTokenManager(fileName, new StringReader(code.toString()));

    TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
    ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);

    // A token with an empty image marks the end of the stream.
    for (Token token = (Token) tokenManager.getNextToken(); !token.image.isEmpty();
            token = (Token) tokenManager.getNextToken()) {
        // The discarder must see every token, even the ones it rejects.
        discarder.updateState(token);
        if (!discarder.isDiscarding()) {
            processToken(tokenEntries, fileName, token, constructorDetector);
        }
    }

    tokenEntries.add(TokenEntry.getEOF());
}
/**
 * Turns one token into a {@link TokenEntry} and appends it to
 * {@code tokenEntries}.
 * <p>
 * When {@code ignoreLiterals} is set and the token is a literal, or when
 * {@code ignoreIdentifiers} is set and the token is an identifier, the
 * entry's image is the token kind (as a decimal string) instead of the
 * token text, so that tokens of the same kind compare equal.
 * </p>
 *
 * @param tokenEntries the collection the new entry is appended to
 * @param fileName the file name recorded in the entry
 * @param currentToken the token to convert
 * @param constructorDetector state machine used to restore constructor names
 */
private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
        ConstructorDetector constructorDetector) {
    // Must run before the detector sees the current token: it inspects the
    // identifier seen previously and may rewrite the last stored entry.
    constructorDetector.restoreConstructorToken(tokenEntries, currentToken);

    String image = currentToken.image;
    boolean anonymize = ignoreLiterals && isLiteral(currentToken.kind)
            || ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER;
    if (anonymize) {
        image = String.valueOf(currentToken.kind);
    }

    constructorDetector.processToken(currentToken);
    tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
}

/**
 * Tells whether the given token kind is a literal whose image is replaced
 * when literals are ignored.
 */
private static boolean isLiteral(int kind) {
    // NOTE(review): in JavaCC-derived Java grammars DECIMAL_LITERAL is
    // normally a private sub-token of INTEGER_LITERAL, so lexed integer
    // tokens carry the kind INTEGER_LITERAL - confirm against the grammar.
    // DECIMAL_LITERAL is still accepted to stay backward-compatible.
    return kind == JavaParserConstants.STRING_LITERAL
            || kind == JavaParserConstants.CHARACTER_LITERAL
            || kind == JavaParserConstants.INTEGER_LITERAL
            || kind == JavaParserConstants.DECIMAL_LITERAL
            || kind == JavaParserConstants.FLOATING_POINT_LITERAL;
}
/**
 * Sets whether literal tokens have their image replaced by their token kind.
 *
 * @param ignore the new value of the ignore-literals flag
 */
public void setIgnoreLiterals(boolean ignore) {
this.ignoreLiterals = ignore;
}
/**
 * Sets whether identifier tokens have their image replaced by their token kind.
 *
 * @param ignore the new value of the ignore-identifiers flag
 */
public void setIgnoreIdentifiers(boolean ignore) {
this.ignoreIdentifiers = ignore;
}
/**
 * Sets whether tokens belonging to annotations are discarded.
 *
 * @param ignoreAnnotations the new value of the ignore-annotations flag
 */
public void setIgnoreAnnotations(boolean ignoreAnnotations) {
this.ignoreAnnotations = ignoreAnnotations;
}
/**
* The {@link TokenDiscarder} consumes token by token and maintains state.
* It can detect, whether the current token belongs to an annotation and
* whether the current token should be discarded by CPD.
* <p>
* By default, it discards semicolons, package and import statements, and
* enables CPD suppression. Optionally, all annotations can be ignored, too.
* </p>
*/
private static class TokenDiscarder {
private boolean isAnnotation = false;
private boolean nextTokenEndsAnnotation = false;
private int annotationStack = 0;
private boolean discardingSemicolon = false;
private boolean discardingKeywords = false;
private boolean discardingSuppressing = false;
private boolean discardingAnnotations = false;
private boolean ignoreAnnotations = false;
TokenDiscarder(boolean ignoreAnnotations) {
this.ignoreAnnotations = ignoreAnnotations;
}
public void updateState(Token currentToken) {
detectAnnotations(currentToken);
skipSemicolon(currentToken);
skipPackageAndImport(currentToken);
skipCPDSuppression(currentToken);
if (ignoreAnnotations) {
skipAnnotations();
}
}
private void skipPackageAndImport(Token currentToken) {
if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
discardingKeywords = true;
} else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
discardingKeywords = false;
}
}
private void skipSemicolon(Token currentToken) {
if (currentToken.kind == JavaParserConstants.SEMICOLON) {
discardingSemicolon = true;
} else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
discardingSemicolon = false;
}
}
private void skipCPDSuppression(Token currentToken) {
// Check if a comment is altering the suppression state
Token st = currentToken.specialToken;
while (st != null) {
if (st.image.contains("CPD-OFF")) {
discardingSuppressing = true;
break;
}
if (st.image.contains("CPD-ON")) {
discardingSuppressing = false;
break;
}
st = st.specialToken;
}
// if processing an annotation, look for a CPD-START or CPD-END
if (isAnnotation) {
if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
&& CPD_START.equals(currentToken.image)) {
discardingSuppressing = true;
} else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
&& CPD_END.equals(currentToken.image)) {
discardingSuppressing = false;
}
}
}
private void skipAnnotations() {
if (!discardingAnnotations && isAnnotation) {
discardingAnnotations = true;
} else if (discardingAnnotations && !isAnnotation) {
discardingAnnotations = false;
}
}
public boolean isDiscarding() {
boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations
|| discardingSuppressing;
return result;
}
private void detectAnnotations(Token currentToken) {
if (isAnnotation && nextTokenEndsAnnotation) {
isAnnotation = false;
nextTokenEndsAnnotation = false;
}
if (isAnnotation) {
if (currentToken.kind == JavaParserConstants.LPAREN) {
annotationStack++;
} else if (currentToken.kind == JavaParserConstants.RPAREN) {
annotationStack--;
if (annotationStack == 0) {
nextTokenEndsAnnotation = true;
}
} else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER
&& currentToken.kind != JavaParserConstants.LPAREN) {
isAnnotation = false;
}
}
if (currentToken.kind == JavaParserConstants.AT) {
isAnnotation = true;
}
}
}
/**
 * The {@link ConstructorDetector} consumes token by token and maintains
 * state. It can detect whether the current token belongs to a constructor
 * method identifier and, if so, is able to restore it when using
 * ignoreIdentifiers.
 * <p>
 * It keeps a stack of the type declarations currently open, each with the
 * brace nesting level of its body and its name. A {@code class} token or an
 * {@code enum} identifier pushes a declaration whose name is taken from the
 * identifier that follows immediately. When no identifier follows (e.g. a
 * class literal such as {@code Foo.class}), the pushed declaration is
 * dropped again.
 * </p>
 * <p>
 * With {@code ignoreIdentifiers == false} both public methods are no-ops.
 * </p>
 */
private static class ConstructorDetector {
    private boolean ignoreIdentifiers;

    /** Open type declarations, innermost first. */
    private Deque<TypeDeclaration> classMembersIndentations;
    private int currentNestingLevel;

    /** True while the declaration on top of the stack still awaits its name. */
    private boolean storeNextIdentifier;

    /** Image of the identifier token seen most recently. */
    private String prevIdentifier;

    ConstructorDetector(boolean ignoreIdentifiers) {
        this.ignoreIdentifiers = ignoreIdentifiers;
        currentNestingLevel = 0;
        classMembersIndentations = new LinkedList<TypeDeclaration>();
    }

    /**
     * Updates the declaration stack and nesting level with the given token.
     *
     * @param currentToken the token that has just been tokenized
     */
    public void processToken(Token currentToken) {
        if (!ignoreIdentifiers) {
            return;
        }

        final int kind = currentToken.kind;

        /*
         * Did we find a "class" token not followed by an identifier? i.e:
         * expectThrows(IllegalStateException.class, () -> {
         * newSearcher(r).search(parentQuery.build(), c);
         * });
         *
         * This is checked for every non-identifier token, braces included
         * (think "{ Foo.class }"); otherwise the pending entry would stay
         * on the stack and take an unrelated identifier as its name.
         * poll() is used instead of pop() so that an empty stack is
         * tolerated.
         */
        if (storeNextIdentifier && kind != JavaParserConstants.IDENTIFIER) {
            classMembersIndentations.poll();
            storeNextIdentifier = false;
        }

        switch (kind) {
        case JavaParserConstants.IDENTIFIER:
            if ("enum".equals(currentToken.image)) {
                // If declaring an enum, add a new block nesting level at
                // which constructors may exist
                pushTypeDeclaration();
            } else if (storeNextIdentifier) {
                TypeDeclaration pending = classMembersIndentations.peek();
                if (pending != null) {
                    pending.name = currentToken.image;
                }
                storeNextIdentifier = false;
            }

            // Store this token
            prevIdentifier = currentToken.image;
            break;

        case JavaParserConstants.CLASS:
            // If declaring a class, add a new block nesting level at which
            // constructors may exist
            pushTypeDeclaration();
            break;

        case JavaParserConstants.LBRACE:
            currentNestingLevel++;
            break;

        case JavaParserConstants.RBRACE:
            // Discard completed blocks
            if (!classMembersIndentations.isEmpty()
                    && classMembersIndentations.peek().indentationLevel == currentNestingLevel) {
                classMembersIndentations.pop();
            }
            currentNestingLevel--;
            break;

        default:
            break;
        }
    }

    /** Pushes a not yet named declaration whose body is one brace level deeper. */
    private void pushTypeDeclaration() {
        TypeDeclaration cd = new TypeDeclaration(currentNestingLevel + 1);
        classMembersIndentations.push(cd);
        storeNextIdentifier = true;
    }

    /**
     * If the current token is an opening parenthesis and the identifier
     * seen most recently equals the name of the innermost open type
     * declaration, sets the image of the last entry in {@code tokenEntries}
     * back to that identifier.
     * <p>
     * Must be called before {@link #processToken(Token)} for the same token.
     * </p>
     *
     * @param tokenEntries the entries collected so far; the last one may be modified
     * @param currentToken the token about to be processed
     */
    public void restoreConstructorToken(Tokens tokenEntries, Token currentToken) {
        if (!ignoreIdentifiers || currentToken.kind != JavaParserConstants.LPAREN) {
            return;
        }

        // was the previous token a constructor? If so, restore the
        // identifier
        TypeDeclaration enclosingType = classMembersIndentations.peek();
        // The name is still null while the declaration on top of the stack
        // waits for its identifier; comparing from prevIdentifier's side
        // avoids dereferencing it.
        if (enclosingType == null || prevIdentifier == null || !prevIdentifier.equals(enclosingType.name)) {
            return;
        }

        // NOTE(review): prevIdentifier is only updated on identifier
        // tokens, so the last entry is presumably, but not necessarily,
        // the identifier itself - confirm.
        int lastTokenIndex = tokenEntries.size() - 1;
        if (lastTokenIndex >= 0) {
            TokenEntry lastToken = tokenEntries.getTokens().get(lastTokenIndex);
            lastToken.setImage(prevIdentifier);
        }
    }
}
/**
 * Bookkeeping entry for one type declaration tracked by the
 * {@link ConstructorDetector}.
 */
private static class TypeDeclaration {
    /** Brace nesting level this declaration was registered with. */
    final int indentationLevel;

    /** Name of the declared type; {@code null} until it has been assigned. */
    String name;

    TypeDeclaration(int indentationLevel) {
        this.indentationLevel = indentationLevel;
    }
}
}