JavaTokenizer.java example

Explorer
pmd-master
/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.cpd;

import java.io.StringReader;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Properties;

import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;

public class JavaTokenizer implements Tokenizer {

    public static final String CPD_START = "\"CPD-START\"";
    public static final String CPD_END = "\"CPD-END\"";

    private boolean ignoreAnnotations;
    private boolean ignoreLiterals;
    private boolean ignoreIdentifiers;

    public void setProperties(Properties properties) {
        ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
        ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
        ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
    }

    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
        StringBuilder stringBuilder = sourceCode.getCodeBuffer();

        // Note that Java version is irrelevant for tokenizing
        LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
                .getVersion("1.4").getLanguageVersionHandler();
        String fileName = sourceCode.getFileName();
        TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
                .getTokenManager(fileName, new StringReader(stringBuilder.toString()));
        Token currentToken = (Token) tokenMgr.getNextToken();

        TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
        ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);

        while (currentToken.image.length() > 0) {
            discarder.updateState(currentToken);

            if (discarder.isDiscarding()) {
                currentToken = (Token) tokenMgr.getNextToken();
                continue;
            }

            processToken(tokenEntries, fileName, currentToken, constructorDetector);
            currentToken = (Token) tokenMgr.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
    }

    private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
            ConstructorDetector constructorDetector) {
        String image = currentToken.image;

        constructorDetector.restoreConstructorToken(tokenEntries, currentToken);

        if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL
                || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
                || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
                || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
            image = String.valueOf(currentToken.kind);
        }
        if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
            image = String.valueOf(currentToken.kind);
        }

        constructorDetector.processToken(currentToken);

        tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
    }

    public void setIgnoreLiterals(boolean ignore) {
        this.ignoreLiterals = ignore;
    }

    public void setIgnoreIdentifiers(boolean ignore) {
        this.ignoreIdentifiers = ignore;
    }

    public void setIgnoreAnnotations(boolean ignoreAnnotations) {
        this.ignoreAnnotations = ignoreAnnotations;
    }

    /**
     * The {@link TokenDiscarder} consumes token by token and maintains state.
     * It can detect, whether the current token belongs to an annotation and
     * whether the current token should be discarded by CPD.
     * <p>
     * By default, it discards semicolons, package and import statements, and
     * enables CPD suppression. Optionally, all annotations can be ignored, too.
     * </p>
     */
    private static class TokenDiscarder {
        private boolean isAnnotation = false;
        private boolean nextTokenEndsAnnotation = false;
        private int annotationStack = 0;

        private boolean discardingSemicolon = false;
        private boolean discardingKeywords = false;
        private boolean discardingSuppressing = false;
        private boolean discardingAnnotations = false;
        private boolean ignoreAnnotations = false;

        TokenDiscarder(boolean ignoreAnnotations) {
            this.ignoreAnnotations = ignoreAnnotations;
        }

        public void updateState(Token currentToken) {
            detectAnnotations(currentToken);

            skipSemicolon(currentToken);
            skipPackageAndImport(currentToken);
            skipCPDSuppression(currentToken);
            if (ignoreAnnotations) {
                skipAnnotations();
            }
        }

        private void skipPackageAndImport(Token currentToken) {
            if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
                discardingKeywords = true;
            } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
                discardingKeywords = false;
            }
        }

        private void skipSemicolon(Token currentToken) {
            if (currentToken.kind == JavaParserConstants.SEMICOLON) {
                discardingSemicolon = true;
            } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
                discardingSemicolon = false;
            }
        }

        private void skipCPDSuppression(Token currentToken) {
            // Check if a comment is altering the suppression state
            Token st = currentToken.specialToken;
            while (st != null) {
                if (st.image.contains("CPD-OFF")) {
                    discardingSuppressing = true;
                    break;
                }
                if (st.image.contains("CPD-ON")) {
                    discardingSuppressing = false;
                    break;
                }
                st = st.specialToken;
            }

            // if processing an annotation, look for a CPD-START or CPD-END
            if (isAnnotation) {
                if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
                        && CPD_START.equals(currentToken.image)) {
                    discardingSuppressing = true;
                } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
                        && CPD_END.equals(currentToken.image)) {
                    discardingSuppressing = false;
                }
            }
        }

        private void skipAnnotations() {
            if (!discardingAnnotations && isAnnotation) {
                discardingAnnotations = true;
            } else if (discardingAnnotations && !isAnnotation) {
                discardingAnnotations = false;
            }
        }

        public boolean isDiscarding() {
            boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations
                    || discardingSuppressing;
            return result;
        }

        private void detectAnnotations(Token currentToken) {
            if (isAnnotation && nextTokenEndsAnnotation) {
                isAnnotation = false;
                nextTokenEndsAnnotation = false;
            }
            if (isAnnotation) {
                if (currentToken.kind == JavaParserConstants.LPAREN) {
                    annotationStack++;
                } else if (currentToken.kind == JavaParserConstants.RPAREN) {
                    annotationStack--;
                    if (annotationStack == 0) {
                        nextTokenEndsAnnotation = true;
                    }
                } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER
                        && currentToken.kind != JavaParserConstants.LPAREN) {
                    isAnnotation = false;
                }
            }
            if (currentToken.kind == JavaParserConstants.AT) {
                isAnnotation = true;
            }
        }
    }

    /**
     * The {@link ConstructorDetector} consumes token by token and maintains
     * state. It can detect, whether the current token belongs to a constructor
     * method identifier and if so, is able to restore it when using
     * ignoreIdentifiers.
     */
    private static class ConstructorDetector {
        private boolean ignoreIdentifiers;

        private Deque<TypeDeclaration> classMembersIndentations;
        private int currentNestingLevel;
        private boolean storeNextIdentifier;
        private String prevIdentifier;

        ConstructorDetector(boolean ignoreIdentifiers) {
            this.ignoreIdentifiers = ignoreIdentifiers;

            currentNestingLevel = 0;
            classMembersIndentations = new LinkedList<TypeDeclaration>();
        }

        public void processToken(Token currentToken) {
            if (!ignoreIdentifiers) {
                return;
            }

            switch (currentToken.kind) {
            case JavaParserConstants.IDENTIFIER:
                if ("enum".equals(currentToken.image)) {
                    // If declaring an enum, add a new block nesting level at
                    // which constructors may exist
                    pushTypeDeclaration();
                } else if (storeNextIdentifier) {
                    classMembersIndentations.peek().name = currentToken.image;
                    storeNextIdentifier = false;
                }

                // Store this token
                prevIdentifier = currentToken.image;
                break;

            case JavaParserConstants.CLASS:
                // If declaring a class, add a new block nesting level at which
                // constructors may exist
                pushTypeDeclaration();
                break;

            case JavaParserConstants.LBRACE:
                currentNestingLevel++;
                break;

            case JavaParserConstants.RBRACE:
                // Discard completed blocks
                if (!classMembersIndentations.isEmpty()
                        && classMembersIndentations.peek().indentationLevel == currentNestingLevel) {
                    classMembersIndentations.pop();
                }
                currentNestingLevel--;
                break;

            default:
                /*
                 * Did we find a "class" token not followed by an identifier? i.e:
                 * expectThrows(IllegalStateException.class, () -> {
                 *  newSearcher(r).search(parentQuery.build(), c);
                 * });
                 */
                if (storeNextIdentifier) {
                    classMembersIndentations.pop();
                    storeNextIdentifier = false;
                }
                break;
            }
        }

        private void pushTypeDeclaration() {
            TypeDeclaration cd = new TypeDeclaration(currentNestingLevel + 1);
            classMembersIndentations.push(cd);
            storeNextIdentifier = true;
        }

        public void restoreConstructorToken(Tokens tokenEntries, Token currentToken) {
            if (!ignoreIdentifiers) {
                return;
            }

            if (currentToken.kind == JavaParserConstants.LPAREN) {
                // was the previous token a constructor? If so, restore the
                // identifier
                if (!classMembersIndentations.isEmpty()
                        && classMembersIndentations.peek().name.equals(prevIdentifier)) {
                    int lastTokenIndex = tokenEntries.size() - 1;
                    TokenEntry lastToken = tokenEntries.getTokens().get(lastTokenIndex);
                    lastToken.setImage(prevIdentifier);
                }
            }
        }
    }

    private static class TypeDeclaration {
        int indentationLevel;
        String name;

        TypeDeclaration(int indentationLevel) {
            this.indentationLevel = indentationLevel;
        }
    }
}