package org.checkerframework.checker.regex;

import com.sun.source.tree.BinaryTree;
import com.sun.source.tree.CompoundAssignmentTree;
import com.sun.source.tree.ExpressionTree;
import com.sun.source.tree.LiteralTree;
import com.sun.source.tree.MethodInvocationTree;
import com.sun.source.tree.Tree;
import java.lang.annotation.Annotation;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.lang.model.element.AnnotationMirror;
import javax.lang.model.element.AnnotationValue;
import javax.lang.model.element.ExecutableElement;
import org.checkerframework.checker.regex.qual.PartialRegex;
import org.checkerframework.checker.regex.qual.PolyRegex;
import org.checkerframework.checker.regex.qual.Regex;
import org.checkerframework.checker.regex.qual.RegexBottom;
import org.checkerframework.checker.regex.qual.UnknownRegex;
import org.checkerframework.common.basetype.BaseAnnotatedTypeFactory;
import org.checkerframework.common.basetype.BaseTypeChecker;
import org.checkerframework.framework.flow.CFAbstractAnalysis;
import org.checkerframework.framework.flow.CFAnalysis;
import org.checkerframework.framework.flow.CFStore;
import org.checkerframework.framework.flow.CFTransfer;
import org.checkerframework.framework.flow.CFValue;
import org.checkerframework.framework.type.AnnotatedTypeFactory;
import org.checkerframework.framework.type.AnnotatedTypeMirror;
import org.checkerframework.framework.type.AnnotatedTypeMirror.AnnotatedIntersectionType;
import org.checkerframework.framework.type.AnnotatedTypeMirror.AnnotatedTypeVariable;
import org.checkerframework.framework.type.AnnotatedTypeMirror.AnnotatedWildcardType;
import org.checkerframework.framework.type.QualifierHierarchy;
import org.checkerframework.framework.type.treeannotator.ImplicitsTreeAnnotator;
import org.checkerframework.framework.type.treeannotator.ListTreeAnnotator;
import org.checkerframework.framework.type.treeannotator.PropagationTreeAnnotator;
import org.checkerframework.framework.type.treeannotator.TreeAnnotator;
import org.checkerframework.framework.util.AnnotationBuilder;
import org.checkerframework.framework.util.GraphQualifierHierarchy;
import org.checkerframework.framework.util.MultiGraphQualifierHierarchy.MultiGraphFactory;
import org.checkerframework.javacutil.AnnotationUtils;
import org.checkerframework.javacutil.TreeUtils;

/**
 * Adds {@link Regex} to the type of tree, in the following cases:
 *
 * <ol>
 *   <li value="1">a {@code String} or {@code char} literal that is a valid regular expression
 *   <li value="2">concatenation of two valid regular expression values (either {@code String} or
 *       {@code char}) or two partial regular expression values that make a valid regular
 *       expression when concatenated.
 *   <li value="3">for calls to Pattern.compile changes the group count value of the return type to
 *       be the same as the parameter. For calls to the asRegex methods of the classes in
 *       asRegexClasses these asRegex methods will return a {@code @Regex String} with the same
 *       group count as the second argument to the call to asRegex.
 *       <!--<li value="4">initialization of a char array that when converted to a String
 *       is a valid regular expression.</li>-->
 * </ol>
 *
 * Provides a basic analysis of concatenation of partial regular expressions to determine if a valid
 * regular expression is produced by concatenating non-regular expression Strings. To do this,
 * {@link PartialRegex} is added to the type of tree in the following cases:
 *
 * <ol>
 *   <li value="1">a String literal that is not a valid regular expression.
 *   <li value="2">concatenation of two partial regex Strings that doesn't result in a regex String
 *       or a partial regex and regex String.
 * </ol>
 *
 * Also, adds {@link PolyRegex} to the type of String/char concatenation of a Regex and a PolyRegex
 * or two PolyRegexs.
 */
public class RegexAnnotatedTypeFactory extends BaseAnnotatedTypeFactory {

    /**
     * The Pattern.compile method.
     *
     * @see java.util.regex.Pattern#compile(String)
     */
    private final ExecutableElement patternCompile;

    /**
     * The value method of the PartialRegex qualifier.
     *
     * @see org.checkerframework.checker.regex.qual.PartialRegex
     */
    private final ExecutableElement partialRegexValue;

    /**
     * Class names that contain an {@code asRegex(String, int)} method. These asRegex methods will
     * return a {@code @Regex String} with the same group count as the second parameter to the
     * asRegex call.
     *
     * @see RegexUtil#asRegex(String, int)
     */
    /*package-scope*/ static final String[] regexUtilClasses =
            new String[] {
                "org.checkerframework.checker.regex.RegexUtil",
                "plume.RegexUtil",
                "daikon.util.RegexUtil"
            };

    /** Annotation mirrors for the value-less form of each qualifier handled by this factory. */
    protected final AnnotationMirror REGEX, REGEXBOTTOM, PARTIALREGEX, POLYREGEX;

    /** The {@code value} element of the {@link Regex} qualifier (the group count). */
    protected final ExecutableElement regexValueElement;

    // TODO use? private TypeMirror[] legalReferenceTypes;

    /**
     * Creates the factory, resolving the methods and annotation mirrors it needs, and then runs
     * {@code postInit()}.
     *
     * @param checker the checker this factory belongs to
     */
    public RegexAnnotatedTypeFactory(BaseTypeChecker checker) {
        super(checker);

        patternCompile =
                TreeUtils.getMethod(
                        java.util.regex.Pattern.class.getName(), "compile", 1, processingEnv);
        partialRegexValue =
                TreeUtils.getMethod(
                        org.checkerframework.checker.regex.qual.PartialRegex.class.getName(),
                        "value",
                        0,
                        processingEnv);

        REGEX = AnnotationUtils.fromClass(elements, Regex.class);
        REGEXBOTTOM = AnnotationUtils.fromClass(elements, RegexBottom.class);
        PARTIALREGEX = AnnotationUtils.fromClass(elements, PartialRegex.class);
        POLYREGEX = AnnotationUtils.fromClass(elements, PolyRegex.class);

        regexValueElement =
                TreeUtils.getMethod(
                        org.checkerframework.checker.regex.qual.Regex.class.getName(),
                        "value",
                        0,
                        processingEnv);

        addAliasedAnnotation(org.checkerframework.checker.regex.qual.PolyRegex.class, POLYREGEX);

        /*
        legalReferenceTypes = new TypeMirror[] {
            getTypeMirror("java.lang.CharSequence"),
            getTypeMirror("java.lang.Character"),
            getTypeMirror("java.util.regex.Pattern"),
            getTypeMirror("java.util.regex.MatchResult") };
         */

        this.postInit();
    }

    /** Declares the qualifiers of the regex hierarchy (plus whatever PolyAll bundling adds). */
    @Override
    protected Set<Class<? extends Annotation>> createSupportedTypeQualifiers() {
        return getBundledTypeQualifiersWithPolyAll(
                Regex.class, PartialRegex.class, RegexBottom.class, UnknownRegex.class);
    }

    /** Uses {@link RegexTransfer} as the dataflow transfer function. */
    @Override
    public CFTransfer createFlowTransferFunction(
            CFAbstractAnalysis<CFValue, CFStore, CFTransfer> analysis) {
        return new RegexTransfer((CFAnalysis) analysis);
    }

    /**
     * Builds the tree annotator chain: implicits first, then the regex-specific rules, then the
     * propagation annotator with binary-expression propagation disabled.
     */
    @Override
    public TreeAnnotator createTreeAnnotator() {
        return new ListTreeAnnotator(
                new ImplicitsTreeAnnotator(this),
                new RegexTreeAnnotator(this),
                new RegexPropagationAnnotator(this));
    }

    /**
     * Returns a new Regex annotation with the given group count.
     *
     * @param groupCount the group count; the {@code value} element is only set explicitly when
     *     this is positive
     * @return the built {@code @Regex} annotation
     */
    /*package-scope*/ AnnotationMirror createRegexAnnotation(int groupCount) {
        AnnotationBuilder builder = new AnnotationBuilder(processingEnv, Regex.class);
        if (groupCount > 0) {
            builder.setValue("value", groupCount);
        }
        return builder.build();
    }

    /** Installs {@link RegexQualifierHierarchy} with {@code @RegexBottom} as bottom. */
    @Override
    public QualifierHierarchy createQualifierHierarchy(MultiGraphFactory factory) {
        return new RegexQualifierHierarchy(factory, REGEXBOTTOM);
    }

    /**
     * A custom qualifier hierarchy for the Regex Checker. This makes a regex annotation a subtype
     * of all regex annotations with lower group count values. For example, {@code @Regex(3)} is a
     * subtype of {@code @Regex(1)}. All regex annotations are subtypes of {@code @Regex}, which has
     * a default value of 0.
     */
    private final class RegexQualifierHierarchy extends GraphQualifierHierarchy {

        public RegexQualifierHierarchy(MultiGraphFactory f, AnnotationMirror bottom) {
            super(f, bottom);
        }

        /**
         * Two {@code @Regex} annotations are compared by group count; every other pair is
         * delegated to the graph hierarchy after stripping annotation values.
         */
        @Override
        public boolean isSubtype(AnnotationMirror subAnno, AnnotationMirror superAnno) {
            if (AnnotationUtils.areSameIgnoringValues(subAnno, REGEX)
                    && AnnotationUtils.areSameIgnoringValues(superAnno, REGEX)) {
                // A regex with more groups can be used wherever fewer groups are required.
                int subGroups = getRegexValue(subAnno);
                int superGroups = getRegexValue(superAnno);
                return superGroups <= subGroups;
            }
            // TODO: subtyping between PartialRegex?
            // Ignore annotation values to ensure that annotation is in supertype map.
            if (AnnotationUtils.areSameIgnoringValues(superAnno, REGEX)) {
                superAnno = REGEX;
            }
            if (AnnotationUtils.areSameIgnoringValues(subAnno, REGEX)) {
                subAnno = REGEX;
            }
            if (AnnotationUtils.areSameIgnoringValues(superAnno, PARTIALREGEX)) {
                superAnno = PARTIALREGEX;
            }
            if (AnnotationUtils.areSameIgnoringValues(subAnno, PARTIALREGEX)) {
                subAnno = PARTIALREGEX;
            }
            return super.isSubtype(subAnno, superAnno);
        }

        /** Gets the value out of a regex annotation. */
        private int getRegexValue(AnnotationMirror anno) {
            return (Integer)
                    AnnotationUtils.getElementValuesWithDefaults(anno)
                            .get(regexValueElement)
                            .getValue();
        }
    }

    /**
     * Returns the group count value of the given annotation or 0 if there's a problem getting the
     * group count value.
     *
     * @param anno the annotation to read the {@code @Regex} value element from
     * @return the group count, or 0 if the annotation has no such element
     */
    public int getGroupCount(AnnotationMirror anno) {
        AnnotationValue groupCountValue =
                AnnotationUtils.getElementValuesWithDefaults(anno).get(regexValueElement);
        // If group count value is null then there's no Regex annotation
        // on the parameter so set the group count to 0. This would happen
        // if a non-regex string is passed to Pattern.compile but warnings
        // are suppressed.
        return (groupCountValue == null) ? 0 : (Integer) groupCountValue.getValue();
    }

    /**
     * Returns the number of groups in the given regex String.
     *
     * @param regex a valid regular expression
     * @return the number of capturing groups in {@code regex}
     * @throws PatternSyntaxException if {@code regex} is not a valid regular expression
     */
    public static int getGroupCount(/*@Regex*/ String regex) {
        return Pattern.compile(regex).matcher("").groupCount();
    }

    /**
     * This method is a copy of RegexUtil.isRegex. We cannot directly use RegexUtil, because it uses
     * type annotations which cannot be used in IDEs (yet).
     *
     * @param s the candidate regular expression
     * @return true iff {@code Pattern.compile(s)} succeeds
     */
    @SuppressWarnings("purity") // the checker cannot prove that the method is pure, but it is
    /*@org.checkerframework.dataflow.qual.Pure*/
    private static boolean isRegex(String s) {
        try {
            Pattern.compile(s);
        } catch (PatternSyntaxException e) {
            return false;
        }
        return true;
    }

    /** Propagation annotator that leaves binary expressions to {@link RegexTreeAnnotator}. */
    private static class RegexPropagationAnnotator extends PropagationTreeAnnotator {

        public RegexPropagationAnnotator(AnnotatedTypeFactory atypeFactory) {
            super(atypeFactory);
        }

        @Override
        public Void visitBinary(BinaryTree node, AnnotatedTypeMirror type) {
            // Don't call super method which will try to create a LUB
            // Even when it is not yet valid: i.e. between a @PolyRegex and a @Regex
            return null;
        }
    }

    /** Tree annotator implementing the regex-specific typing rules listed on the outer class. */
    private class RegexTreeAnnotator extends TreeAnnotator {

        public RegexTreeAnnotator(AnnotatedTypeFactory atypeFactory) {
            super(atypeFactory);
        }

        /**
         * Case 1: valid regular expression String or char literal. Adds PartialRegex annotation to
         * String literals that are not valid regular expressions.
         */
        @Override
        public Void visitLiteral(LiteralTree tree, AnnotatedTypeMirror type) {
            if (!type.isAnnotatedInHierarchy(REGEX)) {
                String regex = null;
                if (tree.getKind() == Tree.Kind.STRING_LITERAL) {
                    regex = (String) tree.getValue();
                } else if (tree.getKind() == Tree.Kind.CHAR_LITERAL) {
                    regex = Character.toString((Character) tree.getValue());
                }
                if (regex != null) {
                    if (isRegex(regex)) {
                        int groupCount = getGroupCount(regex);
                        type.addAnnotation(createRegexAnnotation(groupCount));
                    } else {
                        type.addAnnotation(createPartialRegexAnnotation(regex));
                    }
                }
            }
            return super.visitLiteral(tree, type);
        }

        /**
         * Case 2: concatenation of Regex or PolyRegex String/char literals. Also handles
         * concatenation of partial regular expressions.
         */
        @Override
        public Void visitBinary(BinaryTree tree, AnnotatedTypeMirror type) {
            if (!type.isAnnotatedInHierarchy(REGEX) && TreeUtils.isStringConcatenation(tree)) {
                AnnotatedTypeMirror lExpr = getAnnotatedType(tree.getLeftOperand());
                AnnotatedTypeMirror rExpr = getAnnotatedType(tree.getRightOperand());

                Integer lGroupCount = getMinimumRegexCount(lExpr);
                Integer rGroupCount = getMinimumRegexCount(rExpr);
                boolean lExprRE = lGroupCount != null;
                boolean rExprRE = rGroupCount != null;
                boolean lExprPart = lExpr.hasAnnotation(PartialRegex.class);
                boolean rExprPart = rExpr.hasAnnotation(PartialRegex.class);
                boolean lExprPoly = lExpr.hasAnnotation(PolyRegex.class);
                boolean rExprPoly = rExpr.hasAnnotation(PolyRegex.class);

                if (lExprRE && rExprRE) {
                    // Remove current @Regex annotation...
                    type.removeAnnotationInHierarchy(REGEX);
                    // ...and add a new one with the correct group count value.
                    type.addAnnotation(createRegexAnnotation(lGroupCount + rGroupCount));
                } else if ((lExprPoly && rExprPoly)
                        || (lExprPoly && rExprRE)
                        || (lExprRE && rExprPoly)) {
                    type.addAnnotation(PolyRegex.class);
                } else if (lExprPart && rExprPart) {
                    String lRegex = getPartialRegexValue(lExpr);
                    String rRegex = getPartialRegexValue(rExpr);
                    String concat = lRegex + rRegex;
                    if (isRegex(concat)) {
                        int groupCount = getGroupCount(concat);
                        type.addAnnotation(createRegexAnnotation(groupCount));
                    } else {
                        type.addAnnotation(createPartialRegexAnnotation(concat));
                    }
                } else if (lExprRE && rExprPart) {
                    // The literal "e" is recorded in place of the regex operand's text.
                    String rRegex = getPartialRegexValue(rExpr);
                    String concat = "e" + rRegex;
                    type.addAnnotation(createPartialRegexAnnotation(concat));
                } else if (lExprPart && rExprRE) {
                    // The literal "e" is recorded in place of the regex operand's text.
                    String lRegex = getPartialRegexValue(lExpr);
                    String concat = lRegex + "e";
                    type.addAnnotation(createPartialRegexAnnotation(concat));
                }
            }
            return null; // super.visitBinary(tree, type);
        }

        /**
         * Case 2: Also handle compound String concatenation.
         *
         * <p>When both operands carry a Regex annotation (directly or via a bound), the result is a
         * Regex whose group count is the sum of the operands' counts.
         */
        @Override
        public Void visitCompoundAssignment(CompoundAssignmentTree node, AnnotatedTypeMirror type) {
            if (TreeUtils.isStringCompoundConcatenation(node)) {
                AnnotatedTypeMirror rhs = getAnnotatedType(node.getExpression());
                AnnotatedTypeMirror lhs = getAnnotatedType(node.getVariable());

                final Integer lhsRegexCount = getMinimumRegexCount(lhs);
                final Integer rhsRegexCount = getMinimumRegexCount(rhs);

                if (lhsRegexCount != null && rhsRegexCount != null) {
                    // Use the counts computed above rather than re-reading the primary
                    // annotation: for type variables, wildcards and intersections the Regex
                    // annotation may only be on a bound, in which case
                    // getAnnotation(Regex.class) returns null.
                    type.removeAnnotationInHierarchy(REGEX);
                    type.addAnnotation(createRegexAnnotation(lhsRegexCount + rhsRegexCount));
                }
            }
            return null; // super.visitCompoundAssignment(node, type);
        }

        /**
         * Case 3: For a call to Pattern.compile, add an annotation to the return type that has the
         * same group count value as the parameter. For calls to {@code asRegex(String, int)} change
         * the return type to have the same group count as the value of the second argument.
         */
        @Override
        public Void visitMethodInvocation(MethodInvocationTree tree, AnnotatedTypeMirror type) {
            // TODO: Also get this to work with 2 argument Pattern.compile.
            if (TreeUtils.isMethodInvocation(tree, patternCompile, processingEnv)) {
                ExpressionTree arg0 = tree.getArguments().get(0);

                final AnnotatedTypeMirror argType = getAnnotatedType(arg0);
                Integer regexCount = getMinimumRegexCount(argType);
                AnnotationMirror bottomAnno = argType.getAnnotation(RegexBottom.class);

                if (regexCount != null) {
                    // Remove current @Regex annotation...
                    // ...and add a new one with the correct group count value.
                    type.replaceAnnotation(createRegexAnnotation(regexCount));
                } else if (bottomAnno != null) {
                    type.replaceAnnotation(REGEXBOTTOM);
                }
            }
            return super.visitMethodInvocation(tree, type);
        }

        /** Returns a new PartialRegex annotation with the given partial regular expression. */
        private AnnotationMirror createPartialRegexAnnotation(String partial) {
            AnnotationBuilder builder = new AnnotationBuilder(processingEnv, PartialRegex.class);
            builder.setValue("value", partial);
            return builder.build();
        }

        /** Returns the value of a PartialRegex annotation. */
        private String getPartialRegexValue(AnnotatedTypeMirror type) {
            return (String)
                    AnnotationUtils.getElementValuesWithDefaults(
                                    type.getAnnotation(PartialRegex.class))
                            .get(partialRegexValue)
                            .getValue();
        }

        /**
         * Returns the value of the Regex annotation on the given type or NULL if there is no Regex
         * annotation. If type is a TYPEVAR, WILDCARD, or INTERSECTION type, visit first their
         * primary annotation then visit their upper bounds to get the Regex annotation. The method
         * gets "minimum" regex count because, depending on the bounds of a typevar or wildcard, the
         * actual type may have more than the upper bound's count.
         *
         * @param type type that may carry a Regex annotation
         * @return the group count of the Regex annotation found on the type or its bounds, or null
         *     if no Regex annotation is found
         */
        private Integer getMinimumRegexCount(final AnnotatedTypeMirror type) {
            final AnnotationMirror primaryRegexAnno = type.getAnnotation(Regex.class);
            if (primaryRegexAnno == null) {
                switch (type.getKind()) {
                    case TYPEVAR:
                        return getMinimumRegexCount(
                                ((AnnotatedTypeVariable) type).getUpperBound());

                    case WILDCARD:
                        return getMinimumRegexCount(
                                ((AnnotatedWildcardType) type).getExtendsBound());

                    case INTERSECTION:
                        // Take the largest count found on any bound of the intersection.
                        Integer maxBound = null;
                        for (final AnnotatedTypeMirror bound :
                                ((AnnotatedIntersectionType) type).directSuperTypes()) {
                            Integer boundRegexNum = getMinimumRegexCount(bound);
                            if (boundRegexNum != null) {
                                if (maxBound == null || boundRegexNum > maxBound) {
                                    maxBound = boundRegexNum;
                                }
                            }
                        }
                        return maxBound;

                    default:
                        // Nothing to do for other cases.
                }

                return null;
            }

            return getGroupCount(primaryRegexAnno);
        }

        // This won't work correctly until flow sensitivity is supported by
        // the Regex Checker. For example:
        //
        // char @Regex [] arr = {'r', 'e'};
        // arr[0] = '('; // type is still "char @Regex []", but this is no longer correct
        //
        // There are associated tests in tests/regex/Simple.java:testCharArrays
        // that can be uncommented when this is uncommented.
        // /**
        //  * Case 4: a char array that as a String is a valid regular expression.
        //  */
        // @Override
        // public Void visitNewArray(NewArrayTree tree, AnnotatedTypeMirror type) {
        //     boolean isCharArray = ((ArrayType) type.getUnderlyingType())
        //             .getComponentType().getKind() == TypeKind.CHAR;
        //     if (isCharArray && tree.getInitializers() != null) {
        //         List<? extends ExpressionTree> initializers = tree.getInitializers();
        //         StringBuilder charArray = new StringBuilder();
        //         boolean allLiterals = true;
        //         for (int i = 0; allLiterals && i < initializers.size(); i++) {
        //             ExpressionTree e = initializers.get(i);
        //             if (e.getKind() == Tree.Kind.CHAR_LITERAL) {
        //                 charArray.append(((LiteralTree) e).getValue());
        //             } else if (getAnnotatedType(e).hasAnnotation(Regex.class)) {
        //                 // if there's an @Regex char in the array then substitute
        //                 // it with a .
        //                 charArray.append('.');
        //             } else {
        //                 allLiterals = false;
        //             }
        //         }
        //         if (allLiterals && RegexUtil.isRegex(charArray.toString())) {
        //             type.addAnnotation(Regex.class);
        //         }
        //     }
        //     return super.visitNewArray(tree, type);
        // }
    }
}