/*******************************************************************************
* Copyright (c) 2010, 2011 Meisam Fathi and others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Meisam Fathi - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.codan.internal.checkers.fs;
import java.util.Collection;
import java.util.Iterator;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class parses the format string argument and extracts all %s tokens.
*
* @version 0.2, June 04, 2010
* @author Meisam Fathi
*/
public class CFormatStringParser {
/**
* At least one digit should be present
*/
private static final String DIGIT_PATTERN = "[0-9][0-9]*";//$NON-NLS-1$
/**
* The general format for a <strong>format string</strong> argument is
* "%[*][size][modifier]type", in which type is one of the following items:
* <ul>
* <li>c: Single character.
* <li>d: Decimal integer.
* <li>e,E,f,g,G: Floating point.
* <li>o Octal: integer.
* <li>s: String of characters.
* <li>u: Unsigned decimal integer.
* <li>x,X: Hexadecimal integer.
* </ul>
*
* @see {@link http://www.cplusplus.com/reference/clibrary/cstdio/scanf/}
* for more information.
*/
private static final String STRING_FORMAT_PATTERN = "%[\\*]?[0-9]*[hlL]?[cdeEfgGsuxX]";//$NON-NLS-1$
/**
* If there is an asterisk in the format argument, then it cannot be
* vulnerable. If there is a [modifier] (i.e. hlL), then compiler warns.
* Hence, the only vulnerable arguments are arguments in which either there
* is no specified size, or there is a size greater than the size of the
* string.
*
* @see #FORMAT_STRING_PATTERN
*/
private static final String VULNERABLE_PATTERN = "%[0-9]*s";//$NON-NLS-1$
/**
* The pattern which represents a string format.
*/
private final Pattern argumentPattern;
/**
* The matcher which matches string format arguments.
*/
private final Matcher argumentMatcher;
/**
* The pattern which may lead to vulnerability in <code>scanf</code>
* function calls.
*/
private final Pattern vulnerablePattern;
/**
* I guess, this must be a concurrent Collection, but I'm not sure. --
* Meisam
*/
private final Collection<VulnerableFormatStringArgument> vulnerableArguments;
public final static int ARGUMENT_SIZE_NOT_SPECIFIED = -1;
/**
* Constructs an argument parser for the given argument.
*
* @param argument
*/
protected CFormatStringParser(final String argument) {
this.argumentPattern = Pattern.compile(STRING_FORMAT_PATTERN);
this.argumentMatcher = this.argumentPattern.matcher(argument);
this.vulnerablePattern = Pattern.compile(VULNERABLE_PATTERN);
this.vulnerableArguments = new ConcurrentLinkedQueue<VulnerableFormatStringArgument>();
extractVulnerableArguments();
}
/**
* If the given argument to this class is vulnerable, it returns true, else
* it return false.
*
* @return true if the format string argument is vulnerable.
*/
public boolean isVulnerable() {
return !this.vulnerableArguments.isEmpty();
}
public Iterator<VulnerableFormatStringArgument> getVulnerableArgumentsIterator() {
return this.vulnerableArguments.iterator();
}
/**
* This method is guaranteed to be invoked in the constructor of the class.
* DON'T invoke it yourself. It should be invoke only once.
*/
private void extractVulnerableArguments() {
/*
* I'm not sure if clearing the collection is necessary. -- Meisam Fathi
*/
this.vulnerableArguments.clear();
boolean hasMore = this.argumentMatcher.find();
int indexOfCurrentArgument = 0;
while (hasMore) {
final String formatString = this.argumentMatcher.group();
final String matchedArgument = formatString;
final Matcher vulnerabilityMatcher = this.vulnerablePattern.matcher(matchedArgument);
final boolean isVulnerable = vulnerabilityMatcher.find();
if (isVulnerable) {
final int argumentSize = parseArgumentSize(formatString);
final VulnerableFormatStringArgument vulnerableArgument = new VulnerableFormatStringArgument(indexOfCurrentArgument,
formatString, argumentSize);
this.vulnerableArguments.add(vulnerableArgument);
}
hasMore = this.argumentMatcher.find();
indexOfCurrentArgument++;
}
}
/**
* This method takes a string as input. The format of the input string is
* %[0-9]*s. If there is no digit present in the given string it returns
* <code>ARGUMENT_SIZE_NOT_SPECIFIED</code>, otherwise it returns the number
* specified after "%". For example:
* <ul>
* <li>%s ==> -1</li>
* <li>%123s ==> 123</li>
* <li>%1s ==> 1</li>
* <li>%015s ==> 15</li>
* <li>%0s ==> 0</li>
* </ul>
*
* @param formatString
* The given format string.
* @return Either ARGUMENT_SIZE_NOT_SPECIFIED or the number embedded in the
* input string.
*/
private int parseArgumentSize(final String formatString) {
// The minimum possible size for a string of format %[0-9]*s
final int MINIMUM_POSSIBLE_SIZE = 2;
int argumentSize = ARGUMENT_SIZE_NOT_SPECIFIED;
if (formatString.length() > MINIMUM_POSSIBLE_SIZE) {
final Pattern numberPattern = Pattern.compile(DIGIT_PATTERN);
final Matcher numberMatcher = numberPattern.matcher(formatString);
if (numberMatcher.find()) {
final String sizeModifierString = numberMatcher.group();
argumentSize = Integer.parseInt(sizeModifierString);
}
}
return argumentSize;
}
}