/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
/**
* Support utility able to manage multiple string/regex occurrence
* searching inside a target content
*
* @author yhawke 2013
*/
public class ContentMatcher {

    // Root element name of the documented XML format. The loader reads whatever
    // root element the document has and does not check its name.
    private static final String TAG_PATTERNS = "Patterns";
    // Name of the child elements that each carry one pattern.
    private static final String TAG_PATTERN = "Pattern";
    // Attribute selecting how the pattern text is interpreted.
    private static final String TAG_PATTERN_TYPE = "type";
    private static final String TAG_PATTERN_TYPE_STRING = "string";
    private static final String TAG_PATTERN_TYPE_REGEX = "regex";

    // Initialised eagerly so that an instance which never loaded a definition
    // file simply matches nothing instead of failing with a NullPointerException.
    private List<BoyerMooreMatcher> strings = new ArrayList<>();
    private List<Pattern> patterns = new ArrayList<>();

    /**
     * Direct method for a complete ContentMatcher instance creation.
     * Use the ClassLoader for the resource detection and loading, be careful regarding the
     * relative file name use (this class is in another package).
     *
     * @param xmlFileName the name of the XML file that need to be used for initialization
     * @return a ContentMatcher instance
     * @throws IllegalArgumentException if the resource cannot be found, read or parsed
     */
    public static ContentMatcher getInstance(String xmlFileName) {
        ContentMatcher cm = new ContentMatcher();

        // The stream is opened here, so it is also closed here.
        try (InputStream xmlInputStream = ContentMatcher.class.getResourceAsStream(xmlFileName)) {
            // getResourceAsStream signals a missing resource with null
            if (xmlInputStream == null) {
                throw new IllegalArgumentException(
                        "Failed to initialize the ContentMatcher object using: " + xmlFileName
                                + " (resource not found)");
            }

            cm.loadXMLPatternDefinitions(xmlInputStream);

        } catch (JDOMException | IOException ex) {
            throw new IllegalArgumentException(
                    "Failed to initialize the ContentMatcher object using: " + xmlFileName, ex);
        }

        return cm;
    }

    /**
     * Direct method for a complete ContentMatcher instance creation.
     * The given stream is read but not closed by this method.
     *
     * @param xmlInputStream the stream of the XML file that need to be used for initialization
     * @return a ContentMatcher instance
     * @throws IllegalArgumentException if the stream is {@code null}, or cannot be read or parsed
     */
    public static ContentMatcher getInstance(InputStream xmlInputStream) {
        if (xmlInputStream == null) {
            throw new IllegalArgumentException(
                    "Failed to initialize the ContentMatcher object using that stream (stream is null)");
        }

        ContentMatcher cm = new ContentMatcher();

        try {
            cm.loadXMLPatternDefinitions(xmlInputStream);
        } catch (JDOMException | IOException ex) {
            throw new IllegalArgumentException(
                    "Failed to initialize the ContentMatcher object using that stream", ex);
        }

        return cm;
    }

    /**
     * Load a pattern list from an XML formatted file.
     * Pattern should be enclosed around a {@code <Patterns>} tag and should be
     * defined as {@code <Pattern type="xxx"></Pattern>}. Use "regex" to define
     * a Regex formatted pattern or "string" for an exact matching pattern.
     * A pattern without a {@code type} attribute, or with any value other than
     * "regex", is treated as an exact matching pattern. Empty patterns are skipped.
     * Any previously loaded patterns are discarded.
     *
     * @param xmlInputStream the {@code InputStream} used to read the patterns
     * @throws JDOMException if an error occurred while parsing
     * @throws IOException if an I/O error occurred while reading the {@code InputStream}
     */
    protected void loadXMLPatternDefinitions(InputStream xmlInputStream)
            throws JDOMException, IOException {

        strings = new ArrayList<>();
        patterns = new ArrayList<>();

        // NOTE(review): the parser runs with its default configuration; if pattern
        // files can ever come from an untrusted source, harden it against XXE.
        SAXBuilder builder = new SAXBuilder();
        Document doc = builder.build(xmlInputStream);
        Element root = doc.getRootElement();

        // Only the direct <Pattern> children of the root element are considered
        for (Object obj : root.getChildren(TAG_PATTERN)) {
            Element patternElement = (Element) obj;
            String value = patternElement.getText();

            // Skip patterns without any content
            if (value == null || value.isEmpty()) {
                continue;
            }

            // Constant-first comparison: a missing "type" attribute yields null,
            // which must fall through to the default exact match model.
            String type = patternElement.getAttributeValue(TAG_PATTERN_TYPE);
            if (TAG_PATTERN_TYPE_REGEX.equalsIgnoreCase(type)) {
                patterns.add(Pattern.compile(value));
            } else {
                strings.add(new BoyerMooreMatcher(value));
            }
        }
    }

    /**
     * Search for an occurrence inside a specific content.
     * Exact string patterns are tried first, then the regex patterns; the search
     * stops at the first pattern that matches.
     *
     * @param content the string content to search into
     * @return the exact pattern that was found or the text matched by a regex,
     *         {@code null} if no match has been done
     */
    public String findInContent(String content) {

        // First check for a simple exact occurrence
        for (BoyerMooreMatcher matcher : strings) {
            if (matcher.findInContent(content) >= 0) {
                return matcher.getPattern();
            }
        }

        // Then check for a regex occurrence
        for (Pattern pattern : patterns) {
            Matcher matcher = pattern.matcher(content);
            if (matcher.find()) {
                return matcher.group();
            }
        }

        // No match found return null
        return null;
    }

    /**
     * Search for all possible occurrences inside a specific content.
     * Every matching pattern contributes one entry: the pattern itself for an
     * exact string pattern, the first matched text for a regex pattern.
     *
     * @param content the string content to search into
     * @return a list of existing occurrences, empty if nothing matched
     */
    public List<String> findAllInContent(String content) {

        List<String> results = new LinkedList<String>();

        // First check for all simple exact occurrences
        for (BoyerMooreMatcher matcher : strings) {
            if (matcher.findInContent(content) >= 0) {
                results.add(matcher.getPattern());
            }
        }

        // Then check for all regex occurrences
        for (Pattern pattern : patterns) {
            Matcher matcher = pattern.matcher(content);
            if (matcher.find()) {
                // Report the matched text (as findInContent does), not the whole content
                results.add(matcher.group());
            }
        }

        return results;
    }
}