/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.jmeter.protocol.http.util.accesslog;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
import org.apache.jmeter.testelement.TestElement;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
// For JUnit tests, @see TestTCLogParser
/**
* Description:<br>
* <br>
* Currently the parser only handles GET/POST requests. It's easy enough to add
* support for other request methods by changing checkMethod. This is a complete
* rewrite of a tool I wrote for myself earlier. The older algorithm was basic
* and did not provide the same level of flexibility I want, so I wrote a new
* one using a totally new algorithm. This implementation reads one line at a
* time using BufferedReader. When it gets to the end of the file and the
* sampler needs to get more requests, the parser will re-initialize the
* BufferedReader. The implementation uses StringTokenizer to create tokens.
* <p>
* The parse algorithm is the following:
* <ol>
* <li> cleans the entry by looking for backslash "\"
* <li> looks to see if GET or POST is in the line
* <li> tokenizes using quotes "
* <li> finds the token with the request method
* <li> gets the string of the token and tokenizes it using space
* <li> finds the first token beginning with slash character
* <li> tokenizes the string using question mark "?"
* <li> get the path from the first token
* <li> returns the second token and checks it for parameters
* <li> tokenizes the string using ampersand "&"
* <li> parses each token to name/value pairs
* </ol>
* <p>
* Extending this class is fairly simple. Most access logs use the same format
* starting from the request method. Therefore, changing the implementation of
* cleanURL(string) method should be sufficient to support new log formats.
* Tomcat uses common log format, so any webserver that uses the format should
* work with this parser. Servers that are known to use non standard formats are
* IIS and Netscape.
*/
public class TCLogParser implements LogParser {
    protected static final Logger log = LoggerFactory.getLogger(TCLogParser.class);

    /*
     * TODO should these fields be public?
     * They don't appear to be used externally.
     *
     * Also, are they any different from HTTPConstants.GET etc. ?
     * In some cases they seem to be used as the method name from the Tomcat log.
     * However the RMETHOD field is used as the value for HTTPSamplerBase.METHOD,
     * for which HTTPConstants is most appropriate.
     */
    public static final String GET = "GET";
    public static final String POST = "POST";
    public static final String HEAD = "HEAD";

    /**
     * Request method found by the most recent call to {@link #checkMethod(String)}
     * or {@link #cleanURL(String)}. It is NOT reset between lines, so a line
     * without a supported method leaves the previous value in place.
     */
    protected String RMETHOD = null;

    /**
     * The path part (everything before the first "?") of the last URL that
     * {@link #stripFile(String, TestElement)} saw with a query string.
     */
    protected String URL_PATH = null;

    /**
     * Flag set through {@link #setUseParsedFile(boolean)}; it is not read
     * anywhere in this class.
     */
    protected boolean useFILE = true;

    /** The access log file; created lazily from {@link #FILENAME} on first parse. */
    protected File SOURCE = null;

    /** Name of the access log file, as given to {@link #setSourceFile(String)}. */
    protected String FILENAME = null;

    /**
     * Reader over {@link #SOURCE}. It is kept open between parse calls so that
     * parsing resumes where it stopped, and is set back to <code>null</code>
     * once the end of the file has been reached.
     */
    protected BufferedReader READER = null;

    /**
     * Optional filter used to include/exclude log lines; may be <code>null</code>.
     */
    protected Filter FILTER = null;

    /**
     * by default, we probably should decode the parameter values
     */
    protected boolean decode = true;

    // TODO downcase UPPER case non-final variables

    /**
     * Creates a parser without a source file. {@link #setSourceFile(String)}
     * has to be called before parsing.
     */
    public TCLogParser() {
        super();
    }

    /**
     * Creates a parser for the given access log.
     *
     * @param source name of the source file
     */
    public TCLogParser(String source) {
        setSourceFile(source);
    }

    /**
     * By default decode is set to true. If the parameter values shouldn't be
     * decoded, call the method with false.
     *
     * @param decodeparams flag whether parameters should be decoded
     */
    public void setDecodeParameterValues(boolean decodeparams) {
        this.decode = decodeparams;
    }

    /**
     * Tells whether parameter values are URL-decoded; this is true by default.
     *
     * @return <code>true</code> if parameter values should be decoded, <code>false</code> otherwise
     */
    public boolean decodeParameterValue() {
        return this.decode;
    }

    /**
     * Calls this method to set whether or not to use the path in the log. We
     * may want to provide the ability to filter the log file later on. By
     * default, the parser uses the file in the log.
     *
     * @param file
     *            flag whether to use the path from the log
     */
    public void setUseParsedFile(boolean file) {
        this.useFILE = file;
    }

    /**
     * Use the filter to include/exclude files in the access logs. This is
     * provided as a convenience and reduce the need to spend hours cleaning up
     * log files.
     *
     * @param filter {@link Filter} to be used while reading the log lines
     */
    @Override
    public void setFilter(Filter filter) {
        FILTER = filter;
    }

    /**
     * Sets the source file.
     *
     * @param source name of the source file
     */
    @Override
    public void setSourceFile(String source) {
        this.FILENAME = source;
    }

    /**
     * Parses up to <code>parseCount</code> lines from the source file,
     * continuing from wherever the previous call stopped. The file and its
     * reader are opened on first use.
     *
     * @param el TestElement to read the lines into
     * @param parseCount number of max lines to read, <code>-1</code> for no limit
     * @return number of read lines, or <code>-1</code> if an error occurred while reading
     */
    public int parse(TestElement el, int parseCount) {
        if (this.SOURCE == null) {
            if (this.FILENAME == null) {
                // new File(null) would throw a NullPointerException; report the
                // problem through the documented error value instead.
                log.error("Problem creating samples: no source file has been set");
                return -1;
            }
            this.SOURCE = new File(this.FILENAME);
        }
        try {
            if (this.READER == null) {
                this.READER = getReader(this.SOURCE);
            }
            return parse(this.READER, el, parseCount);
        } catch (Exception exception) {
            log.error("Problem creating samples", exception);
        }
        return -1; // indicate that an error occurred
    }

    /**
     * Opens a reader on the file, transparently unzipping it when it starts
     * with the GZIP magic number. Text is decoded with the platform default
     * charset in both cases.
     *
     * @param file file to open
     * @return reader on the (uncompressed) content of the file
     * @throws IOException if the file cannot be opened or its GZIP header is invalid
     */
    private static BufferedReader getReader(File file) throws IOException {
        if (!isGZIP(file)) {
            return new BufferedReader(new FileReader(file));
        }
        FileInputStream raw = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(new GZIPInputStream(raw)));
        } catch (IOException | RuntimeException e) {
            // The GZIPInputStream constructor reads the header and may fail;
            // make sure the underlying file handle does not leak in that case.
            try {
                raw.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Checks whether the first two bytes of the file are the GZIP magic number.
     *
     * @param file file to inspect
     * @return <code>true</code> if the file starts with the GZIP magic number
     * @throws IOException if the file cannot be read
     */
    private static boolean isGZIP(File file) throws IOException {
        try (FileInputStream in = new FileInputStream(file)) {
            // The magic number is stored low byte first.
            return in.read() == (GZIPInputStream.GZIP_MAGIC & 0xFF)
                    && in.read() == (GZIPInputStream.GZIP_MAGIC >> 8);
        }
    }

    /**
     * parse a set number of lines from the access log. Keep in mind the number
     * of lines parsed will depend on the filter and number of lines in the log.
     * The method returns the actual number of lines parsed.
     *
     * @param count number of lines to read
     * @param el {@link TestElement} to read lines into
     * @return lines parsed
     */
    @Override
    public int parseAndConfigure(int count, TestElement el) {
        return this.parse(el, count);
    }

    /**
     * The method is responsible for reading each line, and breaking out of the
     * while loop if a set number of lines is given.<br>
     * Note: empty lines will not be counted
     * <p>
     * When the end of the input is reached the reader is closed and
     * {@link #READER} is reset to <code>null</code>, so that the next call to
     * {@link #parse(TestElement, int)} starts again from the top of the file.
     * An {@link IOException} is logged and the lines counted so far are returned.
     *
     * @param breader {@link BufferedReader} to read lines from
     * @param el {@link TestElement} to read lines into
     * @param parseCount number of lines to read, <code>-1</code> for no limit
     * @return number of lines parsed
     */
    protected int parse(BufferedReader breader, TestElement el, int parseCount) {
        int actualCount = 0;
        try {
            String line = breader.readLine();
            while (line != null) {
                if (!line.isEmpty()) {
                    actualCount += this.parseLine(line, el);
                }
                // There's no way to know up front where to stop in the file, so
                // the count is checked after every line. The check comes after
                // the first line is handled, so at least one line is always read.
                if (parseCount != -1 && actualCount >= parseCount) {
                    break;
                }
                line = breader.readLine();
            }
            if (line == null) {
                // end of input: release the reader
                breader.close();
                this.READER = null;
            }
        } catch (IOException ioe) {
            log.error("Error reading log file", ioe);
        }
        return actualCount;
    }

    /**
     * parseLine calls the other parse methods to parse the given text.
     * The request method is written to the element before the filter is
     * consulted; the filter is asked about the raw line, but transforms the
     * cleaned URL.
     *
     * @param line single line to be parsed
     * @param el {@link TestElement} in which the line will be added
     * @return number of lines parsed (zero or one, actually)
     */
    protected int parseLine(String line, TestElement el) {
        // we clean the line to get rid of extra stuff; this also updates RMETHOD
        String cleanedLine = this.cleanURL(line);
        log.debug("parsing line: {}", line);
        // now we set request method
        el.setProperty(HTTPSamplerBase.METHOD, RMETHOD);

        if (FILTER == null) {
            log.debug("filter was null");
            // in the case when the filter is not set, we parse all the lines
            createUrl(cleanedLine, el);
            return 1;
        }

        log.debug("filter is not null");
        if (FILTER.isFiltered(line, el)) {
            log.debug("Line was filtered");
            return 0;
        }

        log.debug("line was not filtered");
        // we filter the line first, before we try to separate the URL into
        // file and parameters. The line counts even if the filter returns null.
        String filteredUrl = FILTER.filter(cleanedLine);
        if (filteredUrl != null) {
            createUrl(filteredUrl, el);
        }
        return 1;
    }

    /**
     * Splits the url into path and query string, stores the path in the
     * element and, when a query string is present, hands it over for argument
     * parsing.
     *
     * @param line single line of which the url should be extracted
     * @param el {@link TestElement} into which the url will be added
     */
    private void createUrl(String line, TestElement el) {
        String paramString = this.stripFile(line, el);
        if (paramString != null) {
            // now that we have stripped the file, we can parse the parameters
            this.convertStringToJMRequest(paramString, el);
        }
    }

    /**
     * The method cleans the URL using the following algorithm.
     * <ol>
     * <li> check for double quotes
     * <li> check the request method
     * <li> tokenize using double quotes
     * <li> find first token containing request method
     * <li> tokenize string using space
     * <li> find first token that begins with "/"
     * </ol>
     * Example Tomcat log entry:
     * <p>
     * 127.0.0.1 - - [08/Jan/2003:07:03:54 -0500] "GET /addrbook/ HTTP/1.1" 200
     * 1981
     * <p>
     * would result in the extracted url <code>/addrbook/</code>
     * <p>
     * As a side effect {@link #RMETHOD} is updated with the method that was found.
     *
     * @param entry line from which the url is to be extracted
     * @return cleaned url, or the unchanged <code>entry</code> when no url could be extracted
     */
    public String cleanURL(String entry) {
        // Order matters: checkMethod (and its update of RMETHOD) only runs for
        // entries that contain a double quote.
        if (!entry.contains("\"") || !checkMethod(entry)) {
            return entry;
        }
        // we tokenize using double quotes. this means for tomcat we should
        // have 3 tokens if there isn't any additional information in the logs
        StringTokenizer sections = this.tokenize(entry, "\"");
        while (sections.hasMoreTokens()) {
            String section = sections.nextToken();
            if (!checkMethod(section)) {
                continue;
            }
            // Only the first section naming a request method is examined.
            StringTokenizer words = this.tokenize(section, " ");
            while (words.hasMoreTokens()) {
                String word = words.nextToken();
                // checkMethod matches substrings, so pin down the method with
                // an exact (case-insensitive) comparison of the whole word.
                if (word.equalsIgnoreCase(GET)) {
                    RMETHOD = GET;
                } else if (word.equalsIgnoreCase(POST)) {
                    RMETHOD = POST;
                } else if (word.equalsIgnoreCase(HEAD)) {
                    RMETHOD = HEAD;
                }
                // there should only be one token that starts with slash character
                if (word.startsWith("/")) {
                    return word;
                }
            }
            return entry;
        }
        // we return the original string
        return entry;
    }

    /**
     * The method checks for <code>POST</code>, <code>GET</code> and <code>HEAD</code> methods currently.
     * The other methods aren't supported yet. The check is a case-sensitive
     * substring match, tried in the order GET, POST, HEAD; on a match
     * {@link #RMETHOD} is set accordingly.
     *
     * @param text text to be checked for HTTP method
     * @return <code>true</code> if method is supported, <code>false</code> otherwise
     */
    public boolean checkMethod(String text) {
        if (text.contains(GET)) {
            this.RMETHOD = GET;
            return true;
        }
        if (text.contains(POST)) {
            this.RMETHOD = POST;
            return true;
        }
        if (text.contains(HEAD)) {
            this.RMETHOD = HEAD;
            return true;
        }
        return false;
    }

    /**
     * Splits the URL at the first "?" into path and query string. The path is
     * stored in the element (and, when a "?" is present, in
     * {@link TCLogParser#URL_PATH URL_PATH}). Any further "?" characters are
     * kept as part of the query string.
     *
     * @param url url which should be stripped from parameters
     * @param el {@link TestElement} to parse url into
     * @return String presenting the parameters, or <code>null</code> when none were found
     */
    public String stripFile(String url, TestElement el) {
        int queryStart = url.indexOf('?');
        if (queryStart < 0) {
            el.setProperty(HTTPSamplerBase.PATH, url);
            return null;
        }
        this.URL_PATH = url.substring(0, queryStart);
        el.setProperty(HTTPSamplerBase.PATH, URL_PATH);
        String query = url.substring(queryStart + 1);
        // "/path?" carries no parameters
        return query.isEmpty() ? null : query;
    }

    /**
     * Checks the string to make sure it has <code>/path/file?name=value</code> format. If
     * the string doesn't contains a "?", it will return <code>false</code>.
     *
     * @param url url to check for parameters
     * @return <code>true</code> if url contains a <code>?</code>,
     *         <code>false</code> otherwise
     */
    public boolean checkURL(String url) {
        return url.contains("?");
    }

    /**
     * Checks the string to see if it contains "&amp;" and "=". If it does, return
     * <code>true</code>, so that it can be parsed.
     *
     * @param text text to be checked for <code>&amp;</code> and <code>=</code>
     * @return <code>true</code> if <code>text</code> contains both <code>&amp;</code>
     *         and <code>=</code>, <code>false</code> otherwise
     */
    public boolean checkParamFormat(String text) {
        return text.contains("&") && text.contains("=");
    }

    /**
     * Hands the query string over to the sampler, which parses it into its
     * arguments.
     *
     * @param text query string to be converted
     * @param el element which consumes the <code>text</code>; it is cast to
     *            {@link HTTPSamplerBase}, so any other type results in a
     *            {@link ClassCastException}
     */
    public void convertStringToJMRequest(String text, TestElement el) {
        ((HTTPSamplerBase) el).parseArguments(text);
    }

    /**
     * Parse the string parameters into NVPair[] array. Once they are parsed, it
     * is returned. The method uses parseOneParameter(string) to convert each
     * pair.
     *
     * @param stringparams String with parameters to be parsed
     * @return array of {@link NVPair}s
     */
    public NVPair[] convertStringtoNVPair(String stringparams) {
        List<String> rawPairs = this.parseParameters(stringparams);
        NVPair[] pairs = new NVPair[rawPairs.size()];
        for (int idx = 0; idx < pairs.length; idx++) {
            pairs[idx] = this.parseOneParameter(rawPairs.get(idx));
        }
        return pairs;
    }

    /**
     * Method expects name and value to be separated by an equal sign "=". The
     * method uses StringTokenizer to make a NVPair object. If there happens to
     * be more than one "=" sign, the others are ignored: only the first two
     * tokens are used. A missing name or value results in an empty string.
     * <p>
     * When decoding is enabled the value is URL-decoded as UTF-8. A value that
     * cannot be decoded (for example because of a malformed "%" escape) is kept
     * as it is and a warning is logged.
     *
     * @param parameter
     *            to be parsed
     * @return {@link NVPair} with the parsed (and trimmed) name and value of the parameter
     */
    protected NVPair parseOneParameter(String parameter) {
        String name = "";
        String value = null;
        if (parameter != null) {
            StringTokenizer parts = this.tokenize(parameter, "=");
            if (parts.hasMoreTokens()) {
                name = parts.nextToken();
            }
            if (parts.hasMoreTokens()) {
                value = parts.nextToken();
            }
        }
        if (value == null) {
            value = "";
        } else if (decode) {
            try {
                value = URLDecoder.decode(value, StandardCharsets.UTF_8.name());
            } catch (UnsupportedEncodingException | IllegalArgumentException e) {
                // One undecodable value must not abort parsing of the whole
                // log; fall back to the raw value.
                log.warn("Could not decode parameter value '{}', using it as is", value, e);
            }
        }
        return new NVPair(name.trim(), value.trim());
    }

    /**
     * Method uses StringTokenizer to convert the string into single pairs. The
     * string should conform to HTTP protocol spec, which means the name/value
     * pairs are separated by the ampersand symbol "&amp;". Someone could write the
     * querystrings by hand, but that would be round about and go against the
     * purpose of this utility.
     *
     * @param parameters string to be parsed
     * @return List of name/value pairs
     */
    protected List<String> parseParameters(String parameters) {
        List<String> pairs = new ArrayList<>();
        StringTokenizer tokens = this.tokenize(parameters, "&");
        while (tokens.hasMoreTokens()) {
            pairs.add(tokens.nextToken());
        }
        return pairs;
    }

    /**
     * Parses the line using java.util.StringTokenizer.
     *
     * @param line
     *            line to be parsed
     * @param delim
     *            delimiter
     * @return StringTokenizer constructed with <code>line</code> and <code>delim</code>
     */
    public StringTokenizer tokenize(String line, String delim) {
        return new StringTokenizer(line, delim);
    }

    /**
     * Closes the reader, if one is open, and forgets the source file. It is
     * safe to call this method when nothing has been opened, or after the end
     * of the file has been reached and the reader was already released.
     */
    @Override
    public void close() {
        try {
            if (this.READER != null) {
                this.READER.close();
            }
        } catch (IOException e) {
            // Nothing useful can be done about it, but don't hide it either.
            log.warn("Error closing log file reader", e);
        } finally {
            // Reset the state even when closing failed, so that the parser
            // can be reused.
            this.READER = null;
            this.SOURCE = null;
        }
    }
}