TCLogParser.java example

Explorer

jmeter-master
- jmeter-trunk
  - src
  - test
    - src
      - org
        apache
        commons
        cli
        avalon
        ClutilTestCase.java
        jmeter
        JMeterVersionTest.java
        assertions
        DurationAssertionTest.java
        MD5HexAssertionTest.java
        ResponseAssertionTest.java
        SMIMEAssertionTest.java
        SizeAssertionTest.java
        XMLSchemaAssertionTest.java
        XPathAssertionTest.java
        XmlAssertionTest.java
        config
        TestCVSDataSet.java
        TestRandomVariableConfig.java
        gui
        TestArgumentsPanel.java
        control
        TestGenericController.java
        TestIfController.java
        TestInterleaveControl.java
        TestLoopController.java
        TestOnceOnlyController.java
        TestRandomController.java
        TestRandomOrderController.java
        TestRunTime.java
        TestSwitchController.java
        TestThroughputController.java
        TestTransactionController.java
        TestWhileController.java
        engine
        DistributedRunnerTest.java
        LocalHostTest.java
        TestTreeCloner.java
        util
        PackageTest.java
        TestValueReplacer.java
        extractor
        TestHtmlExtractorJSoup.java
        TestHtmlExtractorJodd.java
        TestJSONPostProcessor.java
        TestRegexExtractor.java
        TestXPathExtractor.java
        functions
        CSVReadFunctionTest.java
        ComponentReferenceFunctionTest.java
        EvalFunctionTest.java
        FunctionTestHelper.java
        PackageTest.java
        RandomFunctionTest.java
        SplitFunctionTest.java
        StringFromFileFunctionTest.java
        SumFunctionTest.java
        TestEscapeOroRegexpChars.java
        TestFileRowColContainer.java
        TestFileToString.java
        TestGroovyFunction.java
        TestJavascriptFunction.java
        TestJavascriptFunctionWithRhino.java
        TestJexl2Function.java
        TestJexl3Function.java
        TestMachineIPName.java
        TestRandomFromMultipleVars.java
        TestRegexFunction.java
        TestSamplerNameFunction.java
        TestSetProperty.java
        TestSimpleFunctions.java
        TestTimeFunction.java
        TestTimeShiftFunction.java
        TestUrlEncodeDecode.java
        VariableTest.java
        gui
        action
        TestLoad.java
        TestSave.java
        logging
        TestGuiLogEventAppender.java
        util
        JSyntaxTextAreaTest.java
        TestMenuFactory.java
        TristateCheckBoxTest.java
        junit
        JMeterTest.java
        JMeterTestCase.java
        JMeterTestCaseJUnit.java
        categories
        ExcludeCategoryFilter.java
        NeedGuiTests.java
        stubs
        TestSampler.java
        listeners
        TestResultAction.java
        protocol
        http
        config
        MultipartUrlConfigTest.java
        UrlConfigTest.java
        control
        TestAuthManager.java
        TestAuthorization.java
        TestCacheManagerBase.java
        TestCacheManagerHC4.java
        TestCacheManagerUrlConnection.java
        TestCacheManagerUrlConnectionBase.java
        TestDNSCacheManager.java
        TestHC4CookieManager.java
        TestHTTPMirrorThread.java
        gui
        TestHttpTestSampleGui.java
        modifier
        TestAnchorModifier.java
        TestURLRewritingModifier.java
        parser
        NotReusableParser.java
        ReusableParser.java
        TestBaseParser.java
        TestBug60842HtmlParser.java
        TestCssParser.java
        TestHTMLParser.java
        TestHtmlParsingUtils.java
        proxy
        NonGuiProxySample.java
        TestHttpRequestHdr.java
        TestProxyControl.java
        sampler
        HTTPNullSampler.java
        HTTPSampler3.java
        NullURLConnection.java
        PackageTest.java
        PostWriterTest.java
        PutWriterTest.java
        TestHTTPSamplers.java
        TestHTTPSamplersAgainstHttpMirrorServer.java
        TestHttpWebdav.java
        util
        TestHTTPArgument.java
        TestHTTPFileArg.java
        TestHTTPFileArgs.java
        TestHTTPUtils.java
        accesslog
        TestLogFilter.java
        TestSessionFilter.java
        TestTCLogParser.java
        visualizers
        RequestViewHTTPTest.java
        jms
        sampler
        PublisherSamplerTest.java
        render
        BinaryMessageRendererTest.java
        MessageRendererTest.java
        ObjectMessageRendererTest.java
        Person.java
        TextMessageRendererTest.java
        ldap
        config
        gui
        PackageTest.java
        tcp
        sampler
        BinaryTCPClientImplTest.java
        LengthPrefixedBinaryTCPClientImplTest.java
        TCPClientDecoratorTest.java
        report
        core
        CsvSampleReaderTest.java
        SampleMetadataTest.java
        TestCsvSampleWriter.java
        dashboard
        ApdexPerTransactionTest.java
        processor
        ErrorsSummaryConsumerTest.java
        FieldSampleComparatorTest.java
        reporters
        TestResultSaver.java
        resources
        PackageTest.java
        TestPropertiesFiles.java
        samplers
        NullSampler.java
        TestSampleResult.java
        TestSampleSaveConfiguration.java
        save
        TestCSVSaveService.java
        TestSaveService.java
        services
        TestFileServer.java
        test
        ResourceLocator.java
        testbeans
        gui
        PackageTest.java
        TestBooleanPropertyEditor.java
        TestComboStringEditor.java
        TestFieldStringEditor.java
        testelement
        PackageTest.java
        TestElementTest.java
        TestHeaderManager.java
        TestNumberProperty.java
        property
        AbstractPropertyTest.java
        PackageTest.java
        threads
        JMeterContextServiceHelper.java
        TestJMeterContextService.java
        TestTestCompiler.java
        timers
        ConstantThroughputTimerTest.java
        util
        JSR223TestElementTest.java
        LogRecord.java
        LogRecordingDelegatingLogger.java
        PackageTest.java
        StringUtilitiesTest.java
        TestJMeterUtils.java
        XPathUtilTest.java
        visualizers
        GenerateTreeGui.java
        TestRenderAsJson.java
        TestSampleCompareTo.java
        TestSamplingStatCalculator.java
        jorphan
        TestFunctorUsers.java
        TestXMLBuffer.java
        collections
        PackageTest.java
        exec
        TestKeyToolUtils.java
        gui
        ObjectTableModelTest.java
        ObjectTableSorterTest.java
        TableModelEventBacker.java
        math
        TestStatCalculator.java
        reflect
        TestClassFinder.java
        TestClassTools.java
        TestFunctor.java
        test
        AllTests.java
        util
        TestConverter.java
        TestJorphanUtils.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.jmeter.protocol.http.util.accesslog;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;

import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
import org.apache.jmeter.testelement.TestElement;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;

// For JUnit tests, @see TestTCLogParser

/**
 * Description:<br>
 * <br>
 * Currently the parser only handles GET/POST requests. It's easy enough to add
 * support for other request methods by changing checkMethod. This is a complete
 * rewrite of a tool I wrote for myself earlier. The older algorithm was basic
 * and did not provide the same level of flexibility I want, so I wrote a new
 * one using a totally new algorithm. This implementation reads one line at a
 * time using BufferedReader. When it gets to the end of the file and the
 * sampler needs to get more requests, the parser will re-initialize the
 * BufferedReader. The implementation uses StringTokenizer to create tokens.
 * <p>
 * The parse algorithm is the following:
 * <ol>
 * <li> cleans the entry by looking for a double quote character
 * <li> looks to see if GET or POST is in the line
 * <li> tokenizes using quotes "
 * <li> finds the token with the request method
 * <li> gets the string of the token and tokenizes it using space
 * <li> finds the first token beginning with slash character
 * <li> tokenizes the string using question mark "?"
 * <li> get the path from the first token
 * <li> returns the second token and checks it for parameters
 * <li> tokenizes the string using ampersand "&"
 * <li> parses each token to name/value pairs
 * </ol>
 * <p>
 * Extending this class is fairly simple. Most access logs use the same format
 * starting from the request method. Therefore, changing the implementation of
 * cleanURL(string) method should be sufficient to support new log formats.
 * Tomcat uses common log format, so any webserver that uses the format should
 * work with this parser. Servers that are known to use non standard formats are
 * IIS and Netscape.
 */

public class TCLogParser implements LogParser {
    /** Logger shared by this parser and its subclasses. */
    protected static final Logger log = LoggerFactory.getLogger(TCLogParser.class);

    /*
     * TODO should these fields be public?
     * They don't appear to be used externally.
     *
     * Also, are they any different from HTTPConstants.GET etc. ?
     * In some cases they seem to be used as the method name from the Tomcat log.
     * However the RMETHOD field is used as the value for HTTPSamplerBase.METHOD,
     * for which HTTPConstants is most appropriate.
     */
    /** Request method name recognised in log lines. */
    public static final String GET = "GET";

    /** Request method name recognised in log lines. */
    public static final String POST = "POST";

    /** Request method name recognised in log lines. */
    public static final String HEAD = "HEAD";

    /**
     * Request method detected most recently by {@link #checkMethod(String)} or
     * {@link #cleanURL(String)}; {@link #parseLine(String, TestElement)} copies
     * it into the sampler's method property.
     */
    protected String RMETHOD = null;

    /**
     * The path portion (the text before the "?") of the last URL handled by
     * {@link #stripFile(String, TestElement)}
     */
    protected String URL_PATH = null;

    /**
     * Flag set through {@link #setUseParsedFile(boolean)}; it is not read
     * anywhere in this class.
     */
    protected boolean useFILE = true;

    /** The access log file, created lazily from {@link #FILENAME} when parsing starts. */
    protected File SOURCE = null;

    /** Name of the access log file, as given to {@link #setSourceFile(String)}. */
    protected String FILENAME = null;

    /**
     * Reader over {@link #SOURCE}; opened on demand by the parse methods and
     * reset to <code>null</code> once the end of the file is reached or the
     * parser is closed.
     */
    protected BufferedReader READER = null;

    /**
     * Optional filter consulted for every line; <code>null</code> means all
     * lines are accepted
     */
    protected Filter FILTER = null;

    /**
     * by default, we probably should decode the parameter values
     */
    protected boolean decode = true;

    // TODO downcase UPPER case non-final variables

    /**
     *
     */
    public TCLogParser() {
        super();
    }

    /**
     * Creates a parser for the given access log.
     *
     * @param source name of the source file
     */
    public TCLogParser(String source) {
        super();
        this.setSourceFile(source);
    }

    /**
     * Switches URL-decoding of parameter values on or off. Decoding is enabled
     * by default; pass <code>false</code> to keep the values exactly as logged.
     *
     * @param decodeparams flag whether parameters should be decoded
     */
    public void setDecodeParameterValues(boolean decodeparams) {
        decode = decodeparams;
    }

    /**
     * Tells whether parameter values are URL-decoded while parsing. The
     * default is <code>true</code>.
     *
     * @return <code>true</code> if parameter values should be decoded, <code>false</code> otherwise
     */
    public boolean decodeParameterValue() {
        return decode;
    }

    /**
     * Sets whether or not the path found in the log should be used. The
     * intention is to allow filtering of the log file later on. The flag
     * defaults to <code>true</code>.
     *
     * @param file
     *            flag whether to use the path from the log
     */
    public void setUseParsedFile(boolean file) {
        useFILE = file;
    }

    /**
     * Installs the filter used to include/exclude entries of the access log.
     * It is a convenience that saves cleaning up log files by hand.
     *
     * @param filter {@link Filter} to be used while reading the log lines
     */
    @Override
    public void setFilter(Filter filter) {
        this.FILTER = filter;
    }

    /**
     * Remembers the name of the access log to read. The file itself is only
     * opened once parsing starts.
     *
     * @param source name of the source file
     */
    @Override
    public void setSourceFile(String source) {
        FILENAME = source;
    }

    /**
     * Parses up to <code>parseCount</code> lines of the configured source file,
     * opening the file on first use and continuing where the previous call
     * stopped.
     *
     * @param el TestElement to read the lines into
     * @param parseCount number of max lines to read, or <code>-1</code> for no limit
     * @return number of read lines, or <code>-1</code> if an error occurred while
     *         reading (including the case that no source file has been set)
     */
    public int parse(TestElement el, int parseCount) {
        if (this.SOURCE == null) {
            if (this.FILENAME == null) {
                // new File(null) would throw a NullPointerException outside the
                // try block below; report the problem the documented way instead
                log.error("Problem creating samples: no source file has been set");
                return -1;
            }
            this.SOURCE = new File(this.FILENAME);
        }
        try {
            if (this.READER == null) {
                this.READER = getReader(this.SOURCE);
            }
            return parse(this.READER, el, parseCount);
        } catch (Exception exception) {
            // boundary catch: line parsing may throw unchecked exceptions
            log.error("Problem creating samples", exception);
        }
        return -1;// indicate that an error occurred
    }

    /**
     * Opens a reader on the given log file, transparently unzipping it when it
     * starts with the GZIP magic number. Characters are decoded with the
     * platform default charset.
     *
     * @param file log file to open
     * @return buffered reader over the (possibly decompressed) file content
     * @throws IOException if the file cannot be opened or its GZIP header is invalid
     */
    private static BufferedReader getReader(File file) throws IOException {
        if (! isGZIP(file)) {
            return new BufferedReader(new FileReader(file));
        }
        FileInputStream fileIn = new FileInputStream(file);
        try {
            // the GZIPInputStream constructor reads the header and may throw
            return new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
        } catch (IOException | RuntimeException e) {
            // do not leak the file handle when the wrapper could not be created
            try {
                fileIn.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Checks whether the file starts with the two-byte GZIP magic number
     * (low byte first).
     *
     * @param file file to inspect
     * @return <code>true</code> if the first two bytes match the GZIP magic number
     * @throws IOException if the file cannot be opened or read
     */
    private static boolean isGZIP(File file) throws IOException {
        final int firstMagicByte = GZIPInputStream.GZIP_MAGIC & 0xFF;
        final int secondMagicByte = GZIPInputStream.GZIP_MAGIC >> 8;
        try (FileInputStream header = new FileInputStream(file)) {
            if (header.read() != firstMagicByte) {
                return false;
            }
            return header.read() == secondMagicByte;
        }
    }

    /**
     * Parses a set number of lines from the access log by delegating to
     * {@link #parse(TestElement, int)}. How many lines are really parsed
     * depends on the filter and on the number of lines left in the log; the
     * actual number is returned.
     *
     * @param count number of lines to read
     * @param el {@link TestElement} to read lines into
     * @return lines parsed
     */
    @Override
    public int parseAndConfigure(int count, TestElement el) {
        final int linesParsed = parse(el, count);
        return linesParsed;
    }

    /**
     * Reads the log line by line and hands every non-empty line to
     * {@link #parseLine(String, TestElement)}, stopping early once the
     * requested number of lines has been parsed. When the end of the file is
     * reached the reader is closed and {@link #READER} is reset, so that the
     * next call starts again from the beginning of the file.<br>
     * Note: empty lines will not be counted
     *
     * @param breader {@link BufferedReader} to read lines from
     * @param el {@link TestElement} to read lines into
     * @param parseCount number of lines to read, <code>-1</code> for no limit
     * @return number of lines parsed
     */
    protected int parse(BufferedReader breader, TestElement el, int parseCount) {
        int parsed = 0;
        try {
            boolean limitReached = false;
            String current;
            // There is no way to know up front where to stop in the file, so
            // the limit is checked after every line that was read.
            while (!limitReached && (current = breader.readLine()) != null) {
                if (!current.isEmpty()) {
                    parsed += this.parseLine(current, el);
                }
                limitReached = parseCount != -1 && parsed >= parseCount;
            }
            if (!limitReached) {
                // the loop ended because the end of the file was hit
                breader.close();
                this.READER = null;
            }
        } catch (IOException ioe) {
            log.error("Error reading log file", ioe);
        }
        return parsed;
    }

    /**
     * Parses a single log line: extracts the URL, stores the detected request
     * method on the element and, unless the filter rejects the line, fills in
     * path and parameters.
     * <p>
     * A line that passes the filter is counted even when the filter's
     * <code>filter</code> call returns <code>null</code> for it; in that case
     * no URL is set on the element.
     *
     * @param line single line to be parsed
     * @param el {@link TestElement} in which the line will be added
     * @return number of lines parsed (zero or one, actually)
     */
    protected int parseLine(String line, TestElement el) {
        // cleaning the line gets rid of extra stuff and, as a side effect,
        // records the request method in RMETHOD
        String cleanedLine = this.cleanURL(line);
        // parameterized so the message is only built when debug is enabled
        log.debug("parsing line: {}", line);
        el.setProperty(HTTPSamplerBase.METHOD, RMETHOD);
        if (FILTER == null) {
            log.debug("filter was null");
            // without a filter every line is parsed
            createUrl(cleanedLine, el);
            return 1;
        }
        log.debug("filter is not null");
        if (FILTER.isFiltered(line, el)) {
            log.debug("Line was filtered");
            return 0;
        }
        log.debug("line was not filtered");
        // the line is filtered first, before the URL is separated into
        // file and parameters
        String filteredUrl = FILTER.filter(cleanedLine);
        if (filteredUrl != null) {
            createUrl(filteredUrl, el);
        }
        return 1;
    }

    /**
     * Splits the url into path and query string and stores both on the element.
     *
     * @param line single line of which the url should be extracted
     * @param el {@link TestElement} into which the url will be added
     */
    private void createUrl(String line, TestElement el) {
        // stripFile sets the path on the element and hands back the query part
        final String query = this.stripFile(line, el);
        if (query == null) {
            return;
        }
        // NOTE(review): the result of this check is discarded
        this.checkParamFormat(line);
        // with the file stripped off, the parameters can be parsed
        this.convertStringToJMRequest(query, el);
    }

    /**
     * Extracts the request URL from a log entry:
     * <ol>
     * <li> check for double quotes
     * <li> check the request method
     * <li> tokenize using double quotes
     * <li> find first token containing request method
     * <li> tokenize that token using space
     * <li> find first token that begins with "/"
     * </ol>
     * Example Tomcat log entry:
     * <p>
     * 127.0.0.1 - - [08/Jan/2003:07:03:54 -0500] "GET /addrbook/ HTTP/1.1" 200
     * 1981
     * <p>
     * would result in the extracted url <code>/addrbook/</code>
     * <p>
     * As a side effect the detected request method is stored in
     * {@link #RMETHOD}. Entries without double quotes or without a supported
     * method are returned unchanged.
     *
     * @param entry line from which the url is to be extracted
     * @return cleaned url, or the original entry if no url could be found
     */
    public String cleanURL(String entry) {
        if (!entry.contains("\"") || !checkMethod(entry)) {
            // nothing to clean, hand back the original string
            return entry;
        }
        // splitting a tomcat entry on double quotes yields 3 tokens if
        // there isn't any additional information in the logs
        StringTokenizer quotedParts = this.tokenize(entry, "\"");
        while (quotedParts.hasMoreTokens()) {
            String candidate = quotedParts.nextToken();
            if (!checkMethod(candidate)) {
                continue;
            }
            // only the first token holding a request method is examined
            StringTokenizer words = this.tokenize(candidate, " ");
            while (words.hasMoreTokens()) {
                String word = words.nextToken();
                if (word.equalsIgnoreCase(GET)) {
                    RMETHOD = GET;
                } else if (word.equalsIgnoreCase(POST)) {
                    RMETHOD = POST;
                } else if (word.equalsIgnoreCase(HEAD)) {
                    RMETHOD = HEAD;
                }
                // there should only be one word starting with a slash
                if (word.startsWith("/")) {
                    return word;
                }
            }
            return entry;
        }
        return entry;
    }

    /**
     * Looks for one of the supported method names (<code>GET</code>,
     * <code>POST</code>, <code>HEAD</code>, tested in that order) anywhere in
     * the text and remembers the first match in {@link #RMETHOD}. Other
     * methods aren't supported yet.
     *
     * @param text text to be checked for HTTP method
     * @return <code>true</code> if method is supported, <code>false</code> otherwise
     */
    public boolean checkMethod(String text) {
        for (String method : new String[] {GET, POST, HEAD}) {
            if (text.contains(method)) {
                this.RMETHOD = method;
                return true;
            }
        }
        return false;
    }

    /**
     * Splits the URL at its first "?" into path and query string. The path is
     * stored in {@link TCLogParser#URL_PATH URL_PATH} and set as the path
     * property of the element; the query string is returned. Any further "?"
     * characters are kept as part of the query string. A URL without "?" is
     * used as the path unchanged.
     *
     * @param url url which should be stripped from parameters
     * @param el {@link TestElement} to parse url into
     * @return String presenting the parameters, or <code>null</code> when none were found
     */
    public String stripFile(String url, TestElement el) {
        int queryStart = url.indexOf('?');
        if (queryStart < 0) {
            el.setProperty(HTTPSamplerBase.PATH, url);
            return null;
        }
        // splitting by index cannot fail on input such as "?" or "?a=b", for
        // which a StringTokenizer yields no tokens or mistakes the query for the path
        this.URL_PATH = url.substring(0, queryStart);
        el.setProperty(HTTPSamplerBase.PATH, URL_PATH);
        String params = url.substring(queryStart + 1);
        return params.isEmpty() ? null : params;
    }

    /**
     * Tells whether the string has the <code>/path/file?name=value</code>
     * format, i.e. whether it contains a "?" at all.
     *
     * @param url url to check for parameters
     * @return <code>true</code> if url contains a <code>?</code>,
     *         <code>false</code> otherwise
     */
    public boolean checkURL(String url) {
        final int questionMark = url.indexOf('?');
        return questionMark != -1;
    }

    /**
     * Tells whether the text looks like a list of parameters, i.e. whether it
     * contains both "&amp;" and "=".
     *
     * @param text text to be checked for <code>&amp;</code> and <code>=</code>
     * @return <code>true</code> if <code>text</code> contains both <code>&amp;</code>
     *         and <code>=</code>, <code>false</code> otherwise
     */
    public boolean checkParamFormat(String text) {
        if (!text.contains("&")) {
            return false;
        }
        return text.contains("=");
    }

    /**
     * Hands the query string over to the sampler, which parses it into its
     * arguments. The element has to be a {@link HTTPSamplerBase}; any other
     * type makes the cast fail.
     *
     * @param text to be converted
     * @param el {@link HTTPSamplerBase} which consumes the <code>text</code>
     */
    public void convertStringToJMRequest(String text, TestElement el) {
        HTTPSamplerBase sampler = (HTTPSamplerBase) el;
        sampler.parseArguments(text);
    }

    /**
     * Turns a query string into an array of name/value pairs. The string is
     * first split by {@link #parseParameters(String)}; every piece is then
     * converted with {@link #parseOneParameter(String)}.
     *
     * @param stringparams String with parameters to be parsed
     * @return array of {@link NVPair}s
     */
    public NVPair[] convertStringtoNVPair(String stringparams) {
        List<String> rawPairs = this.parseParameters(stringparams);
        NVPair[] pairs = new NVPair[rawPairs.size()];
        int next = 0;
        for (String rawPair : rawPairs) {
            pairs[next++] = this.parseOneParameter(rawPair);
        }
        return pairs;
    }

    /**
     * Splits a single <code>name=value</code> parameter into an {@link NVPair}.
     * The text before the first "=" becomes the name and the text up to the
     * next "=" becomes the value; anything after a second "=" is ignored. A
     * missing name or value results in an empty string.
     * <p>
     * If decoding is enabled the value is URL-decoded as UTF-8. A value that
     * cannot be decoded (for example because of a malformed percent escape) is
     * kept as it appears in the log and a warning is logged.
     *
     * @param parameter
     *            to be parsed
     * @return {@link NVPair} with the parsed name and value of the parameter
     */
    protected NVPair parseOneParameter(String parameter) {
        String name = "";
        String value = "";
        if (parameter != null) {
            StringTokenizer param = this.tokenize(parameter, "=");
            if (param.hasMoreTokens()) {
                name = param.nextToken();
            }
            if (param.hasMoreTokens()) {
                value = param.nextToken();
                if (decode) {
                    try {
                        value = URLDecoder.decode(value, StandardCharsets.UTF_8.name());
                    } catch (UnsupportedEncodingException | IllegalArgumentException e) {
                        // URLDecoder rejects malformed escapes such as "100%";
                        // one bad value must not abort parsing of the log
                        log.warn("Could not URL-decode value of parameter '{}': {}", name, e.getMessage());
                    }
                }
            }
        }
        return new NVPair(name.trim(), value.trim());
    }

    /**
     * Splits a query string into its single <code>name=value</code> pieces.
     * The pieces are expected to be separated by the ampersand symbol
     * "&amp;"; empty pieces are skipped by the tokenizer.
     *
     * @param parameters string to be parsed
     * @return List of name/value pairs
     */
    protected List<String> parseParameters(String parameters) {
        List<String> pairs = new ArrayList<>();
        for (StringTokenizer splitter = this.tokenize(parameters, "&"); splitter.hasMoreTokens();) {
            pairs.add(splitter.nextToken());
        }
        return pairs;
    }

    /**
     * Creates a java.util.StringTokenizer for the line. Every character of
     * <code>delim</code> acts as a delimiter.
     *
     * @param line
     *            line to be parsed
     * @param delim
     *            delimiter
     * @return StringTokenizer constructed with <code>line</code> and <code>delim</code>
     */
    public StringTokenizer tokenize(String line, String delim) {
        final StringTokenizer tokens = new StringTokenizer(line, delim);
        return tokens;
    }

    /**
     * Closes the reader, if one is open, and forgets the source file so that
     * the next parse starts from scratch. Calling this method when nothing is
     * open is harmless.
     */
    @Override
    public void close() {
        try {
            // READER is null before the first parse and after the end of the
            // file has been reached
            if (this.READER != null) {
                this.READER.close();
            }
        } catch (IOException e) {
            log.warn("Error closing log file reader", e);
        } finally {
            // reset the state even if closing failed
            this.READER = null;
            this.SOURCE = null;
        }
    }
}