SpiderSVNEntriesParser.java example

Explorer

zaproxy-master
- src
  - ch
    - csnc
      - extension
        httpclient
        AliasCertificate.java
        AliasKeyManager.java
        PKCS11Configuration.java
        SSLContextManager.java
        ui
        AliasTableModel.java
        CertificateView.java
        DriverTableModel.java
        DriversView.java
        util
        DriverConfiguration.java
        Encoding.java
        OptionsParamExperimentalSliSupport.java
  - org
- test
  - ch
    - csnc
      - extension
        httpclient
        AliasCertificateUnitTest.java
        AliasKeyManagerUnitTest.java
        PKCS11ConfigurationUnitTest.java
        SSLContextManagerUnitTest.java
        util
        EncodingUnitTest.java
  - org
    - apache
      - commons
        httpclient
        HttpMethodBaseUnitTest.java
    - parosproxy
      - paros
        CommandLineUnitTest.java
        common
        AbstractParamUnitTest.java
        core
        scanner
        AbstractPluginUnitTest.java
        KbUnitTest.java
        NameValuePairUnitTest.java
        PluginFactoryUnitTest.java
        PluginTestUtils.java
        UtilUnitTest.java
        VariantCookieUnitTest.java
        VariantHeaderUnitTest.java
        VariantODataUnitTest.java
        model
        FileCopierUnitTest.java
        network
        HttpBodyUnitTest.java
        HttpRequestHeaderUnitTest.java
        HttpResponseHeaderUnitTest.java
    - zaproxy
      - zap
        VersionUnitTest.java
        WithConfigsTest.java
        authentication
        AuthenticationMethodIndicatorsUnitTest.java
        UsernamePasswordAuthenticationCredentialsUnitTest.java
        control
        AddOnCollectionUnitTest.java
        AddOnUnitTest.java
        ZapReleaseComparitorUnitTest.java
        ZapReleaseUnitTest.java
        extension
        alert
        ExtensionAlertUnitTest.java
        api
        APIUnitTest.java
        ApiResponseConversionUtilsUnitTest.java
        OptionsParamApiUnitTest.java
        authorization
        BasicAuthorizationDetectionMethodUnitTest.java
        brk
        impl
        http
        HttpBreakpointManagementDaemonImplUnitTest.java
        dynssl
        SslCertificateUtilsUnitTest.java
        ext
        ExtensionParamUnitTest.java
        httppanel
        view
        hex
        HttpPanelHexModelUnitTest.java
        util
        HttpTextViewUtilsUnitTest.java
        lang
        LangImporterUnitTest.java
        pscan
        PluginPassiveScannerUnitTest.java
        ruleconfig
        RuleConfigParamUnitTest.java
        model
        ContextUnitTest.java
        SessionUtilsUnitTest.java
        StandardParameterParserUnitTest.java
        VulnerabilitiesLoaderUnitTest.java
        network
        HttpBodyTestUtils.java
        HttpResponseBodyUnitTest.java
        spider
        URLCanonicalizerUnitTest.java
        URLResolverRfc1808ExamplesUnitTest.java
        URLResolverUnitTest.java
        filters
        DefaultFetchFilterUnitTest.java
        HttpPrefixFetchFilterUnitTest.java
        parser
        SpiderHtmlFormParserUnitTest.java
        SpiderHtmlParserUnitTest.java
        SpiderParserTestUtils.java
        SpiderSitemapXMLParserUnitTest.java
        SpiderTextParserUnitTest.java
        users
        UserUnitTest.java
        UsersTableModelUnitTest.java
        utils
        ApiUtilsUnitTest.java
        BoyerMooreMatcherUnitTest.java
        ByteBuilderUnitTest.java
        HirshbergMatcherUnitTest.java
        LocaleUtilsUnitTest.java
        XMLStringUtilUnitTest.java
        view
        AbstractMultipleOptionsBaseTableModelUnitTest.java
        JCheckBoxTreeUnitTest.java
        LayoutHelperUnitTest.java
        ListModelTestUtils.java
        TableModelTestUtils.java
        widgets
        UsersListModelUnitTest.java

/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.zaproxy.zap.spider.parser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;

import net.htmlparser.jericho.Source;

import org.parosproxy.paros.network.HttpMessage;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.zaproxy.zap.spider.SpiderParam;
import org.zaproxy.zap.utils.XmlUtils;

/**
 * The Class SpiderSVNEntriesParser is used for parsing SVN metadata, inclusing SVN "entries" and "wc.db" files.
 * @author 70pointer
 *
 */
public class SpiderSVNEntriesParser extends SpiderParser {
	/* this class was Cloned from SpiderRobotstxtParser, by Cosmin. Credit where credit is due. */
	
	/** a pattern to match for SQLite based file (in ".svn/wc.db") */
	private static final Pattern svnSQLiteFormatPattern = Pattern.compile ("^SQLite format ");
	
	/** a pattern to match for XML based entries files */
	private static final Pattern svnXMLFormatPattern = Pattern.compile("<wc-entries");

	/** matches the entry *after* the line containing the file name */
	private static final Pattern svnTextFormatFileOrDirectoryPattern = Pattern.compile("^(file|dir)$"); //case sensitive
	
	/** matches the lines containing the repo location  */
	private static final Pattern svnRepoLocationPattern = Pattern.compile("^(http://|https://)", Pattern.CASE_INSENSITIVE);
	
		
	/** The Spider parameters. */
	private SpiderParam params;
	
	/** used to parse the XML based .svn/entries file format */ 
	private static DocumentBuilder dBuilder;

	private Pattern SVN_ENTRIES_FILE_PATTERN = Pattern.compile("/\\.svn/entries$|/\\.svn/wc.db$");
	
	/** statically initialise the XML DocumentBuilder */
	static {
		try {
			dBuilder = XmlUtils.newXxeDisabledDocumentBuilderFactory().newDocumentBuilder();
		} catch (ParserConfigurationException e) {
			log.error(e);
		}
	}

	/**
	 * Instantiates a new spider SVN entries parser.
	 * 
	 * @param params the params
	 */
	public SpiderSVNEntriesParser(SpiderParam params) {
		super();
		this.params = params;
	}

	@Override
	public boolean parseResource(HttpMessage message, Source source, int depth) {
		if (message == null || !params.isParseSVNEntries()) {
			return false;
		}
		log.debug("Parsing an SVN resource...");
		
		// Get the response content
		String content = message.getResponseBody().toString();

		// Get the context (base url)
		String baseURL = message.getRequestHeader().getURI().toString();
				
		//there are 2 major formats of ".svn/entries" file. 
		//An XML version is used up to (and including) SVN working copy format 6 
		//from SVN working copy format 7, a more space efficient text based version is used.
		//The ".svn/entries" file format disappeared in SVN working copy format 12, in favour of 
		//a file called ".svn/wc.db" containing a sqlite database, so we parse this here as well.
		
		//which format are we parsing
		Matcher svnSQLiteFormatMatcher = svnSQLiteFormatPattern.matcher(content);
		Matcher svnXMLFormatMatcher = svnXMLFormatPattern.matcher(content);
		if (svnSQLiteFormatMatcher.find()) {
			//SQLite format is being used, ( >= SVN working copy format 12, or >= SVN 1.7)			
			File tempSqliteFile;
			try {
				//get the binary data, and put it in a temp file we can use with the SQLite JDBC driver
				//Note: File is not AutoClosable, so cannot use a "try with resources" to manage it
				tempSqliteFile = File.createTempFile("sqlite", null);
				tempSqliteFile.deleteOnExit();
				OutputStream fos = new FileOutputStream (tempSqliteFile);
				fos.write(message.getResponseBody().getBytes());
				fos.close();
				
				if ( log.isDebugEnabled() ) {
					org.sqlite.JDBC jdbcDriver = new org.sqlite.JDBC();
					log.debug ("Created a temporary SQLite database file '"+ tempSqliteFile+ "'");				
					log.debug("SQLite JDBC Driver is version " + jdbcDriver.getMajorVersion() + "." + jdbcDriver.getMinorVersion());
					}

				//now load the temporary SQLite file using JDBC, and query the file entries within.
				Class.forName("org.sqlite.JDBC"); 
				String sqliteConnectionUrl = "jdbc:sqlite:" + tempSqliteFile.getAbsolutePath();
				
				try (Connection conn = DriverManager.getConnection(sqliteConnectionUrl)) {
					if (conn != null) {
						Statement stmt = null;
						ResultSet rsSVNWCFormat=null;
						ResultSet rsNodes = null;
						ResultSet rsRepo = null;
						try {
							stmt = conn.createStatement();
							rsSVNWCFormat= stmt.executeQuery("pragma USER_VERSION");

							//get the precise internal version of SVN in use   
							//this will inform how the Spider recurse should proceed in an efficient manner.
							int svnFormat = 0;
							while (rsSVNWCFormat.next()) {
								if (log.isDebugEnabled()) log.debug("Got a row from 'pragma USER_VERSION'");
								svnFormat = rsSVNWCFormat.getInt(1);
								break;
							}
							if (svnFormat < 29) {
								throw new Exception ("The SVN Working Copy Format of the SQLite database should be >= 29. We found "+ svnFormat);
							}
							if (svnFormat > 31) {
								throw new Exception ("SVN Working Copy Format "+ svnFormat + " is not supported at this time.  We support up to and including format 31 (~ SVN 1.8.5)");
							}
							if ( log.isDebugEnabled() ) {
								log.debug("Internal SVN Working Copy Format for "+ tempSqliteFile + " is "+ svnFormat);
								log.debug("Refer to http://svn.apache.org/repos/asf/subversion/trunk/subversion/libsvn_wc/wc.h for more details!");
							}
							
							//allow future changes to be easily handled 
							switch (svnFormat) {
								case 29: case 30: case 31:
									rsNodes = stmt.executeQuery("select kind,local_relpath,'pristine/'||substr(checksum,7,2) || \"/\" || substr(checksum,7)|| \".svn-base\" from nodes order by wc_id");
									break;
							}
							
							//now get the list of files stored in the SVN repo (or this folder of the repo, depending the SVN working copy format in use) 
							while (rsNodes.next()) {
								if (log.isDebugEnabled()) log.debug("Got a Node from the SVN wc.db file (format " + svnFormat+ ")");
								String kind = rsNodes.getString(1);
								String filename = rsNodes.getString(2);
								String svn_filename = rsNodes.getString(3);
		
								if ( filename != null && filename.length() > 0 ) {
									log.debug("Found a file/directory name in the (SQLite based) SVN wc.db file");
		
									processURL(message, depth, "../" + filename + (kind.equals("dir")?"/":""), baseURL);
			
									//re-seed the spider for this directory.
									//this is not to do with the SVN version, but in case the SVN root is not the WEB root..
									//in order to be sure we catch all the SVN repos, we recurse.  
									if ( kind.equals("dir")) {
										processURL(message, depth, "../" + filename + "/.svn/wc.db", baseURL);
									}
									//if we have an internal SVN filename for the file, process it.
									//this will probably result in source code disclosure at some point.
									if ( kind.equals("file") && svn_filename != null && svn_filename.length() > 0 ) {
										processURL(message, depth, svn_filename, baseURL);
									}
								}
							}
							
							rsRepo = stmt.executeQuery("select root from REPOSITORY order by id");
							//get additional information on where the SVN repository is located
							while (rsRepo.next()) {
								if (log.isDebugEnabled()) log.debug("Got a potential Repository from the SVN wc.db file (format " + svnFormat+ ")");
								String repos_path = rsRepo.getString(1);
								if ( repos_path != null && repos_path.length() > 0 ) {
									//exclude local repositories here.. we cannot retrieve or spider them
									Matcher repoMatcher = svnRepoLocationPattern.matcher(repos_path);
									if ( repoMatcher.find() ) {
										log.debug("Found an SVN repository location in the (SQLite based) SVN wc.db file");
										processURL(message, depth, repos_path + "/", baseURL);	
									}
								}
							}							
						}
						catch (Exception e) {
							log.error ("Error executing SQL on temporary SVN SQLite database '"+ sqliteConnectionUrl + "': "+ e);
						}
						finally {
							//the JDBC driver in use does not play well with "try with resource" construct. I tried!
							if (rsRepo != null) rsRepo.close();
							if (rsNodes != null) rsNodes.close();
							if (rsSVNWCFormat != null) rsSVNWCFormat.close(); 			
							if (stmt != null) stmt.close();
						}
					}
				else 
					throw new SQLException ("Could not open a JDBC connection to SQLite file "+ tempSqliteFile.getAbsolutePath());
				} 
				catch (Exception e) {
					//the connection will have been closed already, since we're used a try with resources
					log.error ("Error parsing temporary SVN SQLite database "+ sqliteConnectionUrl);					
				}
				finally {
					//delete the temp file.
					//this will be deleted when the VM is shut down anyway, but better to be safe than to run out of disk space.				
					tempSqliteFile.delete();
				}

			} catch (IOException | ClassNotFoundException e) {
				log.error("An error occurred trying to set up to parse the SQLite based file: "+ e);
				// We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
				return true;
			}
			
		} else if (svnXMLFormatMatcher.find()) {
			//XML format is being used, ( < SVN working copy format 7). 
			//The XML based file was replaced with the text based format with SVN 1.4, when format 8 went live
			//Not all the working copy formats went live in SVN versions, so tracking the format against the SVN version is tricky.
			
			Document doc;
			try {
				//work around the "no protocol" issue by wrapping the content in a ByteArrayInputStream
				doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(content.getBytes("utf-8"))));
			} catch (SAXException | IOException e) {
				log.error("An error occurred trying to parse the XML based .svn/entries file: "+ e);
				// We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
				return true;
			}
			NodeList nodelist = doc.getElementsByTagName("entry");
			for ( int i=0; i< nodelist.getLength(); i++) {
				Node svnEntryNode = nodelist.item(i);				
				String svnEntryName = ((Element)svnEntryNode).getAttribute("name");
				String svnEntryKind = ((Element)svnEntryNode).getAttribute("kind");
				String svnEntryUrl = ((Element)svnEntryNode).getAttribute("url");
				String svnEntryCopyFromUrl = ((Element)svnEntryNode).getAttribute("copyfrom-url");				
				
				if ( svnEntryName != null && svnEntryName.length() > 0 ) {
					log.debug("Found a file/directory name in the (XML based) SVN < 1.4 entries file");
					processURL(message, depth, "../" + svnEntryName + (svnEntryKind.equals("dir")?"/":""), baseURL);
					//get the internal SVN file, probably leading to source code disclosure
					if ( svnEntryKind.equals("file") ) {						
						processURL(message, depth, "text-base/" + svnEntryName + ".svn-base", baseURL);
					}
					//re-seed the spider for this directory. 
					if ( svnEntryKind.equals("dir") ) {
						processURL(message, depth, "../" + svnEntryName + "/.svn/entries", baseURL);
					}
				}
				
				//expected to be true for the first entry only (the directory housing other entries)
				if ( svnEntryName != null && svnEntryName.length() == 0 && svnEntryKind.equals("dir") ) {
					//exclude local repositories here.. we cannot retrieve or spider them
					Matcher repoMatcher = svnRepoLocationPattern.matcher(svnEntryUrl);
					if ( repoMatcher.find() ) {
						log.debug("Found an SVN repository location in the (XML based) SVN < 1.4 entries file");
						processURL(message, depth, svnEntryUrl + "/", baseURL);
					}
				}				
				//this attribute seems to be set on various entries. Correspond to files, rather than directories 
				Matcher urlMatcher = svnRepoLocationPattern.matcher(svnEntryCopyFromUrl);
				if ( urlMatcher.find() ) {
					log.debug("Found an SVN URL in the (XML based) SVN < 1.4 entries file");
					processURL(message, depth, svnEntryCopyFromUrl , baseURL);
				}
				
			}
		}
		else	{			
			//text based format us being used, so >= SVN 1.4, and < SVN 1.7.x
			//Parse each line in the ".svn/entries" file
			//we cannot use the StringTokenizer approach used by the robots.txt logic, 
			//since this causes empty lines to be ignored, which causes problems...
			String previousline = null;	
			String [] lines = content.split("\n"); 
			for (String line : lines ) {
				// If the line is empty, skip it
				if (line.length() > 0) {
					
					//log.debug("Processing SVN entries line: " + line);
					
					Matcher matcher = svnTextFormatFileOrDirectoryPattern.matcher(line);
					if (matcher.find()) {
						//filetype is "dir" or "file", as per the contents of the SVN file.
						String filetype  = matcher.group(0);
						//the previous line actually contains the file/directory name.
						if ( previousline != null && previousline.length() > 0 ) {
							log.debug("Found a file/directory name in the (text based) SVN 1.4/1.5/1.6 SVN entries file");
						
							processURL(message, depth, "../" + previousline + (filetype.equals("dir")?"/":""), baseURL);
							//get the internal SVN file, probably leading to source code disclosure
							if ( filetype.equals("file") ) {
								processURL(message, depth, "text-base/" + previousline + ".svn-base", baseURL);
							}
							
							//re-seed the spider for this directory. 
							if ( filetype.equals("dir") ) {
								processURL(message, depth, "../" + previousline + "/.svn/entries", baseURL);
							}
						}
					} else {
						//not a "file" or "dir" line, but it may contain details of the SVN repo location
						Matcher repoMatcher = svnRepoLocationPattern.matcher(line);
						if (repoMatcher.find()) {
							log.debug("Found an SVN repository location in the (text based) 1.4/1.5/1.6 SVN entries file");
							
							processURL(message, depth, line + "/", baseURL);
						}
						
					} 
				} 
				//last thing to do is to record the line as the previous line for the next iteration.
				previousline = line;
			}
		}
		// We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
		return true;
	}

	@Override
	public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyParsed) {
		// matches the file name of files that should be parsed with the SVN entries file parser
		Matcher matcher = SVN_ENTRIES_FILE_PATTERN.matcher(path);
		return matcher.find();
	}
}