/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.parser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import net.htmlparser.jericho.Source;
import org.parosproxy.paros.network.HttpMessage;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.zaproxy.zap.spider.SpiderParam;
import org.zaproxy.zap.utils.XmlUtils;

/**
 * The Class SpiderSVNEntriesParser is used for parsing SVN metadata, including SVN "entries" and
 * "wc.db" files.
 *
 * <p>Three on-disk formats are handled: the SQLite based ".svn/wc.db" file, the XML based
 * ".svn/entries" file, and the text based ".svn/entries" file. Every file, directory and
 * repository location found is handed to {@code processURL} so the spider can follow it.
 *
 * @author 70pointer
 */
public class SpiderSVNEntriesParser extends SpiderParser {
    /* this class was cloned from SpiderRobotstxtParser, by Cosmin. Credit where credit is due. */

    /** a pattern to match for SQLite based file (in ".svn/wc.db") */
    private static final Pattern svnSQLiteFormatPattern = Pattern.compile("^SQLite format ");

    /** a pattern to match for XML based entries files */
    private static final Pattern svnXMLFormatPattern = Pattern.compile("<wc-entries");

    /** matches the entry *after* the line containing the file name (case sensitive) */
    private static final Pattern svnTextFormatFileOrDirectoryPattern =
            Pattern.compile("^(file|dir)$");

    /** matches the lines containing the repo location */
    private static final Pattern svnRepoLocationPattern =
            Pattern.compile("^(http://|https://)", Pattern.CASE_INSENSITIVE);

    /**
     * matches the path of the files that should be parsed with this parser. Both dots are
     * escaped so that only a literal ".svn/wc.db" (or ".svn/entries") suffix is accepted.
     */
    private static final Pattern SVN_ENTRIES_FILE_PATTERN =
            Pattern.compile("/\\.svn/entries$|/\\.svn/wc\\.db$");

    /**
     * used to parse the XML based .svn/entries file format. Remains {@code null} if the builder
     * could not be created in the static initialiser.
     */
    // NOTE(review): this single builder is shared by every parser instance; JAXP does not
    // promise DocumentBuilder is thread-safe - confirm whether parseResource can run concurrently.
    private static DocumentBuilder dBuilder;

    /** The Spider parameters. */
    private final SpiderParam params;

    /** statically initialise the XML DocumentBuilder */
    static {
        try {
            dBuilder = XmlUtils.newXxeDisabledDocumentBuilderFactory().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            log.error(e);
        }
    }

    /**
     * Instantiates a new spider SVN entries parser.
     *
     * @param params the params
     */
    public SpiderSVNEntriesParser(SpiderParam params) {
        super();
        this.params = params;
    }

    /**
     * Parses an SVN metadata resource, dispatching on the format detected in the response body.
     *
     * <p>There are 2 major formats of ".svn/entries" file. An XML version is used up to (and
     * including) SVN working copy format 6; from SVN working copy format 7, a more space
     * efficient text based version is used. The ".svn/entries" file format disappeared in SVN
     * working copy format 12, in favour of a file called ".svn/wc.db" containing a SQLite
     * database, so that is parsed here as well.
     *
     * @param message the message whose response body holds the SVN metadata
     * @param source the parsed source (not used by this parser)
     * @param depth the current spidering depth, passed through to {@code processURL}
     * @return {@code false} if the message is {@code null} or SVN entries parsing is disabled in
     *     the parameters; {@code true} otherwise (even if parsing failed), so that the message is
     *     not handed to 'fallback' parsers
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        if (message == null || !params.isParseSVNEntries()) {
            return false;
        }
        log.debug("Parsing an SVN resource...");

        // Get the response content
        String content = message.getResponseBody().toString();

        // Get the context (base url)
        String baseURL = message.getRequestHeader().getURI().toString();

        // which format are we parsing
        if (svnSQLiteFormatPattern.matcher(content).find()) {
            // SQLite format is being used, ( >= SVN working copy format 12, or >= SVN 1.7)
            parseSqliteFormat(message, depth, baseURL);
        } else if (svnXMLFormatPattern.matcher(content).find()) {
            // XML format is being used, ( < SVN working copy format 7).
            parseXmlFormat(message, depth, content, baseURL);
        } else {
            // text based format is being used, so >= SVN 1.4, and < SVN 1.7.x
            parseTextFormat(message, depth, content, baseURL);
        }

        // We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
        return true;
    }

    /**
     * Writes the response body to a temporary file, opens it with the SQLite JDBC driver and
     * spiders its contents. All failures are logged; none are propagated.
     */
    private void parseSqliteFormat(HttpMessage message, int depth, String baseURL) {
        File tempSqliteFile = null;
        try {
            // get the binary data, and put it in a temp file we can use with the SQLite JDBC
            // driver
            tempSqliteFile = File.createTempFile("sqlite", null);
            tempSqliteFile.deleteOnExit();
            // try-with-resources so the stream is closed even if the write fails
            try (OutputStream fos = new FileOutputStream(tempSqliteFile)) {
                fos.write(message.getResponseBody().getBytes());
            }

            if (log.isDebugEnabled()) {
                org.sqlite.JDBC jdbcDriver = new org.sqlite.JDBC();
                log.debug("Created a temporary SQLite database file '" + tempSqliteFile + "'");
                log.debug(
                        "SQLite JDBC Driver is version "
                                + jdbcDriver.getMajorVersion()
                                + "."
                                + jdbcDriver.getMinorVersion());
            }

            // now load the temporary SQLite file using JDBC, and query the file entries within.
            Class.forName("org.sqlite.JDBC");
            String sqliteConnectionUrl = "jdbc:sqlite:" + tempSqliteFile.getAbsolutePath();

            try (Connection conn = DriverManager.getConnection(sqliteConnectionUrl)) {
                if (conn == null) {
                    throw new SQLException(
                            "Could not open a JDBC connection to SQLite file "
                                    + tempSqliteFile.getAbsolutePath());
                }
                spiderSqliteDatabase(
                        conn, sqliteConnectionUrl, tempSqliteFile, message, depth, baseURL);
            } catch (Exception e) {
                // the connection will have been closed already, since we used a try with
                // resources. The exception is passed on so the cause is not lost.
                log.error(
                        "Error parsing temporary SVN SQLite database " + sqliteConnectionUrl, e);
            }
        } catch (IOException | ClassNotFoundException e) {
            log.error(
                    "An error occurred trying to set up to parse the SQLite based file: " + e);
        } finally {
            // delete the temp file on every path, including set-up failures.
            // this will be deleted when the VM is shut down anyway, but better to be safe than to
            // run out of disk space.
            if (tempSqliteFile != null && !tempSqliteFile.delete()) {
                log.debug(
                        "Could not delete the temporary SQLite database file '"
                                + tempSqliteFile
                                + "'");
            }
        }
    }

    /**
     * Queries an opened SVN "wc.db" database for its nodes and repository roots, and passes each
     * of them to {@code processURL}. SQL and validation errors are logged, not propagated.
     */
    private void spiderSqliteDatabase(
            Connection conn,
            String sqliteConnectionUrl,
            File tempSqliteFile,
            HttpMessage message,
            int depth,
            String baseURL) {
        Statement stmt = null;
        ResultSet rsSVNWCFormat = null;
        ResultSet rsNodes = null;
        ResultSet rsRepo = null;
        try {
            stmt = conn.createStatement();
            rsSVNWCFormat = stmt.executeQuery("pragma USER_VERSION");

            // get the precise internal version of SVN in use
            // this will inform how the Spider recurse should proceed in an efficient manner.
            int svnFormat = 0;
            if (rsSVNWCFormat.next()) {
                if (log.isDebugEnabled()) {
                    log.debug("Got a row from 'pragma USER_VERSION'");
                }
                svnFormat = rsSVNWCFormat.getInt(1);
            }
            if (svnFormat < 29) {
                throw new Exception(
                        "The SVN Working Copy Format of the SQLite database should be >= 29. We found "
                                + svnFormat);
            }
            if (svnFormat > 31) {
                throw new Exception(
                        "SVN Working Copy Format "
                                + svnFormat
                                + " is not supported at this time. We support up to and including format 31 (~ SVN 1.8.5)");
            }
            if (log.isDebugEnabled()) {
                log.debug(
                        "Internal SVN Working Copy Format for "
                                + tempSqliteFile
                                + " is "
                                + svnFormat);
                log.debug(
                        "Refer to http://svn.apache.org/repos/asf/subversion/trunk/subversion/libsvn_wc/wc.h for more details!");
            }

            // allow future changes to be easily handled.
            // the range checks above guarantee that one of these cases is taken.
            switch (svnFormat) {
                case 29:
                case 30:
                case 31:
                    rsNodes =
                            stmt.executeQuery(
                                    "select kind,local_relpath,'pristine/'||substr(checksum,7,2) || \"/\" || substr(checksum,7)|| \".svn-base\" from nodes order by wc_id");
                    break;
            }

            // now get the list of files stored in the SVN repo (or this folder of the repo,
            // depending the SVN working copy format in use)
            while (rsNodes.next()) {
                if (log.isDebugEnabled()) {
                    log.debug(
                            "Got a Node from the SVN wc.db file (format " + svnFormat + ")");
                }
                String kind = rsNodes.getString(1);
                String filename = rsNodes.getString(2);
                String svn_filename = rsNodes.getString(3);

                if (filename == null || filename.length() == 0) {
                    continue;
                }
                log.debug(
                        "Found a file/directory name in the (SQLite based) SVN wc.db file");

                // the database content is untrusted, so 'kind' may be null: compare
                // constant-first rather than risk aborting the whole loop with an NPE.
                boolean isDirectory = "dir".equals(kind);
                boolean isFile = "file".equals(kind);

                processURL(message, depth, "../" + filename + (isDirectory ? "/" : ""), baseURL);

                // re-seed the spider for this directory.
                // this is not to do with the SVN version, but in case the SVN root is not the
                // WEB root.. in order to be sure we catch all the SVN repos, we recurse.
                if (isDirectory) {
                    processURL(message, depth, "../" + filename + "/.svn/wc.db", baseURL);
                }
                // if we have an internal SVN filename for the file, process it.
                // this will probably result in source code disclosure at some point.
                if (isFile && svn_filename != null && svn_filename.length() > 0) {
                    processURL(message, depth, svn_filename, baseURL);
                }
            }

            // get additional information on where the SVN repository is located
            rsRepo = stmt.executeQuery("select root from REPOSITORY order by id");
            while (rsRepo.next()) {
                if (log.isDebugEnabled()) {
                    log.debug(
                            "Got a potential Repository from the SVN wc.db file (format "
                                    + svnFormat
                                    + ")");
                }
                String repos_path = rsRepo.getString(1);
                if (repos_path != null && repos_path.length() > 0) {
                    // exclude local repositories here.. we cannot retrieve or spider them
                    Matcher repoMatcher = svnRepoLocationPattern.matcher(repos_path);
                    if (repoMatcher.find()) {
                        log.debug(
                                "Found an SVN repository location in the (SQLite based) SVN wc.db file");
                        processURL(message, depth, repos_path + "/", baseURL);
                    }
                }
            }
        } catch (Exception e) {
            log.error(
                    "Error executing SQL on temporary SVN SQLite database '"
                            + sqliteConnectionUrl
                            + "': "
                            + e);
        } finally {
            // the JDBC driver in use does not play well with "try with resource" construct.
            // each resource is closed independently so one failing close() cannot leak the rest.
            closeQuietly(rsRepo);
            closeQuietly(rsNodes);
            closeQuietly(rsSVNWCFormat);
            closeQuietly(stmt);
        }
    }

    /** Closes the given resource if it is not {@code null}, logging (not throwing) any failure. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            log.debug("Failed to close a JDBC resource: " + e);
        }
    }

    /**
     * Parses the XML based ".svn/entries" file ( &lt; SVN working copy format 7) and passes the
     * entries and repository URLs found in it to {@code processURL}.
     *
     * <p>The XML based file was replaced with the text based format with SVN 1.4, when format 8
     * went live. Not all the working copy formats went live in SVN versions, so tracking the
     * format against the SVN version is tricky.
     */
    private void parseXmlFormat(HttpMessage message, int depth, String content, String baseURL) {
        if (dBuilder == null) {
            // the static initialiser failed (and logged why); avoid a NullPointerException here
            log.error(
                    "The XML DocumentBuilder is not available, cannot parse the XML based .svn/entries file");
            return;
        }

        Document doc;
        try {
            // work around the "no protocol" issue by wrapping the content in a
            // ByteArrayInputStream
            doc =
                    dBuilder.parse(
                            new InputSource(new ByteArrayInputStream(content.getBytes("utf-8"))));
        } catch (SAXException | IOException e) {
            log.error(
                    "An error occurred trying to parse the XML based .svn/entries file: " + e);
            return;
        }

        NodeList nodelist = doc.getElementsByTagName("entry");
        for (int i = 0; i < nodelist.getLength(); i++) {
            Node svnEntryNode = nodelist.item(i);
            Element svnEntry = (Element) svnEntryNode;
            String svnEntryName = svnEntry.getAttribute("name");
            String svnEntryKind = svnEntry.getAttribute("kind");
            String svnEntryUrl = svnEntry.getAttribute("url");
            String svnEntryCopyFromUrl = svnEntry.getAttribute("copyfrom-url");

            boolean isDirectory = "dir".equals(svnEntryKind);
            boolean isFile = "file".equals(svnEntryKind);

            if (svnEntryName != null && svnEntryName.length() > 0) {
                log.debug(
                        "Found a file/directory name in the (XML based) SVN < 1.4 entries file");
                processURL(
                        message, depth, "../" + svnEntryName + (isDirectory ? "/" : ""), baseURL);

                // get the internal SVN file, probably leading to source code disclosure
                if (isFile) {
                    processURL(
                            message, depth, "text-base/" + svnEntryName + ".svn-base", baseURL);
                }
                // re-seed the spider for this directory.
                if (isDirectory) {
                    processURL(
                            message, depth, "../" + svnEntryName + "/.svn/entries", baseURL);
                }
            }

            // expected to be true for the first entry only (the directory housing other entries)
            if (svnEntryName != null && svnEntryName.length() == 0 && isDirectory) {
                // exclude local repositories here.. we cannot retrieve or spider them
                Matcher repoMatcher = svnRepoLocationPattern.matcher(svnEntryUrl);
                if (repoMatcher.find()) {
                    log.debug(
                            "Found an SVN repository location in the (XML based) SVN < 1.4 entries file");
                    processURL(message, depth, svnEntryUrl + "/", baseURL);
                }
            }

            // this attribute seems to be set on various entries. Correspond to files, rather
            // than directories
            Matcher urlMatcher = svnRepoLocationPattern.matcher(svnEntryCopyFromUrl);
            if (urlMatcher.find()) {
                log.debug("Found an SVN URL in the (XML based) SVN < 1.4 entries file");
                processURL(message, depth, svnEntryCopyFromUrl, baseURL);
            }
        }
    }

    /**
     * Parses the text based ".svn/entries" file ( >= SVN 1.4, and &lt; SVN 1.7.x) line by line.
     *
     * <p>A line consisting solely of "file" or "dir" gives the type of the entry named on the
     * line before it; any other line is checked for a repository location.
     */
    private void parseTextFormat(HttpMessage message, int depth, String content, String baseURL) {
        // we cannot use the StringTokenizer approach used by the robots.txt logic,
        // since this causes empty lines to be ignored, which causes problems...
        String previousline = null;
        for (String line : content.split("\n")) {
            // If the line is empty, skip it
            if (line.length() > 0) {
                Matcher matcher = svnTextFormatFileOrDirectoryPattern.matcher(line);
                if (matcher.find()) {
                    // filetype is "dir" or "file", as per the contents of the SVN file.
                    String filetype = matcher.group(0);
                    boolean isDirectory = filetype.equals("dir");

                    // the previous line actually contains the file/directory name.
                    if (previousline != null && previousline.length() > 0) {
                        log.debug(
                                "Found a file/directory name in the (text based) SVN 1.4/1.5/1.6 SVN entries file");
                        processURL(
                                message,
                                depth,
                                "../" + previousline + (isDirectory ? "/" : ""),
                                baseURL);

                        // get the internal SVN file, probably leading to source code disclosure
                        if (filetype.equals("file")) {
                            processURL(
                                    message,
                                    depth,
                                    "text-base/" + previousline + ".svn-base",
                                    baseURL);
                        }
                        // re-seed the spider for this directory.
                        if (isDirectory) {
                            processURL(
                                    message,
                                    depth,
                                    "../" + previousline + "/.svn/entries",
                                    baseURL);
                        }
                    }
                } else {
                    // not a "file" or "dir" line, but it may contain details of the SVN repo
                    // location
                    Matcher repoMatcher = svnRepoLocationPattern.matcher(line);
                    if (repoMatcher.find()) {
                        log.debug(
                                "Found an SVN repository location in the (text based) 1.4/1.5/1.6 SVN entries file");
                        processURL(message, depth, line + "/", baseURL);
                    }
                }
            }
            // last thing to do is to record the line as the previous line for the next
            // iteration (empty lines included).
            previousline = line;
        }
    }

    /**
     * Tells whether the given path names a file this parser handles.
     *
     * @param message the message (not used)
     * @param path the path of the resource
     * @param wasAlreadyParsed whether another parser already handled the resource (not used)
     * @return {@code true} if the path ends in "/.svn/entries" or "/.svn/wc.db"
     */
    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyParsed) {
        // matches the file name of files that should be parsed with the SVN entries file parser
        return SVN_ENTRIES_FILE_PATTERN.matcher(path).find();
    }
}