/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.parser;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Source;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.spider.SpiderParam;
/**
 * The Class SpiderGitParser is used for parsing Git metadata from the {@code .git/index} file.
 *
 * <p>This parser currently supports Git internal index file versions 2, 3, and 4. It does not
 * currently support version 1, since this version is no longer supported. Version 1 appears to
 * have disappeared with Git version 0.05 in 2005.
 *
 * <p>Every entry name found in the index is handed to {@code processURL} as a URL relative to the
 * parent of the {@code .git} directory.
 *
 * @author 70pointer
 */
public class SpiderGitParser extends SpiderParser {

    /** Matches a request path that ends with the Git index file name ({@code /.git/index}). */
    private static final Pattern GIT_FILE_PATTERN = Pattern.compile("/\\.git/index$");

    /** The signature bytes ("DIRC") expected at the very start of the Git index content. */
    private static final byte[] GIT_INDEX_SIGNATURE = {'D', 'I', 'R', 'C'};

    /**
     * Mask for the low 12 bits of the entry flags, which hold the name length. The same value is
     * stored in the field when the real name length does not fit in 12 bits.
     */
    private static final int NAME_LENGTH_MASK = 0xFFF;

    /** Bit position of the "extended" flag inside the 16 bit entry flags. */
    private static final int EXTENDED_FLAG_BIT = 14;

    /** Size in bytes of the binary object id (SHA-1) stored for every entry. */
    private static final int ENTRY_ID_LENGTH = 20;

    /** Maximum number of bytes accepted for the version 4 variable width strip count. */
    private static final int MAX_STRIP_COUNT_BYTES = 4;

    /** The Spider parameters. */
    private final SpiderParam params;

    /**
     * Instantiates a new spider Git Index parser.
     *
     * @param params the params
     */
    public SpiderGitParser(SpiderParam params) {
        super();
        this.params = params;
    }

    /**
     * Parses the response body of {@code message} as a Git index file, based on publicly
     * available (but incomplete) documentation of the file format, and some reverse-engineering.
     *
     * @param message the message whose response body is parsed
     * @param source not used by this parser
     * @param depth the depth passed on to {@code processURL} for every entry found
     * @return {@code false} if the message is {@code null}, Git parsing is disabled, the request
     *     path is not a Git index path, or the body does not start with the index signature;
     *     {@code true} otherwise, including when parsing fails part way through, so that the
     *     message is not handed to 'fallback' parsers
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        if (message == null || !params.isParseGit()) {
            return false;
        }
        log.debug("Parsing a Git resource...");

        // Get the response content
        byte[] data = message.getResponseBody().getBytes();
        String baseURL = message.getRequestHeader().getURI().toString();

        try {
            String fullpath = message.getRequestHeader().getURI().getPath();
            if (fullpath == null) {
                fullpath = "";
            }
            if (log.isDebugEnabled()) {
                log.debug("The full path is [" + fullpath + "]");
            }

            // make sure the file name is as expected
            if (!GIT_FILE_PATTERN.matcher(fullpath).find()) {
                log.warn("This path cannot be handled by the Git parser: " + fullpath);
                return false;
            }

            // make sure the content starts with the Git index signature
            if (!hasGitIndexSignature(data)) {
                if (log.isDebugEnabled()) {
                    log.debug("The file '" + fullpath + "' could not be parsed as a Git Index file due to unexpected content");
                }
                return false;
            }

            // it looks like a duck, and quacks like a duck.
            // although it could still be an animatronic duck
            ByteBuffer dataBuffer = ByteBuffer.wrap(data);
            skipBytes(dataBuffer, GIT_INDEX_SIGNATURE.length);

            int indexFileVersion = dataBuffer.getInt();
            if (log.isDebugEnabled()) {
                log.debug("The Git index file version is " + indexFileVersion);
            }
            int indexEntryCount = dataBuffer.getInt();
            if (log.isDebugEnabled()) {
                log.debug(indexEntryCount + " entries were found in the Git index file ");
            }
            if (indexFileVersion != 2 && indexFileVersion != 3 && indexFileVersion != 4) {
                throw new IllegalStateException("Only Git Index File versions 2, 3, and 4 are currently supported. Git Index File Version " + indexFileVersion + " was found.");
            }

            // for version 4, each name is stored relative to the previous entry name, so keep it.
            // It is kept as raw bytes because the strip count is expressed in bytes, not chars.
            byte[] previousNameBytes = new byte[0];

            for (int entryIndex = 0; entryIndex < indexEntryCount; entryIndex++) {
                // remember where the entry starts, to work out the padding for versions 2 and 3
                int entryStart = dataBuffer.position();

                int indexEntryCtime1 = dataBuffer.getInt();
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has indexEntryCtime1 " + indexEntryCtime1);
                }
                // the second ctime word, both mtime words, dev, inode, mode, uid and gid
                // (8 x 32 bits) are not needed
                skipBytes(dataBuffer, 8 * 4);
                int indexEntrySize = dataBuffer.getInt();
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has size " + indexEntrySize);
                }

                // the entry id is a SHA-1 stored as 20 raw bytes; it is not needed either
                skipBytes(dataBuffer, ENTRY_ID_LENGTH);

                short indexEntryFlags = dataBuffer.getShort();
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has flags " + indexEntryFlags);
                }

                // the least significant 12 bits of the flags hold the length of the name in bytes
                int indexEntryNameByteLength = indexEntryFlags & NAME_LENGTH_MASK;
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has a name of length " + indexEntryNameByteLength);
                }

                // bit 14 of the flags is the extended flag for the entry (always 0 or 1 here)
                int indexEntryExtendedFlag = (indexEntryFlags >> EXTENDED_FLAG_BIT) & 1;
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has an extended flag of " + indexEntryExtendedFlag);
                }
                if (indexFileVersion == 2 && indexEntryExtendedFlag != 0) {
                    throw new IllegalStateException("Index File Version 2 is supposed to have the extended flag set to 0. For index entry " + entryIndex + ", it is set to " + indexEntryExtendedFlag);
                }

                // specific to version 3 and above, if the extended flag is set for the entry.
                if (indexFileVersion > 2 && indexEntryExtendedFlag == 1) {
                    if (log.isDebugEnabled()) {
                        log.debug("For Index file version " + indexFileVersion + ", reading an extra 16 bits for Entry " + entryIndex);
                    }
                    short indexEntryExtendedFlags = dataBuffer.getShort();
                    if (log.isDebugEnabled()) {
                        log.debug("Entry " + entryIndex + " has (optional) extended flags " + indexEntryExtendedFlags);
                    }
                }

                byte[] nameBytes;
                if (indexFileVersion > 3) {
                    if (log.isDebugEnabled()) {
                        log.debug("Inflating the (deflated) entry name for index entry " + entryIndex + " based on the previous entry name, since Index file version " + indexFileVersion + " requires this");
                    }
                    int stripCountStart = dataBuffer.position();
                    int removeNfromPreviousName = readPrefixStripCount(dataBuffer);
                    if (log.isDebugEnabled()) {
                        int n = dataBuffer.position() - stripCountStart;
                        log.debug("We read " + n + " bytes of variable length data from before the start of the entry name");
                    }

                    int keptPrefixLength = previousNameBytes.length - removeNfromPreviousName;
                    if (keptPrefixLength < 0) {
                        throw new IllegalStateException("Index entry " + entryIndex + " removes " + removeNfromPreviousName + " bytes from a previous entry name of only " + previousNameBytes.length + " bytes");
                    }

                    // the remainder of the name is stored as a NUL terminated byte string
                    byte[] suffix = readBytesUntilNul(dataBuffer);

                    // build it up: kept part of the previous name, followed by the new suffix
                    nameBytes = new byte[keptPrefixLength + suffix.length];
                    System.arraycopy(previousNameBytes, 0, nameBytes, 0, keptPrefixLength);
                    System.arraycopy(suffix, 0, nameBytes, keptPrefixLength, suffix.length);
                } else if (indexEntryNameByteLength < NAME_LENGTH_MASK) {
                    // indexFileVersion <= 3: the complete name is stored, with the length in the flags
                    nameBytes = new byte[indexEntryNameByteLength];
                    dataBuffer.get(nameBytes);
                } else {
                    // the length field is saturated, so the real length is only given by the
                    // terminating NUL
                    nameBytes = readBytesUntilNul(dataBuffer);
                }

                String indexEntryName = new String(nameBytes, StandardCharsets.UTF_8);
                if (log.isDebugEnabled()) {
                    log.debug("Entry " + entryIndex + " has name " + indexEntryName);
                }

                // and store off the index entry name, for the next iteration
                previousNameBytes = nameBytes;

                // skip past the zero byte terminating the name
                dataBuffer.get();

                // the padding after the pathname does not exist for versions 4 or later.
                if (indexFileVersion < 4) {
                    if (log.isDebugEnabled()) {
                        log.debug("Aligning to an 8 byte boundary after Entry " + entryIndex + ", since Index file version " + indexFileVersion + " mandates 64 bit alignment for index entries");
                    }
                    int entryBytesRead = dataBuffer.position() - entryStart;
                    int entryBytesToRead = (8 - (entryBytesRead % 8)) % 8;
                    if (log.isDebugEnabled()) {
                        log.debug("The number of bytes read for index entry " + entryIndex + " thus far is: " + entryBytesRead);
                        log.debug("So we must read " + entryBytesToRead + " bytes to stay on a 64 bit boundary");
                    }
                    // read the 0-7 (NUL) bytes to keep reading index entries on an 8 byte boundary
                    skipBytes(dataBuffer, entryBytesToRead);
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("Not aligning to an 8 byte boundary after Entry " + entryIndex + ", since Index file version " + indexFileVersion + " does not mandate 64 bit alignment for index entries");
                    }
                }

                // Git does not store entries for directories, but just files/symlinks/Git links,
                // so no need to handle directories here, unlike with SVN, for instance.
                if (!indexEntryName.isEmpty()) {
                    log.info("Found file/symbolic link/gitlink " + indexEntryName + " in the Git entries file");
                    // the base URL is the index file itself, so "../" resolves relative to the
                    // parent of the .git directory
                    processURL(message, depth, "../" + indexEntryName, baseURL);
                }
            }
            // all good, we're outta here.
            // We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
            return true;
        } catch (Exception e) {
            // malformed or truncated content ends up here (e.g. buffer underflow)
            log.warn("An error occurred trying to parse Git url '" + baseURL + "': " + e);
            // We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
            return true;
        }
    }

    /**
     * Tells whether the given path is the path of a Git index file.
     *
     * @param message not used by this parser
     * @param path the request path, may be {@code null}
     * @param wasAlreadyParsed not used by this parser
     * @return {@code true} if the path ends with {@code /.git/index}, {@code false} otherwise
     *     (including for a {@code null} path)
     */
    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyParsed) {
        if (path == null) {
            return false;
        }
        // matches the file name of files that should be parsed with the GIT file parser
        return GIT_FILE_PATTERN.matcher(path).find();
    }

    /**
     * Tells whether the content starts with the Git index signature bytes.
     *
     * @param data the raw content to check
     * @return {@code true} if the first bytes of {@code data} are the signature
     */
    private static boolean hasGitIndexSignature(byte[] data) {
        if (data.length < GIT_INDEX_SIGNATURE.length) {
            return false;
        }
        for (int i = 0; i < GIT_INDEX_SIGNATURE.length; i++) {
            if (data[i] != GIT_INDEX_SIGNATURE[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Advances the buffer by {@code count} bytes, discarding them.
     *
     * @param buffer the buffer to advance
     * @param count the number of bytes to discard
     * @throws java.nio.BufferUnderflowException if fewer than {@code count} bytes remain
     */
    private static void skipBytes(ByteBuffer buffer, int count) {
        buffer.get(new byte[count]);
    }

    /**
     * Reads the variable width integer that precedes a version 4 entry name: the number of bytes
     * to remove from the end of the previous entry name.
     *
     * <p>Each byte contributes its low 7 bits, a set most significant bit means another byte
     * follows, and 1 is added to the accumulated value before each continuation byte is shifted
     * in.
     *
     * @param buffer the buffer positioned at the first byte of the integer
     * @return the decoded value
     * @throws IllegalStateException if the integer takes more than {@value #MAX_STRIP_COUNT_BYTES}
     *     bytes
     */
    private static int readPrefixStripCount(ByteBuffer buffer) {
        int current = buffer.get() & 0xFF;
        int value = current & 0x7F;
        int bytesRead = 1;
        while ((current & 0x80) != 0) {
            if (++bytesRead > MAX_STRIP_COUNT_BYTES) {
                throw new IllegalStateException("An entry name is never expected to be > 2^^32 bytes long. Some file corruption may have occurred, or a parsing error has occurred");
            }
            current = buffer.get() & 0xFF;
            value = ((value + 1) << 7) | (current & 0x7F);
        }
        return value;
    }

    /**
     * Reads the bytes from the current position up to, but not including, the next NUL byte. The
     * buffer is left positioned on that NUL byte.
     *
     * @param buffer the buffer to read from
     * @return the bytes found before the NUL byte, possibly empty
     * @throws IllegalStateException if there is no NUL byte in the remainder of the buffer
     */
    private static byte[] readBytesUntilNul(ByteBuffer buffer) {
        int start = buffer.position();
        int end = start;
        while (end < buffer.limit() && buffer.get(end) != 0) {
            end++;
        }
        if (end >= buffer.limit()) {
            throw new IllegalStateException("No NUL terminator was found for the entry name starting at offset " + start);
        }
        byte[] bytes = new byte[end - start];
        buffer.get(bytes);
        return bytes;
    }
}