package org.fi;
import org.fi.*;
import org.fi.FMServer.FailType;
import org.fi.FMJoinPoint.*;
import java.io.*;
import java.util.*;
public class FMFilter {
//JINSU hack
//Filter Id specified in wl-config.xml
//public static String FID = FMLogic.FILTER_ID;
// No-argument constructor; it performs no initialization.
public FMFilter() {
}
// *************************************************
// This is the "extensible" part. Given the information
// in fac and ft, you can specify which failure you
// want to exercise.
// *************************************************
// Entry point: decides whether the failure described by (fac, ft, fis)
// passes the currently selected filter.
//
// The filter is selected by id. FMLogic.FILTER_ID is used when it is set;
// when it is null or empty, FMLogic.getFilterId() is consulted instead.
// The id is matched after lower-casing and trimming. Recognized ids are
// "readrepair1", "readrepair2", "readrepair3", "insertion1" and
// "insertion2"; any other value (including a missing id) selects
// filterReadRepairTest3.
//
// The remaining filters in this class (filterTransientDisk,
// filterWriteBug*, filterAppendBug*) have no id; to exercise one of them,
// call it from here.
//
// fac - join point, context and stack trace of the candidate failure
// ft  - failure type being considered
// fis - state object forwarded unchanged to the selected filter
// Returns true when the selected filter accepts the failure.
public static boolean passServerFilter(FMAllContext fac, FailType ft, FIState fis) {
    String filterId = FMLogic.FILTER_ID;
    // The null test must come first: calling equals() on a null id would throw.
    if (filterId == null || filterId.equals("")) {
        filterId = FMLogic.getFilterId();
    }
    // A still-missing id is treated as "unknown" and takes the default branch.
    if (filterId == null) {
        filterId = "";
    }
    // Locale.ROOT keeps the comparison independent of the JVM default locale
    // (e.g. under a Turkish locale "I" would not lower-case to "i").
    filterId = filterId.toLowerCase(Locale.ROOT).trim();

    if (filterId.equals("readrepair1")) {
        return filterReadRepairTest1(fac, ft, fis);
    }
    if (filterId.equals("readrepair2")) {
        return filterReadRepairTest2(fac, ft, fis);
    }
    if (filterId.equals("readrepair3")) {
        return filterReadRepairTest3(fac, ft, fis);
    }
    if (filterId.equals("insertion1")) {
        return filterInsertionTest1(fac, ft, fis);
    }
    if (filterId.equals("insertion2")) {
        return filterInsertionTest2(fac, ft, fis);
    }
    // Unknown or empty id: fall back to the third read-repair filter.
    return filterReadRepairTest3(fac, ft, fis);
}
// Returns true when the state describes a CORRUPTION failure on a digest
// read response at the given node: the context's message type must equal
// FMClient.READ_RESPONSE_DIGEST (ignoring case), fis.ft must be CORRUPTION,
// and the context's node id must match `node`.
private static boolean cassDigestTest(FIState fis, String node) {
    FMContext context = fis.ctx;
    // The checks run in the same order as a short-circuit && chain.
    if (!context.getMessageType().equalsIgnoreCase(FMClient.READ_RESPONSE_DIGEST)) {
        return false;
    }
    if (fis.ft != FailType.CORRUPTION) {
        return false;
    }
    return cassNodeTest(context, node);
}
// Returns true when the context's message type equals
// FMClient.READ_RESPONSE_NORMAL (ignoring case) and the context's node id
// matches `node`. The failure type is not examined here; callers check it.
private static boolean cassDataTest(FIState fis, String node) {
    FMContext context = fis.ctx;
    boolean isNormalResponse =
        context.getMessageType().equalsIgnoreCase(FMClient.READ_RESPONSE_NORMAL);
    return isNormalResponse && cassNodeTest(context, node);
}
// Returns true when the node id stored in ctx equals `node`, ignoring case.
private static boolean cassNodeTest(FMContext ctx, String node) {
    String currentNode = ctx.getNodeId();
    return currentNode.equalsIgnoreCase(node);
}
// Insertion filter 1: logs the join point and context, then accepts a CRASH
// when the current fsn is 1, the join point string mentions
// "DataOutputStream", and the context's node is "Node1".
private static boolean filterInsertionTest1(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint joinPoint = fac.fjp;
    FMContext context = fac.ctx;
    System.out.println("filter JPS || " + joinPoint.getJoinPointStr() + "\nfilter ctx || " + context);

    if (FMLogic.getCurrentFsn() != 1 || ft != FailType.CRASH) {
        return false;
    }
    if (!joinPoint.getJoinPointStr().contains("DataOutputStream")) {
        return false;
    }
    return cassNodeTest(context, "Node1");
}
// Insertion filter 2: logs the join point and context, then accepts a CRASH
// placed BEFORE the join point when the current fsn is 1, the join point
// string mentions "DataInputStream", and the context's node is "Node0".
private static boolean filterInsertionTest2(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint joinPoint = fac.fjp;
    FMContext context = fac.ctx;
    System.out.println("filter JPS || " + joinPoint.getJoinPointStr() + "\nfilter ctx || " + context);

    return FMLogic.getCurrentFsn() == 1
        && ft == FailType.CRASH
        && joinPoint.getJoinPlc() == JoinPlc.BEFORE
        && joinPoint.getJoinPointStr().contains("DataInputStream")
        && cassNodeTest(context, "Node0");
}
// ************************************************************
// Read-repair filter 3. Accepts, depending on the current fsn:
//   fsn 1      : a CORRUPTION of a digest read response at "Node2"
//                (see cassDigestTest, which checks fis.ft);
//   fsn 2 or 3 : a CORRUPTION of a normal read response at "Node2", or
//                a CRASH on a normal read response at "Node3".
// Everything else is rejected. `fac` is not consulted by this filter.
private static boolean filterReadRepairTest3(FMAllContext fac, FailType ft, FIState fis) {
    int fsn = FMLogic.getCurrentFsn();

    if (fsn == 1) {
        return cassDigestTest(fis, "Node2");
    }
    if (fsn == 2 || fsn == 3) {
        if (cassDataTest(fis, "Node2") && ft == FailType.CORRUPTION) {
            return true;
        }
        return cassDataTest(fis, "Node3") && ft == FailType.CRASH;
    }
    return false;
}
// ************************************************************
// Read-repair filter 2. Accepts:
//   - for fsn 1, 2 or 3: a CORRUPTION of a normal read response at "Node1";
//   - for fsn 2 or 3: a CRASH placed BEFORE a join point whose string
//     mentions "DataOutputStream", on node "Node1".
private static boolean filterReadRepairTest2(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint joinPoint = fac.fjp;
    FMContext context = fac.ctx;
    int fsn = FMLogic.getCurrentFsn();

    boolean corruptionAllowed = fsn >= 1 && fsn <= 3;
    boolean crashAllowed = fsn == 2 || fsn == 3;

    if (corruptionAllowed
            && cassDataTest(fis, "Node1")
            && ft == FailType.CORRUPTION) {
        return true;
    }
    return crashAllowed
        && ft == FailType.CRASH
        && joinPoint.getJoinPlc() == JoinPlc.BEFORE
        && joinPoint.getJoinPointStr().contains("DataOutputStream")
        && cassNodeTest(context, "Node1");
}
// ************************************************************
// Read-repair filter 1. Accepts, depending on the current fsn:
//   fsn 1      : a CORRUPTION of a digest read response at "Node2"
//                (see cassDigestTest);
//   fsn 2 or 3 : a CRASH placed BEFORE a join point whose string mentions
//                "DataOutputStream" on node "Node3", or a CORRUPTION of a
//                normal read response at "Node1".
private static boolean filterReadRepairTest1(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint joinPoint = fac.fjp;
    FMContext context = fac.ctx;
    int fsn = FMLogic.getCurrentFsn();

    if (fsn == 1) {
        return cassDigestTest(fis, "Node2");
    }
    if (fsn != 2 && fsn != 3) {
        return false;
    }

    boolean crashOnNode3 = ft == FailType.CRASH
        && joinPoint.getJoinPlc() == JoinPlc.BEFORE
        && joinPoint.getJoinPointStr().contains("DataOutputStream")
        && cassNodeTest(context, "Node3");
    if (crashOnNode3) {
        return true;
    }
    return cassDataTest(fis, "Node1") && ft == FailType.CORRUPTION;
}
// **************************************************
// Accepts a transient failure (EXCEPTION or RETFALSE) on a WRITE join point
// whose target is disk I/O (per Util.isDiskIO) on a node whose id contains
// "DataNode".
private static boolean filterTransientDisk(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint joinPoint = fac.fjp;
    FMContext context = fac.ctx;

    String node = context.getNodeId();
    boolean targetsDisk = Util.isDiskIO(context.getTargetIO());
    boolean isWrite = joinPoint.getJoinIot() == JoinIot.WRITE;
    boolean onDataNode = node.contains("DataNode");
    boolean isTransient = ft == FailType.EXCEPTION || ft == FailType.RETFALSE;

    return isTransient && targetsDisk && onDataNode && isWrite;
}
// **************************************************
// Append-Bug 2:
// http://hdfswiki.pbworks.com/Block-lost-when-primary-crashes-in-recoverBlock
// Block is lost if primary datanode crashes in the middle of tryUpdateBlock.
// # available datanode = 2
// # replica = 2
// # disks / datanode = 1
// # failures = 1
// # failure type = crash
// **************************************************
// Accepts a CRASH placed AFTER a "renameTo" join point on "DataNode-1" when
// the stack trace mentions "tryUpdateBlock", the target I/O does not contain
// ".meta_tmp", and the max fsn is 1.
private static boolean filterAppendBug2(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;
    FMStackTrace stack = fac.fst;
    String jpStr = fjp.getJoinPointStr();
    JoinPlc place = fjp.getJoinPlc();

    // Guards are evaluated in the same order as a short-circuit && chain.
    if (ft != FailType.CRASH || place != JoinPlc.AFTER) {
        return false;
    }
    if (!context.getNodeId().equals("DataNode-1")) {
        return false;
    }
    if (!stack.toString().contains("tryUpdateBlock")) {
        return false;
    }
    if (!jpStr.contains("renameTo")) {
        return false;
    }
    // Skip the rename of the meta file to its tmp file.
    if (context.getTargetIO().contains(".meta_tmp")) {
        return false;
    }
    return FMLogic.getMaxFsn() == 1;
}
// **************************************************
// append-bug #4:
// http://hdfswiki.pbworks.com/UpdateBlock-fails-due-to-unmatched-file-length
// # available datanodes = 3
// # replicas = 3
// # disks / datanode = 1
// # failures = 1
// failure type = bad disk
// When/where failure happens = (see below)
// This bug is non-deterministic, to reproduce it, add a sufficient sleep
// before out.write() in BlockReceiver.receivePacket() in dn1 and dn2 but not dn3
// ALSO: go to datanode/BlockReceiver.java, and enable "enableAddDelay"
// **************************************************
// Accepts a BADDISK failure at a "write" join point on "DataNode-3" when the
// stack trace mentions "receivePacket", the target is a data file (per
// Util.isDataFile), and the max fsn is 1.
// The join place is deliberately not constrained (an AFTER-only check was
// disabled in an earlier revision).
private static boolean filterAppendBug4(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext ctx = fac.ctx;
    FMStackTrace fst = fac.fst;
    String joinPoint = fjp.getJoinPointStr();
    boolean isDataFile = Util.isDataFile(ctx.getTargetIO());

    return ft == FailType.BADDISK
        && ctx.getNodeId().equals("DataNode-3")
        && fst.toString().contains("receivePacket")
        && joinPoint.contains("write")
        && isDataFile
        && FMLogic.getMaxFsn() == 1;
}
// **************************************************
// append-bug #5:
// http://hdfswiki.pbworks.com/
// CRC-does-not-match-when-retrying-appending-a-partial-block
// # available datanodes = 2
// # replicas = 2
// # ALSO: change FILE_SIZE to 16 in workload-driver Driver/Hdfs.java
// # disks / datanode = 1
// # failures = 1
// failure type = bad disk
// NOTE: I couldn't reproduce this so far
// **************************************************
// Accepts a BADDISK failure at a "write" join point on "DataNode-2" when the
// stack trace mentions "receivePacket", the target is a data file (per
// Util.isDataFile), and the max fsn is 1.
// The join place is deliberately not constrained (an AFTER-only check was
// disabled in an earlier revision).
private static boolean filterAppendBug5(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext ctx = fac.ctx;
    FMStackTrace fst = fac.fst;
    String joinPoint = fjp.getJoinPointStr();
    boolean isDataFile = Util.isDataFile(ctx.getTargetIO());

    return ft == FailType.BADDISK
        && ctx.getNodeId().equals("DataNode-2")
        && fst.toString().contains("receivePacket")
        && joinPoint.contains("write")
        && isDataFile
        && FMLogic.getMaxFsn() == 1;
}
// **************************************************
// Append-Bug 6:
// http://hdfswiki.pbworks.com/
// DFSClient-incorrectly-asks-for-new-block-if-primary-
// crashes-during-first-recoverBlock
// this filter is exactly the same as that of append bug 2
// all we need to do is the primary data node crash during recoverBlock
// # available datanodes = 2
// # replicas = 2
// # disks / datanode = 1
// # failures = 1
// failure type = crash
// When/where failure happens = during primary's recoverBlock
// **************************************************
// Same condition as filterAppendBug2: a CRASH placed AFTER a "renameTo" join
// point on "DataNode-1", with "tryUpdateBlock" in the stack trace, a target
// I/O that does not contain ".meta_tmp", and a max fsn of 1.
// Delegating keeps the two filters from drifting apart.
private static boolean filterAppendBug6(FMAllContext fac, FailType ft, FIState fis) {
    return filterAppendBug2(fac, ft, fis);
}
// **************************************************
// append Bug #7
// http://hdfswiki.pbworks.com/Generation-Stamp-mismatches%2C-leading-to-failed-append
// # available datanodes = 3
// # replicas = 3
// # disks / datanode = 1
// # failures = 2
// failure type = crash
// **************************************************
// Two-failure filter (max fsn must be 2). Both failures are a CRASH placed
// AFTER the join point:
//   fsn 1: a "write" join point on "DataNode-3" with "receivePacket" in the
//          stack trace, targeting a data file (per Util.isDataFile);
//   fsn 2: a "renameTo" join point on "DataNode-1" with "tryUpdateBlock" in
//          the stack trace, whose target I/O does not contain ".meta_tmp".
private static boolean filterAppendBug7(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;
    FMStackTrace stack = fac.fst;
    String jpStr = fjp.getJoinPointStr();
    JoinPlc place = fjp.getJoinPlc();
    boolean dataFile = Util.isDataFile(context.getTargetIO());
    int currentFsn = FMLogic.getCurrentFsn();

    // Requirements shared by both failures.
    if (ft != FailType.CRASH || place != JoinPlc.AFTER) {
        return false;
    }

    if (currentFsn == 1) {
        return context.getNodeId().equals("DataNode-3")
            && stack.toString().contains("receivePacket")
            && jpStr.contains("write")
            && dataFile
            && FMLogic.getMaxFsn() == 2;
    }
    if (currentFsn == 2) {
        return context.getNodeId().equals("DataNode-1")
            && stack.toString().contains("tryUpdateBlock")
            && jpStr.contains("renameTo")
            && !context.getTargetIO().contains(".meta_tmp")
            && FMLogic.getMaxFsn() == 2;
    }
    return false;
}
// **************************************************
// append Bug #8
// http://hdfswiki.pbworks.com/
// Corrupted-block-if-a-crash-happens-before-writing-to-
// checksumOut-but-after-writing-to-dataOut
// # available datanodes = 1
// # replicas = 1
// # disks / datanode = 1
// # failures = 1
// failure type = crash
// **************************************************
// Accepts a CRASH placed AFTER a "write" join point on "DataNode-1" when the
// stack trace mentions "receivePacket", the target is a data file (per
// Util.isDataFile), and the max fsn is 1.
private static boolean filterAppendBug8(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;
    FMStackTrace stack = fac.fst;
    String jpStr = fjp.getJoinPointStr();
    JoinPlc place = fjp.getJoinPlc();
    boolean dataFile = Util.isDataFile(context.getTargetIO());

    if (ft != FailType.CRASH || place != JoinPlc.AFTER) {
        return false;
    }
    if (!context.getNodeId().equals("DataNode-1")) {
        return false;
    }
    if (!stack.toString().contains("receivePacket")) {
        return false;
    }
    if (!jpStr.contains("write") || !dataFile) {
        return false;
    }
    return FMLogic.getMaxFsn() == 1;
}
// **************************************************
// WRITE BUG # 1:
// http://hdfswiki.pbworks.com/FrontPage
// dfs.replication = 1
// #datanodes = 1
// #disks/datanode = 2 (see hdfs-site.xml, dfs.data.dir, enable two data disks)
// MAX_FSN = 1
// Workload = putfile
// FailType = BadDisk
// **************************************************
// Accepts a BADDISK failure at a "renameTo" join point when the target is a
// data file (per Util.isDataFile) located on the disk that
// Util.getDiskIdFromTargetIO reports as "Disk1".
private static boolean filterWriteBug1(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;

    String jpStr = fjp.getJoinPointStr();
    String disk = Util.getDiskIdFromTargetIO(context.getTargetIO());
    boolean onFirstDisk = disk.equals("Disk1");
    boolean isRename = jpStr.contains("renameTo");
    boolean dataFile = Util.isDataFile(context.getTargetIO());

    return onFirstDisk && isRename && dataFile && ft == FailType.BADDISK;
}
// **************************************************
// WRITE BUG # 3:
// http://hdfswiki.pbworks.com/Namenode-returning-the-same-Datanode-to-client%2C-due-to-infrequent-heartbeat
// There is _one_ bad-disk failure during the first phase.
// Setup:
// dfs.replication = 2 (in "hdfs-site.xml")
// #datanodes = 3 (in "slaves")
// MAX_FSN = 1 (in Driver.java)
// Workload = putfile
// FailType = BadDisk
// **************************************************
// Accepts a BADDISK failure on "DataNode-1" at a join point whose string
// contains "RandomAccessFile(File, String)", when the target is a data file
// (per Util.isDataFile) and the max fsn is 1.
private static boolean filterWriteBug3(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;

    String jpStr = fjp.getJoinPointStr();
    boolean dataFile = Util.isDataFile(context.getTargetIO());
    boolean opensFile = jpStr.contains("RandomAccessFile(File, String)");

    if (ft != FailType.BADDISK) {
        return false;
    }
    if (!context.getNodeId().equals("DataNode-1")) {
        return false;
    }
    return opensFile && dataFile && FMLogic.getMaxFsn() == 1;
}
// **************************************************
// WRITE BUG # 5:
// http://hdfswiki.pbworks.com/Client-uselessly-retries-recoverBlock-5-times
// There are two bad-disk failures.
// The first one happens in the 2nd phase at DN-2
// This will force the client to create another pipeline
// just for DN-1. But DN-1 fails again.
// The client gives up, without contacting the namenode again
// Setup:
// dfs.replication = 2 (in "hdfs-site.xml")
// #datanodes = 4 (in "slaves")
// MAX_FSN = 2 (in Driver.java)
// Workload = putfile
// FailType = BadDisk
// **************************************************
// Accepts a BADDISK failure at a "renameTo" join point on a data file (per
// Util.isDataFile) when the max fsn is 2 and the stack trace contains
// ("PacketResponder", "lastDataNodeRun").
private static boolean filterWriteBug5(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;
    FMStackTrace stack = fac.fst;

    String jpStr = fjp.getJoinPointStr();
    boolean isRename = jpStr.contains("renameTo");
    boolean dataFile = Util.isDataFile(context.getTargetIO());

    if (ft != FailType.BADDISK || !isRename || !dataFile) {
        return false;
    }
    if (FMLogic.getMaxFsn() != 2) {
        return false;
    }
    return stack.contains("PacketResponder", "lastDataNodeRun");
}
// **************************************************
// WRITE BUG # 6:
// http://hdfswiki.pbworks.com/A-block-is-stuck-in-ongoingRecovery-due-to-exception-not-propagated
// Setup:
// dfs.replication = 2 (in "hdfs-site.xml")
// #datanodes = 4 (in "slaves")
// MAX_FSN = 2 (in Driver.java)
// Workload = putfile
// FailType = Exception/Retfalse
// **************************************************
// Two-failure filter:
//   fsn 1: an EXCEPTION on "DataNode-1" at a "DataOutputStream.flush" join
//          point targeting a meta file (per Util.isMetaFile), with
//          ("BlockReceiver", "flush") in the stack trace;
//   fsn 2: a RETFALSE on "DataNode-2" at a "File.renameTo" join point whose
//          target satisfies Util.isCurrentTmpMeta, with
//          ("FSDataset", "tryUpdateBlock") in the stack trace.
private static boolean filterWriteBug6(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext ctx = fac.ctx;
    FMStackTrace fst = fac.fst;
    String joinPoint = fjp.getJoinPointStr();
    boolean isMetaFile = Util.isMetaFile(ctx.getTargetIO());
    boolean isCurrentTmpMeta = Util.isCurrentTmpMeta(ctx.getTargetIO());
    int fsn = FMLogic.getCurrentFsn();
    String nodeId = ctx.getNodeId();

    // the 1st failure
    if (fsn == 1
            && ft == FailType.EXCEPTION
            && isMetaFile
            && nodeId.equals("DataNode-1")
            && fst.contains("BlockReceiver", "flush")
            && joinPoint.contains("DataOutputStream.flush")) {
        return true;
    }

    // the 2nd failure
    return fsn == 2
        && ft == FailType.RETFALSE
        && isCurrentTmpMeta
        && nodeId.equals("DataNode-2")
        && fst.contains("FSDataset", "tryUpdateBlock")
        && joinPoint.contains("File.renameTo");
}
// **************************************************
// WRITE BUG # 7:
// http://hdfswiki.pbworks.com
// Setup:
// dfs.replication = 2 (in "hdfs-site.xml")
// #datanodes = 4 (in "slaves")
// MAX_FSN = 2 (in Driver.java)
// Workload = putfile
// FailType = Exception/Retfalse
// **************************************************
// Accepts a transient failure (EXCEPTION or RETFALSE) at a "renameTo" join
// point reached from ("PacketResponder", "run"), targeting a meta file (per
// Util.isMetaFile), for the pairs (fsn 1, "DataNode-1") and
// (fsn 2, "DataNode-2"). Note: these are concurrent failures.
private static boolean filterWriteBug7(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext context = fac.ctx;
    FMStackTrace stack = fac.fst;

    String jpStr = fjp.getJoinPointStr();
    boolean renameInResponder =
        jpStr.contains("renameTo") && stack.contains("PacketResponder", "run");
    boolean isTransient = ft == FailType.EXCEPTION || ft == FailType.RETFALSE;
    boolean metaFile = Util.isMetaFile(context.getTargetIO());
    int currentFsn = FMLogic.getCurrentFsn();
    String node = context.getNodeId();

    if (!(isTransient && metaFile && renameInResponder)) {
        return false;
    }
    return (currentFsn == 1 && node.equals("DataNode-1"))
        || (currentFsn == 2 && node.equals("DataNode-2"));
}
// ********************************
// Playing around with filters here
// *********************************
// Scratch filter for experimenting with conditions; nothing in this file
// calls it. It currently accepts an EXCEPTION or RETFALSE failure on a node
// whose id contains "DataNode".
// Most locals below are not used by the current condition; they appear to be
// kept as ready-made building blocks for new conditions.
private static boolean passServerFilterRepository
(FMAllContext fac, FailType ft, FIState fis) {
    FMJoinPoint fjp = fac.fjp;
    FMContext ctx = fac.ctx;
    FMStackTrace fst = fac.fst;
    int fsn = FMLogic.getCurrentFsn();
    String nodeId = ctx.getNodeId();
    boolean isBlockFile = Util.isBlockFile(ctx.getTargetIO());
    boolean isMetaFile = Util.isMetaFile(ctx.getTargetIO());
    boolean isDataFile = Util.isDataFile(ctx.getTargetIO());
    boolean isNetIoToDn = Util.isNetIOtoDataNode(fac.ctx.getTargetIO());
    JoinIot ioType = fjp.getJoinIot();
    String joinPoint = fjp.getJoinPointStr();
    String diskId = Util.getDiskIdFromTargetIO(ctx.getTargetIO());
    JoinPlc jplc = fjp.getJoinPlc();
    // nodeId.equals("DataNode-2") && diskId.equals("Disk1"))
    boolean firstPhase = joinPoint.contains("RandomAccessFile(File, String)");
    boolean secondPhase =
        joinPoint.contains("renameTo") ||
        joinPoint.contains("flush");
    // just an optimization .. specifically we're interested
    // in client write and datanode failure only
    // hence we only "filter" data file/meta write
    // [file][/rhh/dfs/data1/tmp/blk_7307724612204181421]
    // and for network stream, node id should not be client (unknown so far)
    boolean transientFailure =
        (ft == FailType.EXCEPTION || ft == FailType.RETFALSE);
    // The parentheses above matter: && binds tighter than ||, so without them
    // a RETFALSE failure would pass on any node, not only on datanodes.
    boolean passFilter = nodeId.contains("DataNode") && transientFailure;
    // System.out.println("_passFilter_ " + passFilter);
    return passFilter;
}
}