/******************************************************************************* * gMix open source project - https://svs.informatik.uni-hamburg.de/gmix/ * Copyright (C) 2014 SVS * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *******************************************************************************/ package staticContent.evaluation.traceParser.engine.converter; import java.io.IOException; import java.io.Writer; import java.util.HashMap; import java.util.HashSet; import java.util.Vector; import staticContent.evaluation.traceParser.engine.Protocol; import staticContent.evaluation.traceParser.engine.dataStructure.ExtendedTransaction; import staticContent.evaluation.traceParser.engine.dataStructure.Flow; import staticContent.evaluation.traceParser.engine.dataStructure.Packet; import staticContent.evaluation.traceParser.engine.dataStructure.Flow.FlowDirection; import staticContent.evaluation.traceParser.engine.dataStructure.Packet.TCPflags; import staticContent.evaluation.traceParser.engine.fileReader.PacketIterator; import staticContent.evaluation.traceParser.engine.fileReader.PacketSource; import staticContent.evaluation.traceParser.engine.protocolHandler.TCPhandler.TCPhandshakeStatus; import staticContent.evaluation.traceParser.engine.protocolHandler.TCPhandler.TCPteardownStatus; import staticContent.framework.util.Util; public class TCPFlowExtractor { private enum TransactionStatus {NONE, REQUEST_PHASE, REPLY_PHASE}; private final static long UNKNOWN = -1l; private final static int MULTIPART_REPLY_THRESHOLD = 5; // consecutive replies with a delay of less than MULTIPART_REPLY_THRESHOLD ms will be treated as a single reply private Writer resultTrace; private HashSet<String> flowIds; private HashMap<String, TempFlow> tempFlows; private long startOfTrace = Util.NOT_SET; //private Vector<TempFlow> removals; public void extractFlows(HashSet<String> flowIds, Writer resultTrace, PacketSource packetSource) throws IOException { this.flowIds = flowIds; this.resultTrace = resultTrace; //this.packetSource = packetSource; this.tempFlows = new HashMap<String, TempFlow>(10000); // TODO: change flowIds type to <FlowId.java> -> data structure with two fields: string id + TempFlow tempFlowReference -> avoid two hashtables //this.removals = new Vector<TempFlow>(1000); PacketIterator iterator = new PacketIterator(packetSource); while (iterator.hasNext()) { handlePacket(iterator.next()); } } /** * data structure that stores all packets of a distinct flow (one TempFlow * per "real" (tcp) flow). will be used to create an "Flow.java" * object after the last packet of this flow was added (tcp teardown ack). */ public class TempFlow { public String flowIdentifier; //long flowIdNumeric; public FlowDirection flowDirection; // who inited this flow? public Protocol layer4protocol; public String layer4protocolAsString; public String clientAddress; public String clientPort; public String serverAddress; public String serverPort; public TCPhandshakeStatus handshakeStatus; public TCPteardownStatus teardownStatus; //long startOfFlow; //Calendar startTime; //Calendar endTime; public Vector<Packet> packets; // stores all packets of this flow. will be used to create an "Flow.java" after the last packet of this flow was added (e.g. tcp teardown ack) public TempFlow(String flowIdentifier) { this.flowIdentifier = flowIdentifier; //this.flowIdNumeric = flowIdCounter++; packets = new Vector<Packet>(10); } } public Packet handlePacket(Packet packet) { /*if (packet.getSequenceNumber() % 50000 == 0) { // remove dead entries for (TempFlow flow:tempFlows.values()) if ( (packet.getTimestamp().getTimeInMillis() - flow.startOfFlow) > 5000 && flow.handshakeStatus != TCPhandshakeStatus.COMPLETE ) // no complete handshake after 5 seconds removals.add(flow); System.out.println("read " +packet.getSequenceNumber() +" packets so far. hm-size: " +tempFlows.size() +" - " +removals.size()); for (TempFlow flowToRemove:removals) tempFlows.remove(flowToRemove); removals.clear(); }*/ if (startOfTrace == Util.NOT_SET) startOfTrace = packet.getTimestamp().getTimeInMillis(); // filter: if (packet.getLayer3protocol() != Protocol.TCP) // this is a tcp flow extractor -> ignore other packets return null; if (packet.getLayer4length() == UNKNOWN) // we cannot replay packets with unknown size return null; //if (packet.getLayer4protocol() != Protocol.HTTP && packet.getLayer4protocol() != Protocol.HTTPS) // return null; String flowIdentifier = TCPflowFinder.getFlowIdentifier(packet); if (!flowIds.contains(flowIdentifier)) // will be handled in another run return null; TempFlow flow = tempFlows.get(flowIdentifier); if (flow == null) { // we dont know this flow yet if (packet.getTCPflags() != TCPflags.SYN) { // we haven't seen the start of this flow, so we ignore it (malformed packet, or start of flow not captured/not present in trace file) return null; } else { // add new flow flow = new TempFlow(flowIdentifier); tempFlows.put(flowIdentifier, flow); //flow.startOfFlow = packet.getTimestamp().getTimeInMillis(); } } if (flow.packets.size() > 1) { assert flow.packets.get(flow.packets.size()-1).getTimestamp().compareTo(packet.getTimestamp()) < 1; } flow.packets.add(packet); if (flow.handshakeStatus == null && packet.getTCPflags() == TCPflags.SYN) { // SYN flag set; first packet of the flow //System.out.println("found SYN"); flow.handshakeStatus = TCPhandshakeStatus.SYN_TRANSMITTED; //flow.startTime = packet.getTimestamp(); flow.flowDirection = packet.getFlowDirection(); flow.layer4protocol = packet.getLayer4protocol(); flow.layer4protocolAsString = packet.getLayer4protocol().toString(); flow.clientAddress = packet.getLayer2srcAddress(); flow.clientPort = packet.getLayer3srcAddress(); flow.serverAddress = packet.getLayer2dstAddress(); flow.serverPort = packet.getLayer3dstAddress(); } else if (flow.handshakeStatus == null) { // no handshakeStatus and no syn flag -> we haven't seen the start of this flow, so we ignore it //System.out.println("found packet not belonging to a connection"); tempFlows.remove(flow.flowIdentifier); return null; } else if (flow.handshakeStatus == TCPhandshakeStatus.SYN_TRANSMITTED) { if (packet.getTCPflags() != TCPflags.SYN_ACK) { // corrupt flow -> discart it //System.out.println("found corrupt flow: no SYN_ACK: " +flow.flowIdentifier +", " +packet.getTimestamp().getTime()+ " and " +packet.getTimestamp().get(Calendar.MILLISECOND) +"ms, size: " +packet.getLayer4length()); tempFlows.remove(flow.flowIdentifier); return null; } else { // none-corrupt flow // System.out.println("found SYN_ACK"); flow.handshakeStatus = TCPhandshakeStatus.SYN_ACK_TRANSMITTED; } } else if (flow.handshakeStatus == TCPhandshakeStatus.SYN_ACK_TRANSMITTED) { if (packet.getTCPflags() != TCPflags.ACK) { // corrupt flow -> discart it //System.out.println("found corrupt flow: no ACK after SYN_ACK"); tempFlows.remove(flow.flowIdentifier); return null; } else { // none-corrupt flow //System.out.println("found ACK -> handshake complete"); flow.handshakeStatus = TCPhandshakeStatus.COMPLETE; } } if (flow.teardownStatus == null && (packet.getTCPflags() == TCPflags.FIN || packet.getTCPflags() == TCPflags.FIN_ACK)) { // System.out.println("found FIN"); flow.teardownStatus = TCPteardownStatus.FIN1_TRANSMITTED; } else if (flow.teardownStatus == TCPteardownStatus.FIN1_TRANSMITTED && (packet.getTCPflags() == TCPflags.FIN || packet.getTCPflags() == TCPflags.FIN_ACK)) { // TODO: should we deal with half-open tcp connections? //System.out.println("tcp teardown complete"); flow.teardownStatus = TCPteardownStatus.COMPLETE; //flow.endTime = packet.getTimestamp(); // serialize and remove flow: serializeFlow(tempFlows.remove(flowIdentifier)); } //lastPacket = packet; return packet; } private void serializeFlow(TempFlow flow) { // TODO: extract and store latency of flow //System.out.println("serialize flow: start"); assert flow.handshakeStatus == TCPhandshakeStatus.COMPLETE; assert flow.teardownStatus == TCPteardownStatus.COMPLETE; // create Flow object Flow result = new Flow(); result.startOfFlow = flow.packets.get(2).getTimestamp().getTimeInMillis() - startOfTrace;// ignore handshake packets -> start with 3rd packet assert flow.packets.get(1).getLayer4length() == UNKNOWN || flow.packets.get(1).getLayer4length() == 0 : flow.packets.get(1); result.requestSize = 0; result.replySize = 0; result.senderAddress = flow.clientAddress; result.receiverAddress = flow.serverAddress; result.senderId = AddressMapper.getClientId(result.senderAddress); result.receiverID = AddressMapper.getServerId(result.receiverAddress); result.senderPort = Integer.parseInt(flow.clientPort); result.receiverPort = Integer.parseInt(flow.serverPort); result.layer4protocol = flow.layer4protocol; result.protocolAsString = flow.layer4protocolAsString; result.flowDirection = flow.flowDirection; // extract transactions: TransactionStatus transactionStatus = TransactionStatus.NONE; long startOfRequest = UNKNOWN; long endOfRequest = UNKNOWN; //long endOfRequestPhase = UNKNOWN; Vector<Long> startReplyOffsets = new Vector<Long>(); Vector<Long> endReplyOffsets = new Vector<Long>(); Vector<Integer> replySizes = new Vector<Integer>(); long startOfReply = UNKNOWN; long endOfReply = UNKNOWN; //long startOfReplyPhase = UNKNOWN; //long endOfTransaction = UNKNOWN; long lastActivity = UNKNOWN; long endOfLastTransaction = result.startOfFlow; //long startOfLastTransaction = UNKNOWN; int transactionRequestSize = 0; int transactionReplySize = 0; //int packetCtr = 1; for (int i=0; i<flow.packets.size(); i++) { Packet packet = flow.packets.get(i); long now = packet.getTimestamp().getTimeInMillis() - startOfTrace; // offset from start of trace assert now >= lastActivity; if (packet.getLayer4length() == 0 || packet.getLayer4length() == UNKNOWN) { // ignore ack packages; we are interested in payload only //System.out.println("serialize flow: NEW PACKET("+packetCtr++ +"): no payload packed -> ignore"); continue; } if (packet.getLayer2dstAddress().equals(result.receiverAddress)) { // REQUEST (from client to server) //System.out.println("serialize flow: NEW PACKET("+packetCtr++ +"): direction: from client to server"); switch (transactionStatus) { case NONE: // new transaction //System.out.println("serialize flow: status: NONE"); //System.out.println("serialize flow: it's the start of a new transaction (request)"); transactionStatus = TransactionStatus.REQUEST_PHASE; startOfRequest = now; endOfRequest = now; // may be changed later lastActivity = now; transactionRequestSize += packet.getLayer4length(); result.requestSize += packet.getLayer4length(); break; case REQUEST_PHASE: // REQUEST+REQUEST: packet might belong to the last request or a new one (decide by delay) //System.out.println("serialize flow: status: REQUEST_PHASE"); assert lastActivity != UNKNOWN; long timeSinceLastRequest = now - lastActivity; if (timeSinceLastRequest <= 1) { // packet belongs to last request: assume that a delay of more than one ms indicates a new transaction (no layer 4 application on the client should interrupt a send operation that long...) //System.out.println("serialize flow: packet belongs to last request"); lastActivity = now; endOfRequest = now; // may be changed later transactionRequestSize += packet.getLayer4length(); result.requestSize += packet.getLayer4length(); } else { // packet belongs to a new transaction //System.out.println("serialize flow: packet belongs to a new transaction"); // finish current transaction (as a transaction without reply): result.transactions.add(new ExtendedTransaction( (int) (startOfRequest - endOfLastTransaction), startOfRequest, endOfRequest, transactionRequestSize, result.receiverID, null, null, null )); endOfLastTransaction = endOfRequest; //endOfRequestPhase = UNKNOWN; // reset startOfReply = UNKNOWN; // reset endOfReply = UNKNOWN; // reset //startOfReplyPhase = UNKNOWN; // reset //endOfTransaction = UNKNOWN; // reset transactionRequestSize = 0; // reset transactionReplySize = 0; // reset // start new transaction: //startOfLastTransaction = startOfTransaction; transactionStatus = TransactionStatus.REQUEST_PHASE; startOfRequest = now; endOfRequest = now; // may be changed later lastActivity = now; transactionRequestSize += packet.getLayer4length(); result.requestSize += packet.getLayer4length(); } break; case REPLY_PHASE: // REPLY+REQUEST: packet ends the current transaction and starts a new one //System.out.println("serialize flow: status: REPLY_PHASE"); //System.out.println("serialize flow: it's the end of the current transaction (request)"); // store data about current reply: assert startOfReply != UNKNOWN; replySizes.add(transactionReplySize); startReplyOffsets.add(startOfReply); endReplyOffsets.add(endOfReply); result.transactions.add(new ExtendedTransaction( (int) (startOfRequest - endOfLastTransaction), startOfRequest, endOfRequest, transactionRequestSize, result.receiverID, Util.toLongArray(startReplyOffsets), Util.toLongArray(endReplyOffsets), Util.toIntArray(replySizes) )); endOfLastTransaction = endOfReply; //endOfRequestPhase = UNKNOWN; // reset startOfReply = UNKNOWN; // reset endOfReply = UNKNOWN; // reset //startOfReplyPhase = UNKNOWN; // reset //endOfTransaction = UNKNOWN; // reset transactionRequestSize = 0; // reset transactionReplySize = 0; // reset startReplyOffsets.clear(); // reset endReplyOffsets.clear(); // reset replySizes.clear(); // reset // start new transaction: //System.out.println("serialize flow: it's the start of a new transaction (request)"); //startOfLastTransaction = startOfTransaction; transactionStatus = TransactionStatus.REQUEST_PHASE; startOfRequest = now; endOfRequest = now; // may be changed later lastActivity = now; transactionRequestSize += packet.getLayer4length(); result.requestSize += packet.getLayer4length(); break; } } else { // REPLY (from server to client) //System.out.println("serialize flow: NEW PACKET("+packetCtr++ +"): direction: from server to client"); switch (transactionStatus) { case NONE: // server sends the first message transactionStatus = TransactionStatus.REPLY_PHASE; //endOfRequestPhase = now; startOfRequest = now; endOfRequest = now; lastActivity = now; transactionRequestSize = 0; result.requestSize = 0; transactionReplySize = packet.getLayer4length(); result.replySize += packet.getLayer4length(); startOfReply = now; endOfReply = now; // may be changed later //startOfReplyPhase = now; break; case REQUEST_PHASE: // REQUEST+REPLY: switch from REQUEST_PHASE to REPLY_PHASE //System.out.println("serialize flow: status: REQUEST_PHASE"); //System.out.println("serialize flow: it's the switch between request and reply phase"); transactionStatus = TransactionStatus.REPLY_PHASE; //endOfRequestPhase = lastActivity; startOfReply = now; endOfReply = now; // may be changed later //startOfReplyPhase = now; lastActivity = now; transactionReplySize += packet.getLayer4length(); result.replySize += packet.getLayer4length(); break; case REPLY_PHASE: // REPLY+REPLY: packet should belong to the last reply or be a new reply (delay decides) assert lastActivity != UNKNOWN; long timeSinceLastReply = now - lastActivity; if (timeSinceLastReply < MULTIPART_REPLY_THRESHOLD) { // assume that packet belongs to current reply lastActivity = now; endOfReply = now; transactionReplySize += packet.getLayer4length(); result.replySize += packet.getLayer4length(); break; } else { // assume that packet belongs to a new reply // finish last reply: startReplyOffsets.add(startOfReply); endReplyOffsets.add(endOfReply); replySizes.add(transactionReplySize); startOfReply = now; endOfReply = now; // may be changed later lastActivity = now; // add new Reply: transactionReplySize = packet.getLayer4length(); result.replySize += packet.getLayer4length(); } //System.out.println("serialize flow: status: REPLY_PHASE"); //long timeSinceLastReply = now - lastActivity; } } } // handle open transactions if (transactionStatus == TransactionStatus.REQUEST_PHASE && transactionRequestSize != 0) { //System.out.println("serialize flow: handle open transaction: REQUEST_PHASE"); result.transactions.add(new ExtendedTransaction( (int) (startOfRequest - endOfLastTransaction), startOfRequest, endOfRequest, transactionRequestSize, result.receiverID, null, null, null )); endOfLastTransaction = endOfRequest; } else if (transactionStatus == TransactionStatus.REPLY_PHASE) { //System.out.println("serialize flow: handle open transaction: REPLY_PHASE"); assert startOfReply != UNKNOWN; replySizes.add(transactionReplySize); startReplyOffsets.add(startOfReply); endReplyOffsets.add(endOfReply); result.transactions.add(new ExtendedTransaction( (int) (startOfRequest - endOfLastTransaction), startOfRequest, endOfRequest, transactionRequestSize, result.receiverID, Util.toLongArray(startReplyOffsets), Util.toLongArray(endReplyOffsets), Util.toIntArray(replySizes) )); endOfLastTransaction = endOfReply; } // serialize results if (result.transactions.size() != 0) { try { result.endOfFlow = lastActivity; assert result.endOfFlow >= result.startOfFlow; result.serialize(resultTrace); resultTrace.write("\n"); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException("ERROR: could not wrtie flow to trace file " +resultTrace); } } //System.out.println("resulting transactions: "); //for (ApplicationLevelMessage transaction: result.transactions) // System.out.println(transaction); //System.out.println("serialize flow: finsihed"); } }