/* * Architecture.java - This file is part of the Jakstab project. * Copyright 2007-2015 Johannes Kinder <jk@jakstab.org> * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, see <http://www.gnu.org/licenses/>. */ package org.jakstab.ssl; import java.io.*; import java.util.*; import org.jakstab.Options; import org.jakstab.util.Logger; import org.jakstab.asm.*; import org.jakstab.asm.x86.*; import org.jakstab.cfa.RTLLabel; import org.jakstab.rtl.*; import org.jakstab.rtl.expressions.*; import org.jakstab.rtl.statements.*; import org.jakstab.ssl.parser.*; import antlr.ANTLRException; /** * This class represents the physical architecture a program runs on. It is * initialized by loading the corresponding SSL specification. It should * encapsulate all architecture specific aspects of the analyses, and * also supports the conversion of disassembled instructions to RTL statements. * * @author Johannes Kinder */ public class Architecture { private final static Logger logger = Logger.getLogger(Architecture.class); // Variables that can be safely forgotten when crossing instruction boundaries // This should better be extracted from the SSL somehow (e.g. by making all // explicitly declared registers non-temporary. private static final SetOfVariables temporaryVariables; // flags private static final SetOfVariables statusFlags; static { temporaryVariables = new SetOfVariables(Arrays.asList(new RTLVariable[] { ExpressionFactory.createVariable("tmpb", 8), ExpressionFactory.createVariable("tmph", 16), ExpressionFactory.createVariable("tmp1", 32), ExpressionFactory.createVariable("tmp2", 32), ExpressionFactory.createVariable("tmp3", 32), ExpressionFactory.createVariable("tmp4", 32), ExpressionFactory.createVariable("tmp5", 32), ExpressionFactory.createVariable("retaddr", 32), ExpressionFactory.createVariable("tmpl", 64), ExpressionFactory.createVariable("tmpD", 80), ExpressionFactory.createVariable("tmpD1", 80), ExpressionFactory.createVariable("tmpD2", 80), ExpressionFactory.createVariable("tmpb1", 8), ExpressionFactory.createVariable("tmpb2", 8), ExpressionFactory.createVariable("tmph1", 16), ExpressionFactory.createVariable("tmph2", 16), ExpressionFactory.createVariable("tmpq2", 64), ExpressionFactory.createVariable("tmpq3", 64), ExpressionFactory.createVariable("tmpq5", 64) })); statusFlags = new SetOfVariables(Arrays.asList(new RTLVariable[] { ExpressionFactory.createVariable("%AF", 1), ExpressionFactory.createVariable("%CF", 1), ExpressionFactory.createVariable("%C1", 1), ExpressionFactory.createVariable("%C2", 1), ExpressionFactory.createVariable("%FLF", 1), ExpressionFactory.createVariable("%FZF", 1), ExpressionFactory.createVariable("%fsw", 16), ExpressionFactory.createVariable("%fcw", 16), ExpressionFactory.createVariable("%fstp", 8), ExpressionFactory.createVariable("%DF", 1), ExpressionFactory.createVariable("%IF", 1), ExpressionFactory.createVariable("%OF", 1), ExpressionFactory.createVariable("%PF", 1), ExpressionFactory.createVariable("%SF", 1), ExpressionFactory.createVariable("%ZF", 1) })); } private File specFile; private Map<String, SSLInstruction> instructions; private Map<String, List<SSLInstruction>> instrGroups; private final RTLVariable stackPointer; private final RTLVariable framePointer; private final RTLVariable loopCounter; private final RTLVariable stringSource; private final RTLVariable stringTarget; private final RTLVariable retAddrVar; private final MagicInstructions magicInstructions; private SetOfVariables registers; /** * Parses an SSL specification from a given filename and converts the RTL * blocks to canonical form. * * @param fileName The path of the SSL file to be parsed. */ public Architecture(String fileName) throws FileNotFoundException, ANTLRException { parseSSL(fileName); magicInstructions = new MagicInstructions(); stackPointer = ExpressionFactory.createVariable("%esp", 32); framePointer = ExpressionFactory.createVariable("%ebp", 32); retAddrVar = ExpressionFactory.createVariable("retaddr", 32); loopCounter = ExpressionFactory.createVariable("%ecx", 32); stringSource = ExpressionFactory.createVariable("%esi", 32); stringTarget = ExpressionFactory.createVariable("%edi", 32); } public RTLVariable stackPointer() { return stackPointer; } public RTLVariable returnAddressVariable() { return retAddrVar; } public RTLVariable framePointer() { return framePointer; } public RTLVariable programCounter() { return ExpressionFactory.pc; } public RTLVariable loopCounter() { return loopCounter; } public RTLVariable stringSource() { return stringSource; } public RTLVariable stringTarget() { return stringTarget; } public SetOfVariables getTemporaryVariables() { return temporaryVariables; } public SetOfVariables getStatusFlags() { return statusFlags; } public SetOfVariables getRegisters() { return registers; } public boolean isRegister(RTLVariable var) { return registers.contains(var); } /** * Writes the whole instruction set to System.out. */ void dumpToConsole() { for (Iterator<Map.Entry<String, SSLInstruction>> iter = instructions.entrySet().iterator(); iter.hasNext();) { Map.Entry<String, SSLInstruction> entry = iter.next(); SSLInstruction instr = entry.getValue(); System.out.println(instr.toString() + ": " + instr.getBody()); } } // Scores for ranking SSL instruction matches. private final int NUM_OPERANDS_SCORE = 100; // Correct number of operands private final int IMPLICIT_OPERAND_MATCH_SCORE = 5; // Parameter name matches suffix (AL, AX, EAX, CS, DS, ES) private final int OPERAND_TYPE_EXACT_SCORE = 4; // Parameter type matches exactly (i, reg, modrm, mem) private final int OPERAND_TYPE_MATCH_SCORE = 3; // Parameter type possibly matches (modrm to reg) private static final String[] repInstructions = new String[] { "CMPS", "LODS", "MOVS", "SCAS", "STOS", "INS", "OUTS", "NOP" }; /** * Returns the name of this instruction in the SSL definitions. * Used for looking up RTL descriptions. * Looks for opcode name and performs parameter matching: * * @param instr The instruction to be matches with an SSL prototype * @return The name of the prototype */ private SSLInstruction matchInstruction(Instruction instr) { String name = instr.getName(); name = name.toUpperCase(Locale.ENGLISH); if (instr instanceof X86Instruction) { X86Instruction x86instr = (X86Instruction)instr; // This applies only to "special" instructions used by harness files // for generating IR statements directly if (x86instr.hasPrefixLOCK() && x86instr.hasPrefixREPZ()) { if (name.equals("INCL")) { return magicInstructions.getAllocPrototype(); } else if (name.equals("NOTL")) { return magicInstructions.getDeallocPrototype(); } else if (name.equals("SUBL")) { return magicInstructions.getHavoc32Prototype(); } else if (name.equals("SUBW")) { return magicInstructions.getHavoc16Prototype(); } else if (name.equals("SUBB")) { return magicInstructions.getHavoc8Prototype(); } else if (name.equals("MOVL")) { return magicInstructions.getNondet32Prototype(); } else if (name.equals("MOVW")) { return magicInstructions.getNondet16Prototype(); } else if (name.equals("MOVB")) { return magicInstructions.getNondet8Prototype(); } else if (name.equals("ADDL")) { return magicInstructions.getAssertGTPrototype(); } else if (name.equals("CMPL")) { return magicInstructions.getAssertEQPrototype(); } else if (name.equals("ADCL")) { return magicInstructions.getAssertGEPrototype(); } else { logger.warn("Instruction with both LOCK and REP prefixes was not recognized as Jakstab magic!"); } } // Rename instructions with REP prefix to the SSL-format // but only do this where it has an effect, otherwise drop it else if (x86instr.hasPrefixREPZ() || x86instr.hasPrefixREPNZ()) { boolean legalREP = false; for (String repInstr : repInstructions) { if (name.startsWith(repInstr)) { if (x86instr.hasPrefixREPZ()) name = "REP" + name; if (x86instr.hasPrefixREPNZ()) name = "REPNE" + name; legalREP = true; break; } } if (!legalREP) logger.info("Ignoring REP prefix of " + name + "!"); } } // check for a direct match and return it if there are no other possibilities if (instructions.containsKey(name) && (!instrGroups.containsKey(name) || instrGroups.get(name).size() == 1)) { return instructions.get(name); } if (!(instrGroups.containsKey(name))) { logger.warn("SSL library has no entry for " + name + "!"); return null; } List<SSLInstruction> instrList = instrGroups.get(name); // Is there only one match? if (instrList.size() == 1) return instrList.get(0); // If there are more, do matching int maxScore = -1; int score = 0; SSLInstruction maxMatch = null; //StringBuilder sb = new StringBuilder(); for (SSLInstruction proto : instrList) { score = 0; // Check parameter count if (instr.getOperandCount() == proto.getParameterCount()) score += NUM_OPERANDS_SCORE; if (Options.summarizeRep.getValue() && proto.getName().startsWith("REP") && proto.getName().endsWith("SUMMARY")) { score++; } // Do parameter matching for (int i = 0; i < instr.getOperandCount(); i++) { Operand oper = instr.getOperand(i); // Special handling for implicit operands if (oper instanceof Register && ( (oper.equals(X86Registers.EAX) && proto.getName().endsWith("EAX")) || (oper.equals(X86Registers.AX) && proto.getName().endsWith("AX")) || (oper.equals(X86Registers.AL) && proto.getName().endsWith("AL")) || (oper.equals(X86SegmentRegisters.CS) && proto.getName().endsWith("CS")) || (oper.equals(X86SegmentRegisters.DS) && proto.getName().endsWith("DS")) || (oper.equals(X86SegmentRegisters.ES) && proto.getName().endsWith("ES")) || (oper.equals(X86SegmentRegisters.FS) && proto.getName().endsWith("FS")) || (oper.equals(X86SegmentRegisters.GS) && proto.getName().endsWith("GS")) || (oper.equals(X86SegmentRegisters.SS) && proto.getName().endsWith("SS")) )) { score += IMPLICIT_OPERAND_MATCH_SCORE; if (instr.getOperandCount() == proto.getParameterCount() + 1) { score += NUM_OPERANDS_SCORE; } } if (proto.getParameter(i) == null) continue; String param = proto.getParameter(i).getName(); if ((oper instanceof Register && param.startsWith("reg")) || (oper instanceof X86FloatRegister && param.equals("sti")) || (oper instanceof Immediate && param.equals("i" + Integer.toString(((Immediate)oper).getDataType().bits()))) || (oper instanceof MemoryOperand && (param.equals("modrm") || param.equals("mem"))) || (oper instanceof PCRelativeAddress && (param.startsWith("reloc")))) score += OPERAND_TYPE_EXACT_SCORE; else if (oper instanceof Register && param.equals("modrm")) score += OPERAND_TYPE_MATCH_SCORE; } /*if (logger.isDebugEnabled()) sb.append(proto.getName() + "=" + score + " ");*/ if (score > maxScore) { maxMatch = proto; maxScore = score; } } //logger.debug(sb); return maxMatch; } /** * Returns the RTL sequence which corresponds to the specified assembly instruction. * The instruction is looked up in the library and the RTL template is instantiated * with the instruction's parameters. * * @param address the address of the instruction * @param instr the assembly instruction to be translated to RTL * @return a sequence of RTL statements that match the instruction's behavior */ public StatementSequence getRTLEquivalent(AbsoluteAddress address, Instruction instr) { StatementSequence rtlTemplate = null; SSLInstruction sslInstr = matchInstruction(instr); if (sslInstr == null) { logger.warn(address + ": No equivalent SSL instruction found for: " + instr.getName()); } else { rtlTemplate = sslInstr.getBody(); } Context instrParamContext = new Context(); boolean excessAsmOps = false; if (sslInstr != null && instr.getOperandCount() > sslInstr.getParameterCount()) { excessAsmOps = true; logger.debug("Different number of operands for " + instr.getName() + " (" + instr.getOperandCount() + ") and " + sslInstr.toString() + " (" + sslInstr.getParameterCount() + ")!"); logger.debug("Unassigned operand: " + instr.getOperand(0)); for (int i = sslInstr.getParameterCount() + 1; i < instr.getOperandCount(); i++) logger.debug("Unassigned operand: " + instr.getOperand(i).toString()); } if (!(sslInstr == null || instr.getOperandCount() >= sslInstr.getParameterCount())) { logger.error("Instruction: " + address + ": " + instr.toString(address.getValue(), DummySymbolFinder.getInstance())); logger.error("Template: " + sslInstr); throw new RuntimeException("Too few operands in ASM instruction for SSL template"); } /* Transform Parameters. If there are excessive Asm operands, skip the first. * This fixes the problem with an implicit EAX operand. Might not work on other * architectures than x86! */ if (sslInstr != null) for (int i=0; i<sslInstr.getParameterCount(); i++) { Operand iOp = excessAsmOps ? instr.getOperand(i+1) : instr.getOperand(i); RTLExpression opAsExpr = ExpressionFactory.createOperand(iOp); instrParamContext.substitute(sslInstr.getParameter(i), opAsExpr); } /* Assign PC - the PC value in the RTL is that of the next instruction in Intel assembly */ long pcValue = address.getValue(); if (instr instanceof X86Instruction) pcValue += instr.getSize(); instrParamContext.addAssignment(ExpressionFactory.pc, ExpressionFactory.createNumber(pcValue, ExpressionFactory.pc.getBitWidth())); if (rtlTemplate == null) { logger.debug("Null RTL body for instruction: " + instr.getName()); StatementSequence newSeq = new StatementSequence(); //newSeq.addFirst(new RTLDirective("ASM_" + instr.getName())); newSeq.addFirst(new RTLSkip()); rtlTemplate = newSeq; } StatementSequence instrRTL = rtlTemplate.copy(); int rtlId = 0; instrRTL = instrRTL.evaluate(instrParamContext); if (instrRTL != null) { // we need to label only after evaluation, as some instructions might disappear for (RTLStatement stmt : instrRTL) { stmt.setLabel(address, rtlId++); stmt.setNextLabel(new RTLLabel(address, rtlId)); } } else { logger.debug("Detected semantic nop during instantiation: " + address); instrRTL = new StatementSequence(); RTLSkip nop = new RTLSkip(); nop.setLabel(address, 0); instrRTL.addFirst(nop); } // set next label of the last statement to fall-through instruction instrRTL.getLast().setNextLabel(new RTLLabel(new AbsoluteAddress(address.getValue() + instr.getSize()), 0)); // infer missing bit widths: try { for (RTLStatement s : instrRTL) s.inferTypes(this); } catch (TypeInferenceException e) { e.printStackTrace(); logger.error("Instruction: " + instr.toString(pcValue, DummySymbolFinder.getInstance())); logger.error("RTL: " + instrRTL); throw new RuntimeException(); } // evaluate again, to use inferred bit widths. instrRTL = instrRTL.evaluate(new Context()); // Remove bitranges on LHS and split AssignmentTemplates into memory and variables instrRTL = instrRTL.normalizeAssignments(); // One more simplification step instrRTL = instrRTL.evaluate(new Context()); return instrRTL; } public int getAddressBitWidth() { return stackPointer.getBitWidth(); } public void parseSSL(String fileName) throws FileNotFoundException, ANTLRException { specFile = new File(fileName); logger.info("Reading machine specification from " + specFile.getName() + "."); SSLLexer lex = new SSLLexer(new FileInputStream(specFile)); SSLParser parser = new SSLParser(lex); SSLPreprocessor prep = new SSLPreprocessor(); parser.start(); prep.start(parser.getAST()); Map<String,SSLFunction> instrPrototypes = prep.getInstructions(); registers = prep.getRegisters(); registers.removeAll(statusFlags); logger.debug("-- Got " + instrPrototypes.size() + " instructions."); instructions = new TreeMap<String, SSLInstruction>(); instrGroups = new TreeMap<String, List<SSLInstruction>>(); for (Iterator<Map.Entry<String, SSLFunction>> iterator = instrPrototypes.entrySet().iterator(); iterator.hasNext();) { Map.Entry<String, SSLFunction> entry = iterator.next(); String name = entry.getKey(); SSLFunction proto = entry.getValue(); //logger.debug("Converting " + proto.getName() + " = " + proto.getAST().toStringTree()); StatementSequence rtlBody = prep.convertToRTL(proto.getAST()); // Do a first evaluation step rtlBody = rtlBody.evaluate(new Context()); // Check is necessary for empty instructions, such as INT3 or NOP if (rtlBody != null) { // Canonize the RTL statements rtlBody = rtlBody.canonize(); RTLGoto.Type gotoType; if (proto.getName().startsWith("RET")) gotoType = RTLGoto.Type.RETURN; else if (proto.getName().startsWith("CALL")) gotoType = RTLGoto.Type.CALL; else gotoType = RTLGoto.Type.JUMP; // When there is a %pc assignment as last statement in a sequence, turn it to a GOTO. // We cannot do that if it appears earlier, b/c we might lose changes to variables // coming behind the %pc assignment! RTLStatement exitStatement = rtlBody.getLast(); if (exitStatement instanceof AssignmentTemplate) { AssignmentTemplate assignment = (AssignmentTemplate)exitStatement; if (assignment.getLeftHandSide().equals(ExpressionFactory.pc)) { RTLGoto newGoto; RTLExpression rhs = assignment.getRightHandSide(); if (rhs instanceof RTLConditionalExpression) { RTLConditionalExpression condExpr = (RTLConditionalExpression)rhs; if (condExpr.getTrueExpression() == ExpressionFactory.pc) { // conditional goto to false expression newGoto = new RTLGoto(condExpr.getFalseExpression(), ExpressionFactory.createNot(condExpr.getCondition()), gotoType); } else if (condExpr.getFalseExpression() == ExpressionFactory.pc) { // conditional goto to true expression newGoto = new RTLGoto(condExpr.getTrueExpression(), condExpr.getCondition(), gotoType); } else { logger.error("Dual branch in SSL definition: " + rtlBody); assert false; newGoto = new RTLGoto(rhs, gotoType); } } else { // unconditional goto newGoto = new RTLGoto(rhs, gotoType); } rtlBody = rtlBody.replace(assignment, newGoto); } } // If there is any pc-assignment, then add a goto to the end if (rtlBody.getDefinedVariables().contains(ExpressionFactory.pc)) { rtlBody.addLast(new RTLGoto(ExpressionFactory.pc, gotoType)); } } else logger.debug("Null rtl body: " + proto); SSLInstruction instr = new SSLInstruction(proto.getName(), proto.getParameters(), rtlBody); //logger.debug("Result:\n" + instr.getBody()); instructions.put(instr.getName(), instr); String[] parts = name.split("\\."); if (!instrGroups.containsKey(parts[0])) { List<SSLInstruction> instrList = new LinkedList<SSLInstruction>(); instrList.add(instr); instrGroups.put(parts[0], instrList); } else instrGroups.get(parts[0]).add(instr); } logger.debug("-- Suffix map has " + instrGroups.size() + " unique instructions."); } }