/* * Program.java - This file is part of the Jakstab project. * Copyright 2007-2015 Johannes Kinder <jk@jakstab.org> * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, see <http://www.gnu.org/licenses/>. */ package org.jakstab; import java.io.File; import java.io.IOException; import java.util.*; import org.jakstab.util.Logger; import org.jakstab.asm.*; import org.jakstab.cfa.CFAEdge; import org.jakstab.cfa.ControlFlowGraph; import org.jakstab.cfa.FineGrainedCFG; import org.jakstab.cfa.RTLLabel; import org.jakstab.disasm.DisassemblyException; import org.jakstab.loader.*; import org.jakstab.loader.elf.ELFModule; import org.jakstab.loader.pe.*; import org.jakstab.rtl.expressions.SetOfVariables; import org.jakstab.rtl.statements.RTLHalt; import org.jakstab.rtl.statements.RTLSkip; import org.jakstab.rtl.statements.RTLStatement; import org.jakstab.rtl.statements.StatementSequence; import org.jakstab.ssl.Architecture; import org.jakstab.util.FastSet; /** * There is one singleton Program object for all modules currently under analysis. * It stores all non-analysis information about the analyzed programs, including * statements, the current control flow graph, and symbols. * * @author Johannes Kinder */ public final class Program { private final static Logger logger = Logger.getLogger(Program.class); private static Program programInstance; /** * Get the singleton Program object. * * @return the singleton instance of the Program class */ public static Program getProgram() { return programInstance; } /** * Initially creates the Program object. * * @param arch An Architecture object with architecture specific information * @return the new singleton instance of the Program class */ public static Program createProgram(Architecture arch) { //assert programInstance == null; programInstance = new Program(arch); return programInstance; } private final Architecture arch; private RTLLabel start; private Map<RTLLabel, RTLStatement> statementMap; private Map<AbsoluteAddress, Instruction> assemblyMap; private ExecutableImage mainModule; private List<ExecutableImage> modules; private ControlFlowGraph cfg; private final Map<String, ExportedSymbol> exportedSymbols; private final Set<UnresolvedSymbol> unresolvedSymbols; private Set<RTLLabel> unresolvedBranches; private StubProvider stubLibrary; private Harness harness; public enum TargetOS {WINDOWS, LINUX, UNKNOWN}; private TargetOS targetOS; private Program(Architecture arch) { this.arch = arch; this.targetOS = TargetOS.UNKNOWN; modules = new LinkedList<ExecutableImage>(); assemblyMap = new TreeMap<AbsoluteAddress, Instruction>(); statementMap = new HashMap<RTLLabel, RTLStatement>(2000); exportedSymbols = new HashMap<String, ExportedSymbol>(); unresolvedSymbols = new FastSet<UnresolvedSymbol>(); unresolvedBranches = new FastSet<RTLLabel>(); } /** * Loads the module containing the main function. This function should be called last * for correct symbol resolution. * * @param moduleFile the file to load * @return the ExecutableImage class for the loaded module * @throws IOException * @throws BinaryParseException */ public ExecutableImage loadMainModule(File moduleFile) throws IOException, BinaryParseException { ExecutableImage module = loadModule(moduleFile); mainModule = module; setEntryAddress(module.getEntryPoint()); installStubs(); return module; } /** * Loads a secondary (library or stub) module for analysis. Automatically determines * the correct file type. * * @param moduleFile the file to load * @return the ExecutableImage class for the loaded module * @throws IOException * @throws BinaryParseException */ public ExecutableImage loadModule(File moduleFile) throws IOException, BinaryParseException { // First try to load it as a PE file, then object file, ELF and finally raw binary code // The right thing to do would be some smart IDing of the file type, but // this exception chaining works for now... ExecutableImage module = null; try { module = new PEModule(moduleFile, getArchitecture()); targetOS = TargetOS.WINDOWS; } catch (BinaryParseException e) { try { module = new ObjectFile(moduleFile, getArchitecture()); } catch (BinaryParseException e2) { try { module = new ELFModule(moduleFile, getArchitecture()); targetOS = TargetOS.LINUX; } catch (BinaryParseException e3) { module = new RawModule(moduleFile, getArchitecture()); } } } for (ExecutableImage existingModule : modules) { if (existingModule.getMaxAddress().getValue() >= module.getMinAddress().getValue() && existingModule.getMinAddress().getValue() <= module.getMaxAddress().getValue()) { throw new RuntimeException("Virtual addresses of modules overlap!"); } } modules.add(module); unresolvedSymbols.addAll(module.getUnresolvedSymbols()); for (ExportedSymbol symbol : module.getExportedSymbols()) { exportedSymbols.put(removeDecoration(symbol.getName()), symbol); } resolveSymbols(); return module; } private String removeDecoration(String s) { if (s.charAt(0) == '@' || s.charAt(0) == '_') s = s.substring(1); int i = s.indexOf('@'); if (i >= 0) s = s.substring(0, i); return s; } /** * Resolves symbols between the loaded modules. */ private void resolveSymbols() { Iterator<UnresolvedSymbol> sIter = unresolvedSymbols.iterator(); while (sIter.hasNext()) { UnresolvedSymbol unresolvedSymbol = sIter.next(); ExportedSymbol symbol = exportedSymbols.get(removeDecoration(unresolvedSymbol.getName())); if (symbol != null) { logger.debug("Resolving symbol " + unresolvedSymbol.getName()); unresolvedSymbol.resolve(symbol.getAddress()); sIter.remove(); } } } /** * Returns the address of the given procedure within the given library. Procedures * present within the analyzed modules are given precedence over stub functions. * * @param library * @param procedure * @return the virtual address of the procedure */ public AbsoluteAddress getProcAddress(String library, String procedure) { ExportedSymbol expSymbol = exportedSymbols.get(procedure); if (expSymbol != null) { return expSymbol.getAddress(); } else { return stubLibrary.resolveSymbol(library, procedure); } } public boolean isStub(AbsoluteAddress a) { return a.getValue() >= StubProvider.STUB_BASE; } public boolean isImport(AbsoluteAddress a) { if (isStub(a)) return true; ExecutableImage m = getModule(a); if (m == null) return false; return m.isImportArea(a); } /** * For all unresolved symbols, install simple stubs. */ public void installStubs() { if (mainModule instanceof AbstractCOFFModule) { stubLibrary = new Win32StubLibrary(arch); } else if (mainModule instanceof ELFModule){ stubLibrary = new LinuxStubLibrary(arch); } Iterator<UnresolvedSymbol> sIter = unresolvedSymbols.iterator(); while (sIter.hasNext()) { UnresolvedSymbol unresolvedSymbol = sIter.next(); AbsoluteAddress address = stubLibrary.resolveSymbol(unresolvedSymbol.getFromLibrary(), unresolvedSymbol.getName()); if (address != null) { //logger.debug("Installing stack height stub for " + unresolvedSymbol.getName()); unresolvedSymbol.resolve(address); sIter.remove(); } } if (!unresolvedSymbols.isEmpty()) logger.warn("Unresolved symbols remaining: " + unresolvedSymbols); } /** * Install a harness that sets up the symbolic environment before calling main * and provides a return point with a termination statement. * * @param harness the harness object to install */ public void installHarness(Harness harness) { this.harness = harness; harness.install(this); } /** * Set the program entry point to the given label. * @param label the new entry point */ public void setStart(RTLLabel label) { this.start = label; } /** * Set the program entry point to the given address. * @param entryAddress the new entry address */ public void setEntryAddress(AbsoluteAddress entryAddress) { setStart(new RTLLabel(entryAddress)); } /** * Get the main module. * @return the main module */ public ExecutableImage getMainModule() { return mainModule; } /** * Get the module that contains the specified virtual address at runtime. * @param a a virtual address * @return the module to which the given virtual address belongs. */ public ExecutableImage getModule(AbsoluteAddress a) { for (ExecutableImage module : modules) { if (module.getFilePointer(a) >= 0) return module; } return null; } public Iterator<AbsoluteAddress> codeAddressIterator() { return getMainModule().codeBytesIterator(); } /** * Get the statement at a specific label. If there is no statement stored, attempts * to disassemble the instruction at the label's virtual address. If the address is * outside of the file area, logs an error and returns a Halt statement by default. * * @param label The label for which to get the statement * @return The statement object at label. */ public final RTLStatement getStatement(RTLLabel label) { if (!statementMap.containsKey(label)) { AbsoluteAddress address = label.getAddress(); Instruction instr = getInstruction(address); // If we did not get an instruction, add an artificial Halt for recovery if (instr == null) { RTLHalt halt = new RTLHalt(); halt.setLabel(label); putStatement(halt); logger.error("ERROR: Replacing unknown instruction with HALT."); if (Options.debug.getValue()) throw new DisassemblyException("Disassembly failed at " + address); } else { try { StatementSequence seq = arch.getRTLEquivalent(address, instr); for (RTLStatement s : seq) { putStatement(s); } } catch (Exception e) { logger.error("Error during translation of instruction to IL"); e.printStackTrace(); RTLStatement skip = new RTLSkip(); skip.setLabel(label); skip.setNextLabel(new RTLLabel(new AbsoluteAddress(address.getValue() + 1))); putStatement(skip); } assert statementMap.containsKey(label) : "Disassembly did not produce label: " + label; } } return statementMap.get(label); } /** * Stores a statement in the program. If a statement already exists with the same * label, it is replaced. * * @param stmt The statement to be stored. Has to contain a proper label. */ public final void putStatement(RTLStatement stmt) { RTLStatement existing = statementMap.get(stmt.getLabel()); if (existing != null) { if (existing.equals(stmt)) return; logger.debug("Replacing statement at " + stmt.getLabel()); } statementMap.put(stmt.getLabel(), stmt); } public boolean containsLabel(RTLLabel label) { return statementMap.containsKey(label); } public final int getStatementCount() { return statementMap.size(); } public final int getInstructionCount() { return assemblyMap.size(); } public Harness getHarness() { return harness; } /** * Gets the assembly instruction at the specified virtual address. * @param address a virtual address * @return the assembly instruction at the specified address */ public final Instruction getInstruction(AbsoluteAddress address) { Instruction instr = assemblyMap.get(address); if (instr != null) { return instr; } else { // No real instructions in prologue/epilogue if (harness.contains(address) || isStub(address)) return null; ExecutableImage module = getModule(address); long fp = -1; if (module == null) { logger.error("No module for address " + address + ". Cannot disassemble instruction!"); } else { fp = module.getFilePointer(address); // Also check whether fp is out of the int range, since the X86Disassembler actually // performs this cast in its implementation. if (fp < 0 || (int)fp < 0) { logger.error("Requested instruction outside of file area: " + address); } else { if (!module.isCodeArea(address)) { logger.error("Requested instruction outside code section: " + address); return null; } instr = module.getDisassembler().decodeInstruction(fp); if (instr == null) { logger.error("Instruction could not be disassembled at: " + address); } } } if (instr != null) putInstruction(address, instr); return instr; } } /** * Stores an assembly instruction at the given address, overwriting * any existing instruction. * @param addr the virtual address to save the instruction at * @param instr the assembly instruction * @return true if there was no instruction stored for that address before, false otherwise. */ public final boolean putInstruction(AbsoluteAddress addr, Instruction instr) { //logger.info(addr + " " + instr.toString(addr.getValue(), new DummySymbolFinder())); return assemblyMap.put(addr, instr) == null; } /** * Get the string representation of the assembly instruction at the given address. * @param addr a virtual address * @return a string representation of the assembly code at the given address */ public String getInstructionString(AbsoluteAddress addr) { Instruction instr = getInstruction(addr); if (instr == null) return "NON_EXISTENT"; return instr.toString(addr.getValue(), symbolFinder(addr)); } /** * Get the string representation of the specified assembly instruction assuming * it is located at the given address. * @param addr a virtual address * @param instr an assembly instruction * @return a string representation of the assembly code at the given address */ public String getInstructionString(AbsoluteAddress addr, Instruction instr) { if (instr == null) return "NON_EXISTENT"; return instr.toString(addr.getValue(), symbolFinder(addr)); } public String getSymbolFor(RTLLabel label) { SymbolFinder symFinder = symbolFinder(label.getAddress()); if (symFinder.hasSymbolFor(label.getAddress())) { return symFinder.getSymbolFor(label.getAddress()); } else { return label.toString(); } } public String getSymbolFor(AbsoluteAddress addr) { return symbolFinder(addr).getSymbolFor(addr); } public AbsoluteAddress getAddressForSymbol(String symbol) { for (ExecutableImage module : modules) { AbsoluteAddress a = module.getSymbolFinder().getAddressFor(symbol); if (a != null) return a; } logger.error("Could not find address for symbol \"" + symbol + "\""); return null; } private SymbolFinder symbolFinder(AbsoluteAddress addr) { if (isStub(addr)) return stubLibrary.getSymbolFinder(); ExecutableImage module = getModule(addr); return (module == null) ? DummySymbolFinder.getInstance() : module.getSymbolFinder(); } public Set<RTLLabel> getUnresolvedBranches() { return unresolvedBranches; } public void setUnresolvedBranches(Set<RTLLabel> unresolvedBranches) { this.unresolvedBranches = unresolvedBranches; } /** * @return the assemblyMap */ public final Map<AbsoluteAddress, Instruction> getAssemblyMap() { return assemblyMap; } public TargetOS getTargetOS() { return targetOS; } /** * Returns all variables used in the program. At the current state of * the implementation, this includes only registers and flags. * * @return A set containing all variables used in this program. */ public SetOfVariables getUsedVariables() { SetOfVariables result = new SetOfVariables(); for (CFAEdge edge : cfg.getEdges()) result.addAll(((RTLStatement)edge.getTransformer()).getUsedVariables()); return result; } public Architecture getArchitecture() { return arch; } public Collection<ExportedSymbol> getSymbols() { return exportedSymbols.values(); } public ControlFlowGraph getCFG() { return cfg; } public void setCFA(Set<CFAEdge> cfa) { cfg = new FineGrainedCFG(cfa); } public RTLLabel getStart() { return start; } public int countIndirectBranches() { int res = 0; for (Map.Entry<AbsoluteAddress, Instruction> entry : assemblyMap.entrySet()) { Instruction instr = entry.getValue(); if (instr instanceof BranchInstruction) { BranchInstruction branch = (BranchInstruction)instr; if (branch.isIndirect()) { // if branch target is not a memory operand pointing into a static data area of the binary (imports) if (branch.getBranchDestination() instanceof MemoryOperand) { MemoryOperand memOp = (MemoryOperand)branch.getBranchDestination(); // Import calls have only displacement if (memOp.getBase() == null && memOp.getIndex() == null) { AbsoluteAddress disp = new AbsoluteAddress(memOp.getDisplacement()); // Check whether displacement points into import table ExecutableImage module = getModule(disp); if (module instanceof PEModule && ((PEModule)module).getImportTable().containsKey(disp)) continue; } } res++; //logger.verbose(entry.getKey() + "\t" + getInstructionString(entry.getKey())); } } } return res; } }