/* SAAF: A static analyzer for APK files.
* Copyright (C) 2013 syssec.rub.de
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.rub.syssec.saaf.application.instructions;
import java.util.Arrays;
import java.util.LinkedList;
import org.apache.log4j.Logger;
import de.rub.syssec.saaf.misc.ByteUtils;
import de.rub.syssec.saaf.misc.config.Config;
import de.rub.syssec.saaf.model.application.BasicBlockInterface;
import de.rub.syssec.saaf.model.application.CodeLineInterface;
import de.rub.syssec.saaf.model.application.ConstantInterface;
import de.rub.syssec.saaf.model.application.SyntaxException;
import de.rub.syssec.saaf.model.application.instruction.InstructionInterface;
import de.rub.syssec.saaf.model.application.instruction.InstructionType;
/**
* This class holds information about the SMALI opcodes which are parsed from
* the SMALI files.
*
* @author Johannes Hoffmann <johannes.hoffmann@rub.de>
*
*/
public class Instruction implements InstructionInterface {
/**
* Enables the debug log statements of this class. Read once from the system
* property "debug.slicing"; false if the property is not set.
*/
private static final boolean DEBUG=Boolean.parseBoolean(System.getProperty("debug.slicing","false"));
/** The code line this instruction was created from. */
private final CodeLineInterface codeLine;
/**
* The opcode token, i.e. the first token of the split code line. Stays null
* until parseOpCode() has split the line.
*/
private byte[] opCode = null;
/**
* The type of this instruction. NOT_YET_PARSED until either the constructor
* classifies the line as a non-opcode or parseOpCode() looks the opcode up.
*/
private InstructionType type = InstructionType.NOT_YET_PARSED;
/**
* The register where the result of the operation is located, may be null.
*/
private byte[] resultRegister = null;
/**
* The field where the result of the operation is located, may be null. The
* first element is the class name, the second one the field name.
*/
private byte[][] resultField = null; // TODO: use this to directly link to Field.class?
/**
* The involved registers in this operation, e.g., when calling a method.
*/
private LinkedList<byte[]> involvedRegisters = new LinkedList<byte[]>();
/**
* The involved fields in this operation, e.g., when copying a variable into a
* register.
*/
private LinkedList<byte[]> involvedFields = new LinkedList<byte[]>();
/**
* The class, method and parameters of invoke opcodes. cmpr[0] is the class,
* cmpr[1] the method, cmpr[2] the raw parameters and cmpr[3] the return value.
*/
private byte[][] cmpr = null;
/**
* This is the value which gets assigned by the const-x opcodes, the
* constant which is involved in some binary math opcode or the values
* during array initialization. May be null.
* NOTE(review): no assignment to this field is visible in this class, so it
* appears to always be null here; getConstantValue() builds a temporary
* Constant instead.
*/
private ConstantInterface constant = null;
/**
* Denotes whether this instruction holds a constant.
*/
private boolean hasConstant = false;
/**
* Denotes the label or some opcode that indicates where to jump to, e.g.,
* fill-array-data.
*/
private byte[] label = null;
/** The log4j logger of this class. */
private static final Logger LOGGER = Logger.getLogger(Instruction.class);
public Instruction(CodeLineInterface codeLine) {
this.codeLine = codeLine;
if (codeLine.isEmpty()) {
type = InstructionType.EMPTY_LINE;
} else if (codeLine.startsWith(new byte[] { '.' })) {
type = InstructionType.SMALI_DOT_COMMENT;
} else if (codeLine.startsWith(new byte[] { ':' })) {
type = InstructionType.LABEL;
} else if (codeLine.startsWith(new byte[] { '#' })) {
type = InstructionType.SMALI_HASH_KEY_COMMENT;
} else { // a shortcut, opcodes should begin with a lowercase letter
byte firstByte = codeLine.getLine()[0]; // cannot be empty, see first check
if (firstByte < 97 || firstByte > 122) { // a and z
type = InstructionType.UNKNOWN;
}
}
}
/**
 * Determines the type of this instruction and, if it is an opcode known to
 * {@code InstructionMap}, parses its operands.
 * <p>
 * Nothing happens if the constructor already classified the line as an empty
 * line, a dot/hash comment, a label or as unknown. Otherwise the line is
 * split, the first token becomes the opcode, its type is looked up and the
 * remaining tokens are handed to {@code parse}.
 * <p>
 * NOTE(review): several opcode types append to the involved register/field
 * lists, so calling this method more than once on the same instance would
 * add those entries again.
 */
@Override
public void parseOpCode() {
    if (isNoOpcodeType(type)) {
        // do not ask the map if we already know this is no opcode at all
        return;
    }

    final LinkedList<byte[]> tokens = split(codeLine.getLine());
    opCode = tokens.getFirst();
    type = InstructionMap.getType(opCode);

    // only parse the operands of opcodes that we know of
    if (!isNoOpcodeType(type)) {
        parse(tokens);
    }
}

/** Tells whether the given type marks a line that is not a parsable opcode. */
private static boolean isNoOpcodeType(InstructionType candidate) {
    return candidate == InstructionType.UNKNOWN
            || candidate == InstructionType.EMPTY_LINE
            || candidate == InstructionType.SMALI_DOT_COMMENT
            || candidate == InstructionType.LABEL
            || candidate == InstructionType.SMALI_HASH_KEY_COMMENT;
}
/**
* Splits a SMALI code line into its tokens.
*
* The line is split at ' ' and ',' unless the separator is located inside
* { } or inside a quoted string. Everything between a '{' and its closing
* '}' becomes one token, without the braces. A quoted string becomes one
* token which still includes its surrounding quotes. Empty tokens between
* two adjacent separators are dropped.
*
* NOTE(review): once a quoted string has been closed, the text following it
* is only emitted if another separator follows; a trailing remainder is not
* copied because copyLastSequence is never set back to true.
*
* @param input
* the raw bytes of the code line
* @return the tokens in the order in which they occur in the line
*/
public static LinkedList<byte[]> split(byte[] input) {
LinkedList<byte[]> list = new LinkedList<byte[]>();
// start index of the token which is currently being read
int lastIndex = 0;
boolean inQuotes = false;
// true while reading between '{' and '}' ("Klammer" is German for brace)
boolean inKlammer = false;
// whether the bytes after the last separator still have to be copied
// once the loop is done
boolean copyLastSequence = true;
/*
* Used for special cases like
* .local v15, list:Ljava/util/Map;,"Ljava/util/Map<Ljava/lang/String;Ljava/lang/Object;>;"
* where both quotes which follow the sequence ;, have to be ignored.
*/
boolean skipNextQuote = false;
for (int i = 0; i < input.length; i++) {
switch (input[i]) {
case ' ':
if (!inQuotes && !inKlammer) { // split here
if (lastIndex != i) // skip empty tokens
list.addLast(ByteUtils.subbytes(input, lastIndex, i));
lastIndex = i + 1; // the next token starts after the ' '
} else {
// inside quotes or braces: the ' ' belongs to the token
}
break;
case ',': // handled like ' ', except for the workaround below
if (!inQuotes && !inKlammer) { // split here
/*
* Workaround for lines like
* .local v15, list:Ljava/util/Map;,"Ljava/util/Map<Ljava/lang/String;Ljava/lang/Object;>;"
* A ',' which is preceded by ';' and followed by '"' does
* not separate two tokens, the last split has to occur
* before list:Ljava/util....
*/
if ((i + 1 < input.length) && input[i + 1] == '"'
&& (i - 1 >= 0) && input[i - 1] == ';') {
// the length/index checks guard the array boundaries
break; // do not split here
}
if (lastIndex != i) // skip empty tokens
list.addLast(ByteUtils.subbytes(input, lastIndex, i));
lastIndex = i + 1; // the next token starts after the ','
if (i == input.length - 1)
copyLastSequence = false; // reached the end, nothing left to copy
} else {
// inside quotes or braces: the ',' belongs to the token
}
break;
case '{':
if (!inQuotes && !inKlammer) {
// Opening brace: aggregate everything up to the closing
// brace into one token. The previous character is expected
// to be a ' ', so no pending token is flushed here.
inKlammer = true;
lastIndex = i + 1; // the token starts after the '{'
} else if (!inQuotes && inKlammer) {
// nested braces are not expected, only report them
LOGGER.error("Split CL: Found { although another { was found!");
}
break;
case '}':
if (inKlammer && !inQuotes) { // found the closing brace
// copy everything between '{' and '}', without the braces
list.addLast(ByteUtils.subbytes(input, lastIndex, i));
lastIndex = i + 1; // the next token starts after the '}'
inKlammer = false;
if (i == input.length - 1)
copyLastSequence = false; // reached the end, nothing left to copy
} else if (!inKlammer && !inQuotes) {
// closing brace without an opening one, only report it
LOGGER.error("Split CL: Found } although !inQuotes && !inKlammer");
}
break;
case '"':
/*
* The first two checks are the second part of the workaround for
* lines like
* .local v15, list:Ljava/util/Map;,"Ljava/util/Map<Ljava/lang/String;Ljava/lang/Object;>;"
* See the note at skipNextQuote.
*/
if (skipNextQuote) {
skipNextQuote = false; // consider the next quote again
break;
}
if (!inKlammer && !inQuotes) { // opening quote
if ((i - 2 >= 0) && input[i - 1] == ','
&& input[i - 2] == ';') {
// the index check guards the array boundaries
skipNextQuote = true;
break; // do not start a string here AND ignore the next '"'
}
lastIndex = i; // the token starts at the opening quote
inQuotes = true;
} else if (!inKlammer && inQuotes) {
if (input[i - 1] == '\\')
continue; // escaped quote inside the string, ignore it
else { // closing quote
// copy the string including its opening and closing quote
list.addLast(ByteUtils.subbytes(input, lastIndex, i + 1));
lastIndex = i + 1;
copyLastSequence = false;
inQuotes = false;
}
} else {
// a quote between braces is not expected, only report it
LOGGER.error("Split CL: Found unexpected \"!");
}
break;
default:
// an ordinary character which is part of the current token
continue;
}
}
// Copy what is left after the last separator. This is skipped if the
// line ended with a separator or if a quoted string was closed before.
if (copyLastSequence) {
// copy the last (or the only) token
list.addLast(ByteUtils.subbytes(input, lastIndex, input.length));
}
return list;
}
/**
 * Parses the register list of an instruction, e.g. {@code {v0 .. v5}},
 * {@code {v7}}, {@code {v7, v8}} or {@code v7}. The surrounding '{' and '}'
 * are optional, but may only occur as a single pair.
 * <p>
 * A range ({@code vX .. vY}) is expanded into all registers from X to Y. The
 * register prefix is taken from the input, so {@code {p0 .. p2}} yields
 * {@code p0, p1, p2} instead of {@code v0, v1, v2}. If both ends of a range
 * use different prefixes (e.g. {@code {v2 .. p0}}), the registers in between
 * cannot be named without knowing the register layout of the method; in that
 * case only the two registers which are written in the line are returned and
 * a warning is logged.
 *
 * @param parameters
 *            the register list as described above, may be null or empty
 * @return a list with all registers, empty if {@code parameters} is null or
 *         empty
 * @throws NumberFormatException
 *             if a range bound is not of the form prefix + decimal number
 */
private static LinkedList<byte[]> parseParameter(byte[] parameters) {
    LinkedList<byte[]> result = new LinkedList<byte[]>();
    if (parameters == null || parameters.length == 0) {
        if (DEBUG) LOGGER.debug("parseParameter: empty parameters detected.");
        return result;
    } else if (parameters[0] == '{'
            && parameters[parameters.length - 1] == '}') { // sanity check
        // strip the '{' and '}'
        parameters = ByteUtils.subbytes(parameters, 1,
                parameters.length - 1);
    }

    int lastIndex = 0; // start index of the register currently being read
    if (ByteUtils.contains(parameters, ',')) { // eg {v7, v8}
        boolean inRegister = true;
        for (int i = 0; i < parameters.length; i++) {
            if (inRegister) {
                if (parameters[i] == ',') { // end of the current register
                    inRegister = false;
                    result.addLast(ByteUtils.subbytes(parameters,
                            lastIndex, i));
                }
            } else if (parameters[i] != ' ') { // the next register starts
                inRegister = true;
                lastIndex = i;
            }
        }
        // copy the last register
        result.add(ByteUtils.subbytes(parameters, lastIndex,
                parameters.length));
    } else if (ByteUtils.contains(parameters, '.')) { // eg {v0 .. v5}
        byte[] firstNumber = null;
        boolean inRegister = true;
        for (int i = 0; i < parameters.length; i++) {
            if (inRegister) {
                if (parameters[i] == ' ') { // end of the current register
                    inRegister = false;
                    // lastIndex + 1: cut the prefix from eg v12
                    firstNumber = ByteUtils.subbytes(parameters,
                            lastIndex + 1, i);
                }
            } else if (parameters[i] != ' ' && parameters[i] != '.') {
                inRegister = true; // the upper bound starts here
                lastIndex = i;
            }
        }
        // lastIndex + 1: cut the prefix from eg v12
        byte[] lastNumber = ByteUtils.subbytes(parameters, lastIndex + 1,
                parameters.length);
        int fromReg = Integer.parseInt(new String(firstNumber));
        int toReg = Integer.parseInt(new String(lastNumber));

        // Keep the prefix as written in the line; 'p' registers must not
        // be turned into 'v' registers.
        String fromPrefix = String.valueOf((char) parameters[0]);
        String toPrefix = String.valueOf((char) parameters[lastIndex]);
        if (fromPrefix.equals(toPrefix)) {
            // "create" all intermediate registers and put them into the list
            for (int reg = fromReg; reg <= toReg; reg++) {
                result.addLast((fromPrefix + reg).getBytes());
            }
        } else {
            // The registers in between cannot be resolved here.
            LOGGER.warn("parseParameter: register range with mixed prefixes, "
                    + "only the first and the last register are used: "
                    + new String(parameters));
            result.addLast((fromPrefix + fromReg).getBytes());
            result.addLast((toPrefix + toReg).getBytes());
        }
    } else { // eg {v7}
        result.add(parameters);
    }
    return result;
}
/**
 * Extracts the operands of this instruction from the already split code
 * line and stores them in the fields of this object (result register/field,
 * involved registers/fields, invoked method, label, constant flag). Which
 * tokens are read depends on the current {@code type}.
 * <p>
 * Called by {@link #parseOpCode()} once the opcode type is known. Opcode
 * reference:
 * http://www.milk.com/kodebase/dalvik-docs-mirror/docs/dalvik-bytecode.html
 *
 * @param split
 *            the tokens of the code line as produced by
 *            {@link #split(byte[])}; index 0 is the opcode itself
 */
private void parse(LinkedList<byte[]> split) {
    final byte[] rawLine = codeLine.getLine();
    switch (type) {
    case NEW_INSTANCE:
        // new-instance vAA, type@BBBB
        resultRegister = parseParameter(split.get(1)).getFirst();
        break;

    case INVOKE_STATIC:
    case INVOKE:
        /*
         * invoke-virtual/range {v0 .. v5}, Lsome/Class;->method(params)ret
         * invoke-interface {v7}, Landroid/database/Cursor;->moveToNext()Z
         *
         * Tokens: 0 = opcode, 1 = registers, last = class->method(types)ret.
         * An invoke is not necessarily followed by a move-result.
         */
        involvedRegisters = parseParameter(split.get(1));
        cmpr = parseClassAndMethodAndParameterAndReturnValue(split.getLast());
        break;

    case AGET:
        // aget-x vAA, vBB, vCC: vAA = vBB[vCC]; the index vCC is ignored
    case MATH_1:
        // unop vA, vB, eg neg-int, int-to-byte
    case MOVE:
        /*
         * move-x vA, vB: vA = vB. Wide moves name only the first register
         * of a pair (vX stands for vX and vX+1). move-exception and
         * move-result are not handled here.
         */
        resultRegister = split.get(1);
        involvedRegisters.add(split.get(2));
        break;

    case GET: {
        /*
         * iget-x vA, vB, field@CCCC (instance-op)
         * sget-x vAA, field@BBBB (static-op), eg
         * sget-object v1, Lcom/andiord/SMSOperator;->CONTENT_URI:Landroid/net/Uri;
         */
        final boolean instanceOp = rawLine[0] == 'i';
        if (instanceOp || rawLine[0] == 's') {
            resultRegister = split.get(1); // vA
            if (instanceOp) {
                involvedRegisters.add(split.get(2)); // vB
                involvedFields.add(split.get(3));
            } else {
                involvedFields.add(split.get(2));
            }
        }
        break;
    }

    case CONST:
        // const-string v2, ", protocol=" or const/4 v4, 0x0
        resultRegister = split.get(1);
        hasConstant = true;
        break;

    case APUT:
        // aput-x vAA, vBB, vCC: vBB[vCC] = vAA; the index vCC is ignored
        resultRegister = split.get(2); // vB, the array
        involvedRegisters.add(split.get(1)); // vA, the stored value
        break;

    case PUT: {
        /*
         * iput-x vA, vB, field@CCCC (instance-op): save vA in field CCCC
         * of object vB; vB itself is not recorded.
         * sput-x vAA, field@BBBB (static-op), eg
         * sput v0, Lcom/lohan/crackme1/example;->Counter:I
         */
        final boolean instanceOp = rawLine[0] == 'i';
        if (instanceOp || rawLine[0] == 's') {
            final int fieldToken = instanceOp ? 3 : 2;
            resultField = parseClassAndField(split.get(fieldToken));
            involvedRegisters.add(split.get(1)); // vA
        }
        break;
    }

    case MATH_2:
        // binop vAA, vBB, vCC: binary operation solely on registers
        resultRegister = split.get(1);
        involvedRegisters.add(split.get(2));
        involvedRegisters.add(split.get(3));
        break;

    case MATH_2C:
        // binop/lit16 vA, vB, #+CCCC and binop/lit8 vAA, vBB, #+CC
        resultRegister = split.get(1);
        involvedRegisters.add(split.get(2));
        hasConstant = true;
        break;

    case MOVE_RESULT:
        // move-result vAA, preceded by an INVOKE or a FILLED_NEW_ARRAY
    case NEW_ARRAY:
        // new-array vA, vB, type@CCCC: vA = array reference; size vB and
        // the type are ignored
    case INTERNAL_SMALI_OPCODE:
        /*
         * Opcodes which overwrite a register that might be tracked:
         * cmpX vAA, vBB, vCC / move-exception vAA / array-length vA, vB.
         * Only the destination register is recorded.
         */
        resultRegister = split.get(1);
        break;

    case RETURN:
        // return-void, return vAA, return-wide vAA, return-object vAA
        if (Arrays.equals(split.getFirst(), "return-void".getBytes())) {
            break; // return-void has no return value, therefore we're done
        }
        involvedRegisters.add(split.getLast());
        break;

    case FILL_ARRAY_DATA:
        /*
         * byte b[] = { 'x', 'y', 'z' }; results in
         *
         * fill-array-data v0, :array_0
         * ...
         * :array_0
         * .array-data 0x1 0x78t 0x79t 0x7at .end array-data
         */
        resultRegister = split.get(1);
        label = split.get(2);
        hasConstant = true;
        break;

    case FILLED_NEW_ARRAY:
        /*
         * filled-new-array {vD, vE, vF, vG, vA}, type@CCCC
         * filled-new-array/range {vCCCC .. vNNNN}, type@BBBB
         * This instruction is followed by a MOVE_RESULT instruction.
         */
        involvedRegisters = parseParameter(split.get(1));
        break;

    case GOTO:
    case SWITCH:
    case JMP:
        // eg if-nez v0, :cond_0
        // TODO: for SWITCH and JMP the involved register(s) are not parsed
        label = split.getLast();
        break;

    case IGNORE:
        break;

    default:
        if (DEBUG) LOGGER.debug("Did not parse instruction of type " + type + ": "
                + new String(opCode));
        break;
    }
}
/**
 * Returns the type of this instruction.
 *
 * @return the current type; {@code NOT_YET_PARSED} as long as neither the
 *         constructor nor {@link #parseOpCode()} has classified the line
 */
@Override
public InstructionType getType() {
    return this.type;
}
/**
 * Returns the opcode token of this instruction.
 *
 * @return the first token of the code line, or null if
 *         {@link #parseOpCode()} has not split the line
 */
@Override
public byte[] getOpCode() {
    return this.opCode;
}
/**
 * Returns the register which receives the result of this instruction.
 *
 * @return the result register, or null if none was parsed
 */
@Override
public byte[] getResultRegister() {
    return this.resultRegister;
}
/**
 * Returns the field which receives the result of this instruction.
 *
 * @return an array holding the class name at index 0 and the field name at
 *         index 1, or null if none was parsed
 */
@Override
public byte[][] getResultField() {
    return this.resultField;
}
/**
 * Returns the registers which are read by this instruction.
 *
 * @return the internal list of involved registers (not a copy), empty if
 *         none were parsed
 */
@Override
public LinkedList<byte[]> getInvolvedRegisters() {
    return this.involvedRegisters;
}
/**
 * Returns the fields which are read by this instruction.
 *
 * @return the internal list of involved fields (not a copy), empty if none
 *         were parsed
 */
@Override
public LinkedList<byte[]> getInvolvedFields() {
    return this.involvedFields;
}
/**
 * Returns the target of an invoke opcode.
 *
 * @return an array with the class at index 0, the method at index 1, the
 *         raw parameters at index 2 and the return value at index 3, or
 *         null if no invoke target was parsed
 */
@Override
public byte[][] getCalledClassAndMethodWithParameter() {
    return this.cmpr;
}
/**
 * Returns the target of an invoke opcode. Delegates to
 * {@link #getCalledClassAndMethodWithParameter()}, so the returned array
 * also carries the parameters and the return value.
 *
 * @return the same array as {@link #getCalledClassAndMethodWithParameter()}
 */
@Override
public byte[][] getCalledClassAndMethod() {
    final byte[][] target = getCalledClassAndMethodWithParameter();
    return target;
}
/**
 * Writes a one-line summary of this instruction (type, code line, result
 * register/field, involved registers/fields, constant and invoke target) to
 * the debug log.
 * <p>
 * Nothing is logged, and the summary is not even built, unless debugging is
 * enabled via the system property "debug.slicing".
 */
@Override
public void dump() {
    if (!DEBUG) {
        return; // the summary would be thrown away anyway
    }
    StringBuilder sb = new StringBuilder();
    sb.append("Type: ").append(type);
    sb.append(" CL : ").append(codeLine);
    if (resultRegister != null) {
        sb.append(" resultReg: ").append(new String(resultRegister));
    }
    if (resultField != null) {
        // index 0 is the class, index 1 the field name
        sb.append(" resultFld: ").append(new String(resultField[0]))
                .append('.').append(new String(resultField[1]));
    }
    if (involvedRegisters.size() > 0) {
        sb.append(" invlvdReg: ");
        for (byte[] register : involvedRegisters) {
            sb.append(new String(register)).append(' ');
        }
    }
    if (involvedFields.size() > 0) {
        sb.append(" invlvdFld: ");
        for (byte[] field : involvedFields) {
            sb.append(new String(field)).append(' ');
        }
    }
    if (constant != null) {
        sb.append(" const: ").append(constant);
    }
    if (cmpr != null) {
        sb.append(" targetMet: ").append(new String(cmpr[0])).append('.')
                .append(new String(cmpr[1])).append("(...)");
    }
    LOGGER.debug(sb.toString());
}
/**
 * Returns the code line this instruction was created from.
 *
 * @return the code line which was passed to the constructor
 */
@Override
public CodeLineInterface getCodeLine() {
    return this.codeLine;
}
/**
 * Extracts the class and the field name from a field reference such as
 * {@code Lcom/lohan/crackme1/example;->Counter:I}, which yields
 * [com/lohan/crackme1/example, Counter].
 * <p>
 * The class is taken from index 1 up to the first ';', the field name from
 * behind the ";->" up to the first ':'.
 *
 * @param smaliCode
 *            the field reference, see above
 * @return an array with the class being the first element and the field
 *         name the second one; the field type is dropped
 */
public static byte[][] parseClassAndField(byte[] smaliCode) {
    final int semicolonPos = ByteUtils.indexOf(smaliCode, ';');
    final int colonPos = ByteUtils.indexOf(smaliCode, ':');
    // 1: skip the leading 'L'; + 3: skip ";->"
    final byte[] className = ByteUtils.subbytes(smaliCode, 1, semicolonPos);
    final byte[] fieldName = ByteUtils.subbytes(smaliCode, semicolonPos + 3, colonPos);
    return new byte[][] { className, fieldName };
}
/**
 * Parses the class, the method, its raw parameters and its return type from
 * a method reference. Examples:
 * <ul>
 * <li>{@code Ljava/io/PrintStream;->println(Ljava/lang/String;)V} yields
 * [java/io/PrintStream, println, Ljava/lang/String;, V]</li>
 * <li>{@code [B->clone()Ljava/lang/Object;} yields
 * [B, clone, (empty), Ljava/lang/Object;]</li>
 * </ul>
 * Leading array dimensions ('['), the 'L' of a class type and its
 * terminating ';' are not part of the returned class name.
 * <p>
 * The class is separated from the method by looking for the sequence "->"
 * and not just for the first '-', because '-' is a legal character inside
 * class names (e.g. {@code Lcom/foo/-$$Lambda$Main$1;}).
 *
 * @param smaliCode
 *            the method reference, see above
 * @return an array with the class at index 0, the method at index 1, the
 *         raw parameters at index 2 and the return type at index 3
 * @throws IllegalArgumentException
 *             if "->", '(' or ')' cannot be found in this order
 */
public static byte[][] parseClassAndMethodAndParameterAndReturnValue(byte[] smaliCode) {
    // locate the "->" which separates the class from the method
    int arrowPos = -1;
    for (int i = 0; i + 1 < smaliCode.length; i++) {
        if (smaliCode[i] == '-' && smaliCode[i + 1] == '>') {
            arrowPos = i;
            break;
        }
    }
    int methodEnd = arrowPos < 0 ? -1 : indexOfFrom(smaliCode, '(', arrowPos + 2);
    int parametersEnd = methodEnd < 0 ? -1 : indexOfFrom(smaliCode, ')', methodEnd + 1);
    if (parametersEnd < 0) {
        throw new IllegalArgumentException("Not a method reference: "
                + new String(smaliCode));
    }

    // a non-primitive class is terminated with a ';' which is not copied
    int classEnd = arrowPos;
    if (classEnd > 0 && smaliCode[classEnd - 1] == ';') {
        classEnd--;
    }

    int classStart = 0;
    while (classStart < classEnd && smaliCode[classStart] == '[') {
        classStart++; // skip the array dimensions
    }
    if (classStart < classEnd && smaliCode[classStart] == 'L') {
        classStart++; // we have a class and also want to skip the L
    }

    byte[][] cmpr = new byte[4][];
    cmpr[0] = Arrays.copyOfRange(smaliCode, classStart, classEnd); // class
    cmpr[1] = Arrays.copyOfRange(smaliCode, arrowPos + 2, methodEnd); // method
    cmpr[2] = Arrays.copyOfRange(smaliCode, methodEnd + 1, parametersEnd); // parameters
    cmpr[3] = Arrays.copyOfRange(smaliCode, parametersEnd + 1, smaliCode.length); // return value
    return cmpr;
}

/** Returns the index of the first {@code wanted} at or after {@code from}, or -1. */
private static int indexOfFrom(byte[] data, char wanted, int from) {
    for (int i = from; i < data.length; i++) {
        if (data[i] == wanted) {
            return i;
        }
    }
    return -1;
}
/**
 * Returns the label this instruction refers to.
 *
 * @return the label token, or null if no label was parsed
 */
@Override
public byte[] getLabel() {
    return this.label;
}
/**
 * Tells whether this instruction holds a constant.
 *
 * @return true if parsing marked this instruction as holding a constant,
 *         false otherwise (also if it was never parsed)
 */
@Override
public final boolean hasConstant() {
    return this.hasConstant;
}
/**
 * Returns the value of the constant held by this instruction.
 * <p>
 * The value is obtained from a temporary {@code Constant} which is created
 * from the code line on every call; it is not stored in this object.
 *
 * @return the constant value, or null if this instruction holds no constant
 * @throws SyntaxException
 *             passed on from the temporary constant's {@code getValue()}
 */
@Override
public String getConstantValue() throws SyntaxException {
    if (hasConstant) {
        // this is only a temp constant
        final ConstantInterface temporary = new Constant(codeLine, -1,
                new LinkedList<BasicBlockInterface>(), -1);
        return temporary.getValue();
    }
    return null;
}
}