/**
*
*/
package de.rub.syssec.saaf.analysis.steps.obfuscation;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import de.rub.syssec.saaf.analysis.steps.AbstractStep;
import de.rub.syssec.saaf.misc.config.Config;
import de.rub.syssec.saaf.model.analysis.AnalysisException;
import de.rub.syssec.saaf.model.analysis.AnalysisInterface;
import de.rub.syssec.saaf.model.application.ClassInterface;
import de.rub.syssec.saaf.model.application.FieldInterface;
import de.rub.syssec.saaf.model.application.MethodInterface;
/**
 * Detects potentially obfuscated classes and methods by calculating the
 * Shannon entropy of their names.
 *
 * Names whose entropy falls below {@link #ENTROPY_CONSTANT} are flagged as
 * potentially obfuscated.
 *
 * @author Tilman Bender <tilman.bender@rub.de>
 *
 */
public class EntropyBasedDetectObfuscationStep extends AbstractStep {

    /** Entropy threshold (in bits); values below it are treated as potentially obfuscated. */
    private static final double ENTROPY_CONSTANT = 2.25;

    /**
     * Method names that are skipped when calculating entropies (see
     * {@link #isIgnored(MethodInterface)}). The list is read-only.
     */
    private static final List<String> ignored = Collections.unmodifiableList(Arrays.asList(
            "<clinit>", "<init>",
            "onActivityResult",
            "onBind",
            "onChange",
            "onClick",
            "onCreate",
            "onDestroy",
            "onDisabled",
            "onEnabled",
            "onNewIntent",
            "onOpen",
            "onReceive",
            "onStartCommand",
            "onTerminate",
            "onTransact",
            "onUnbind",
            "onUpgrade"));

    /**
     * Calculates the Shannon entropy (in bits per character) of a character
     * string, based on the relative frequency of each character in it.
     *
     * @see <a href="http://en.wikipedia.org/wiki/Entropy_%28information_theory%29#Definition"> Definition</a>
     * @param name the string to analyse; must not be {@code null}
     * @return the entropy of {@code name}; {@code 0.0} for an empty string
     *         or a string that consists of a single distinct character
     * @throws NullPointerException if {@code name} is {@code null}
     */
    public static double entropy(String name) {
        final int length = name.length();
        final Map<Character, Long> numberOfOccurrences = new HashMap<Character, Long>();
        // count character frequency
        for (int i = 0; i < length; i++) {
            Character symbol = name.charAt(i);
            Long count = numberOfOccurrences.get(symbol);
            numberOfOccurrences.put(symbol, count == null ? 1L : count + 1L);
        }
        // Subtract each term instead of negating the sum at the end, so that
        // a zero result is +0.0 rather than -0.0.
        double combinedEntropy = 0.0d;
        for (Long count : numberOfOccurrences.values()) {
            // probability of the symbol
            double probability = count / (double) length;
            combinedEntropy -= probability * (Math.log(probability) / Math.log(2));
        }
        return combinedEntropy;
    }

    /**
     * Calculates the median of the given values.
     *
     * The passed list is not modified; sorting happens on a private copy.
     *
     * @param values the values to analyse; must not be {@code null} and must
     *               not contain {@code null} elements
     * @return the middle value for an odd number of values, the average of
     *         the two middle values for an even number, and {@code 0.0} for
     *         an empty list (mirroring {@link #mean(List)})
     */
    public static double median(List<Double> values) {
        if (values.isEmpty()) {
            return 0.0;
        }
        // sort a copy: callers must not see their list reordered, and the
        // list they pass may be unmodifiable
        List<Double> sorted = new ArrayList<Double>(values);
        Collections.sort(sorted);
        int middle = sorted.size() / 2;
        if (sorted.size() % 2 == 1) {
            return sorted.get(middle);
        }
        return (sorted.get(middle - 1) + sorted.get(middle)) / 2.0;
    }

    /**
     * Creates the step.
     *
     * @param cfg     the configuration stored in this step
     * @param enabled the initial value of this step's enabled flag
     */
    public EntropyBasedDetectObfuscationStep(Config cfg, boolean enabled) {
        this.logger = Logger.getLogger(getClass());
        this.config = cfg;
        this.name = "Obfuscation Check";
        this.description = "Calculates String entropy of class and method names to detect obfuscation";
        this.enabled = enabled;
    }

    /**
     * Calculates the entropies for every smali class returned by the analysed
     * application and marks a class as obfuscated when the entropy of its
     * concatenated class-, method- and field names is below
     * {@link #ENTROPY_CONSTANT}.
     *
     * @see de.rub.syssec.saaf.analysis.steps.AbstractStep#doProcessing(de.rub.syssec.saaf.model.analysis.AnalysisInterface)
     * @return always {@code true}
     */
    @Override
    protected boolean doProcessing(AnalysisInterface analysis)
            throws AnalysisException {
        logger.info("Calculating shannon entropy for all class names");
        for (ClassInterface smaliClass : analysis.getApp().getAllSmaliClasss(true)) {
            entropy(smaliClass);
            double cmfEntropy = smaliClass.getEntropy().CMFEntropy;
            logger.info("Entropy for class "+smaliClass.getClassName()+": "+cmfEntropy);
            if (cmfEntropy < ENTROPY_CONSTANT) {
                smaliClass.setObfuscated(true);
                logger.info("Class "+smaliClass.getClassName()+ " is potentially obfuscated");
            }
        }
        return true;
    }

    /**
     * Calculates the entropy of a class and stores it on the class.
     *
     * Several variants are calculated and stored in one {@code Entropy}
     * object:
     *
     * <ul>
     * <li>CMEntropy: entropy of the concatenation of the class name and all non-ignored method names</li>
     * <li>CMFEntropy: entropy of the concatenation of class-, method- and field names</li>
     * <li>AverageEntropy: mean of the separate entropies of class-, method- and field names</li>
     * </ul>
     *
     * In addition every non-ignored method gets its own entropy and its
     * obfuscation flag. If at least one non-ignored method was examined, the
     * class is flagged as obfuscated exactly when any of those methods has an
     * entropy below {@link #ENTROPY_CONSTANT}; otherwise the class flag is
     * left untouched.
     *
     * @param smaliClass the class to analyse and update
     */
    public void entropy(ClassInterface smaliClass) {
        final String className = smaliClass.getClassName();
        logger.info("Checking class "+className+" fo obfuscation");

        List<Double> entropies = new ArrayList<Double>();
        // collects the names of the class, its methods and its fields in one large string
        StringBuilder allNames = new StringBuilder(className);
        entropies.add(entropy(className));

        boolean methodExamined = false;
        boolean anyMethodObfuscated = false;
        for (MethodInterface method : smaliClass.getMethods()) {
            // ignore methods that are never obfuscated and only distort the stats
            if (isIgnored(method)) {
                continue;
            }
            methodExamined = true;

            String methodName = method.getName();
            allNames.append(methodName);
            double methodEntropy = entropy(methodName);
            method.setEntropy(new Entropy(methodEntropy));

            boolean obfuscated = methodEntropy < ENTROPY_CONSTANT;
            method.setObfuscated(obfuscated);
            if (obfuscated) {
                anyMethodObfuscated = true;
                logger.info("Method "+method.getReadableJavaName()+ " is potentially obfuscated");
            }
            entropies.add(methodEntropy);
        }
        // Set the class flag once, from all examined methods. Resetting it
        // inside the loop would let a later unsuspicious method hide an
        // earlier suspicious one.
        if (methodExamined) {
            smaliClass.setObfuscated(anyMethodObfuscated);
        }

        Entropy e = new Entropy();
        // entropy of the concatenation of class name and all method names
        e.CMEntropy = entropy(allNames.toString());

        for (FieldInterface field : smaliClass.getAllFields()) {
            String fieldName = field.getFieldName();
            allNames.append(fieldName);
            entropies.add(entropy(fieldName));
        }
        // entropy of the concatenation of class-, method- and field names
        e.CMFEntropy = entropy(allNames.toString());
        // mean of all separate entropies
        e.AverageEntropy = mean(entropies);
        smaliClass.setEntropy(e);
    }

    /**
     * Tells whether a method is excluded from the entropy calculation because
     * its name is contained in {@link #ignored}.
     *
     * @param method the method to check
     * @return {@code true} if the method's name is on the ignore list
     */
    private boolean isIgnored(MethodInterface method) {
        if (ignored.contains(method.getName())) {
            logger.info("Ignoring method "+method.getName());
            return true;
        }
        return false;
    }

    /**
     * Calculates the arithmetic mean of the given values.
     *
     * @param entropies the values to average; must not be {@code null} and
     *                  must not contain {@code null} elements
     * @return the mean, or {@code 0.0} for an empty list
     */
    public double mean(List<Double> entropies) {
        if (entropies.isEmpty()) {
            return 0.0;
        }
        double sum = 0.0;
        for (Double entropy : entropies) {
            sum += entropy.doubleValue();
        }
        return sum / entropies.size();
    }
}