/* SAAF: A static analyzer for APK files.
* Copyright (C) 2013 syssec.rub.de
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.rub.syssec.saaf.misc;
/**
* The Knuth-Morris-Pratt Pattern Matching Algorithm for byte arrays.
* Adapted from http://helpdesk.objects.com.au/java/search-a-byte-array-for-a-byte-sequence
*/
public class KMP {

    /** Byte that acts as the "any amount of arbitrary bytes" wildcard. */
    private static final byte WILDCARD = '*';

    /** Distance between an ASCII upper-case letter and its lower-case form. */
    private static final int ASCII_CASE_OFFSET = 'a' - 'A';

    /**
     * Searches {@code data} for the first exact occurrence of {@code pattern},
     * starting at index 0.
     *
     * @param data the bytes to search in
     * @param pattern the bytes to search for
     * @return the index of the first occurrence, or -1 if there is none (an
     *         empty pattern never matches)
     */
    public static int indexOf(byte[] data, byte[] pattern) {
        return indexOf(data, 0, pattern);
    }

    /**
     * Searches {@code data} for the first occurrence of {@code pattern},
     * beginning at {@code offset}.
     *
     * <p>
     * If {@code wildcard} is set, every {@code '*'} byte in the pattern stands
     * for any amount (including zero) of arbitrary bytes. Leading and trailing
     * wildcards are ignored. The literal segments between the wildcards have
     * to appear in {@code data} in order and without overlapping; the returned
     * index is the start of the first literal segment.
     *
     * <p>
     * If {@code ignoreCase} is set, the ASCII letters A-Z and a-z are compared
     * without regard to case, in both the pattern and the data. All other
     * byte values are compared exactly.
     *
     * @param data the bytes to search in
     * @param offset the index to start searching at; negative values are
     *            treated as 0
     * @param pattern the bytes to search for
     * @param ignoreCase whether ASCII letters are matched case-insensitively
     * @param wildcard whether {@code '*'} in the pattern is a wildcard rather
     *            than a literal byte
     * @return the index at which the match starts, or -1 if there is no match,
     *         if {@code offset} is at or beyond the end of {@code data}, or if
     *         the pattern contains no literal bytes to search for
     * @throws NullPointerException if {@code data} is null, or if
     *             {@code pattern} is null and a search is attempted
     */
    public static int indexOf(byte[] data, int offset, byte[] pattern,
            boolean ignoreCase, boolean wildcard) {
        int from = Math.max(offset, 0);
        if (from >= data.length) {
            return -1;
        }
        if (!wildcard) {
            return search(data, from,
                    copySegment(pattern, 0, pattern.length, ignoreCase),
                    ignoreCase);
        }

        int matchStart = -1;
        int pos = 0;
        while (pos < pattern.length) {
            if (pattern[pos] == WILDCARD) {
                // Wildcards only separate segments; they consume nothing themselves.
                pos++;
                continue;
            }
            int end = pos;
            while (end < pattern.length && pattern[end] != WILDCARD) {
                end++;
            }
            byte[] segment = copySegment(pattern, pos, end, ignoreCase);
            /*
             * Taking the leftmost hit for each segment is sufficient: it
             * leaves the largest possible remainder of data for the segments
             * that follow.
             */
            int hit = search(data, from, segment, ignoreCase);
            if (hit < 0) {
                return -1;
            }
            if (matchStart < 0) {
                matchStart = hit;
            }
            // The next segment must start after this one ends.
            from = hit + segment.length;
            pos = end;
        }
        // Still -1 if the pattern was empty or consisted of wildcards only.
        return matchStart;
    }

    /**
     * Searches {@code data} for the first exact occurrence of {@code pattern},
     * beginning at {@code offset}. A {@code '*'} in the pattern is treated as
     * a literal byte.
     *
     * @param data the bytes to search in
     * @param offset the index to start searching at; negative values are
     *            treated as 0
     * @param pattern the bytes to search for
     * @return the index of the first occurrence, or -1 if there is none
     */
    public static int indexOf(byte[] data, int offset, byte[] pattern) {
        return indexOf(data, offset, pattern, false, false);
    }

    /**
     * Plain Knuth-Morris-Pratt search for one literal segment.
     *
     * @param data the bytes to search in
     * @param from the first index of {@code data} to consider
     * @param segment the literal bytes to find; must already be case-folded
     *            if {@code ignoreCase} is set
     * @param ignoreCase whether the bytes of {@code data} are case-folded
     *            before they are compared
     * @return the start index of the first occurrence at or after
     *         {@code from}, or -1 if there is none or the segment is empty
     */
    private static int search(byte[] data, int from, byte[] segment,
            boolean ignoreCase) {
        if (segment.length == 0) {
            return -1;
        }
        int[] failure = computeFailure(segment);
        int matched = 0;
        for (int i = from; i < data.length; i++) {
            byte current = ignoreCase ? toLowerAscii(data[i]) : data[i];
            while (matched > 0 && segment[matched] != current) {
                matched = failure[matched - 1];
            }
            if (segment[matched] == current) {
                matched++;
            }
            if (matched == segment.length) {
                return i - segment.length + 1;
            }
        }
        return -1;
    }

    /**
     * Copies {@code pattern[start..end)} into a new array, folding ASCII
     * upper-case letters to lower case if {@code ignoreCase} is set. Folding
     * the segment up front lets the failure table and the matcher use the
     * same notion of byte equality.
     */
    private static byte[] copySegment(byte[] pattern, int start, int end,
            boolean ignoreCase) {
        byte[] segment = new byte[end - start];
        for (int k = 0; k < segment.length; k++) {
            byte b = pattern[start + k];
            segment[k] = ignoreCase ? toLowerAscii(b) : b;
        }
        return segment;
    }

    /**
     * Maps the ASCII letters A-Z to a-z and returns every other byte
     * unchanged.
     */
    private static byte toLowerAscii(byte b) {
        if (b >= 'A' && b <= 'Z') {
            return (byte) (b + ASCII_CASE_OFFSET);
        }
        return b;
    }

    /**
     * Computes the failure function using a boot-strapping process, where the
     * pattern is matched against itself.
     *
     * @param pattern the bytes to build the table for
     * @return an array in which entry {@code i} is the length of the longest
     *         proper prefix of {@code pattern[0..i]} that is also a suffix
     *         of it
     */
    private static int[] computeFailure(byte[] pattern) {
        int[] failure = new int[pattern.length];
        int j = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (j > 0 && pattern[j] != pattern[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == pattern[i]) {
                j++;
            }
            failure[i] = j;
        }
        return failure;
    }
}