package com.dedupeer.processing; import java.util.ArrayList; import java.util.Arrays; import org.apache.log4j.Logger; import com.dedupeer.checksum.Checksum32; import com.dedupeer.checksum.RollingChecksumOlder; /** * @author Paulo Fernando (pf@paulofernando.net.br) * @deprecated */ public class EagleEye { private static final Logger log = Logger.getLogger(EagleEye.class); /** * Try find a data block in {@code file} with same bytes as the {@code chunk} * @param file File where the block will be searched * @param chunk Data block to find in {@code file} * @return index on the {@code file} where the pattern matches. -1 means the pattern not found */ public static int searchDuplication(byte[] file, byte[] chunk) { Checksum32 chunkC32 = new Checksum32(); chunkC32.check(chunk, 0, chunk.length); int chunkHash = chunkC32.getValue(); long time = System.currentTimeMillis(); int index = 0; Checksum32 c32 = new Checksum32(); c32.check(file, 0, chunk.length); int hash; hash = c32.getValue(); if(chunkHash == hash) { log.debug("Found it! [hash = " + hash + "] and [index = " + index + "]"); return index; } index++; while(index < file.length - chunk.length) { c32.roll(file[index]); hash = c32.getValue(); if(chunkHash == hash) { log.debug("Found it! [hash = " + hash + "] and [index = " + index + "]"); return index; } index++; } log.debug("Processed in " + (System.currentTimeMillis() - time) + " miliseconds"); return -1; } /** * Try find a data block in {@code file} with same bytes as the {@code chunk} * @param file File where the block will be searched * @param chunkHash Hash computed of a chunk * @param offset Position of the last byte of new chunk to search * @param sizeOfChunk Size of the chunk from which the {@code hash} was computed * @return index on the {@code file} where the pattern matches. -1 if not found it. */ public static int searchDuplication(byte[] file, int chunkHash, int offset, int sizeOfChunk) { int index = offset; Checksum32 c32 = new Checksum32(); c32.check(file, offset, sizeOfChunk); int hash; hash = c32.getValue(); if(chunkHash == hash) { log.debug("Found it! [hash = " + hash + "] and [index = " + index + "] *"); return index; } index++; while(index <= file.length - sizeOfChunk) { c32.roll(file[index]); hash = c32.getValue(); if(chunkHash == hash) { index -= (sizeOfChunk - 1); //the index informed to roll() is the index of the last byte log.debug("Found it! [hash = " + hash + "] and [index = " + index + "]"); return index; } index++; } log.debug("-> Pattern not found!"); return -1; } /** * Try find a data block in {@code file} with same bytes as the {@code chunk} * @param file File where the block will be searched * @param chunk Data block to find in {@code file} * @return Indexes on the {@code file} where the pattern matches */ @Deprecated public static ArrayList<Integer> searchDuplicationWithChecksumOlder(byte[] file, byte[] chunk) { ArrayList<Integer> indexes = new ArrayList<Integer>(); Long hash = RollingChecksumOlder.sum(chunk); RollingChecksumOlder checksum = new RollingChecksumOlder(file, chunk.length); int i = 0; while (checksum.next()) { long cs = checksum.weak(); if(cs == hash) { log.debug("\nFound! [index = " + i +"]"); indexes.add(i); log.debug(cs); } i++; } if(indexes.size() == 0) { log.debug("Duplicated block"); } return indexes; } /** * Try find a data block in {@code file} with same bytes as the {@code chunk} * @param file File where the block will be searched * @param hash Hash computed of a chunk * @param sizeOfChunk Size of the chunk from which the {@code hash} was computed * @return index on the {@code file} where the pattern matches. -1 if not found it. */ @Deprecated public static int searchDuplicationWithChecksumOlder(byte[] file, long hash, int sizeOfChunk) { RollingChecksumOlder checksum = new RollingChecksumOlder(file, sizeOfChunk); int i = 0; while (checksum.next()) { long cs = checksum.weak(); if(cs == hash) { return i; } i++; } return -1; } /** * Try find a data block in {@code file} with same bytes as the {@code chunk} without checksum * @param file File where the block will be searched * @param hash Hash computed of a chunk * @param start Initial byte to search * @param sizeOfChunk Size of the chunk from which the {@code hash} was computed * @return index on the {@code file} where the pattern matches. -1 if not found it. */ @Deprecated public static int searchDuplicationWithoutRollingChecksum(byte[] file, int start, long hash, int sizeOfChunk) { file = Arrays.copyOfRange(file, start, file.length); RollingChecksumOlder checksum = new RollingChecksumOlder(file, sizeOfChunk); int i = 0; while (checksum.next()) { if(i + sizeOfChunk < file.length) { long cs = RollingChecksumOlder.sum(Arrays.copyOfRange(file, i, i + sizeOfChunk)); if(cs == hash) { return i; } //FIXME Byte did not jump one by one. i += sizeOfChunk; } } return -1; } }