package com.dedupeer.chunking; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Vector; import org.apache.log4j.Logger; import com.dedupeer.backup.StoredFileFeedback; import com.dedupeer.checksum.Checksum32; import com.dedupeer.gui.component.renderer.ProgressInfo; import com.dedupeer.thrift.Chunk; import com.dedupeer.thrift.DeduplicationServiceImpl; import com.dedupeer.thrift.HashingAlgorithm; import com.dedupeer.utils.FileUtils; /** * Utility class for operations with chunks * @author Paulo Fernando (pf@paulofernando.net.br) */ public class Chunking { private static final Logger log = Logger.getLogger(Chunking.class); /** * Slice a file into pieces * @param file File to be sliced * @param destination Destination folder of the chunks * @param chunkSize Amount of bytes for chunk * @param chunkOffset initial chunk position to slice and dicing * @param chunksToSlice Amount of chunk to return * @param fileID File ID that is been stored * @param feedback sends a feedback to the user about the progress * @return chunks information and path to each chunk in hard disk */ public static ArrayList<Chunk> slicingAndDicing(File file, String destination, int chunkSize, long chunkOffset, int chunksToSlice, String fileID, HashingAlgorithm hashingAlgorithm, StoredFileFeedback feedback) throws IOException { ArrayList<Chunk> chunks = new ArrayList<Chunk>(); new File(destination).mkdir(); long filesize = file.length(); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); fis.skip(chunkOffset * chunkSize); log.debug("Starting the slicing and dicing..."); long time = System.currentTimeMillis(); String prefix = FileUtils.getOnlyName(file.getName()); byte[] b = new byte[chunkSize]; int ch = 0; long chunkCount = chunkOffset; Checksum32 c32 = new Checksum32(); filesize -= (chunkOffset * chunkSize); if(filesize < 0) { fis.close(); throw new IOException(); } long globalIndex = chunkOffset * chunkSize; while((filesize > 0) && (chunks.size() != chunksToSlice)) { ch = fis.read(b,0,chunkSize); filesize = filesize-ch; String fname = destination + prefix + "_chunk" + "." + chunkCount; FileOutputStream fos= new FileOutputStream(new File(fname)); fos.write(b,0,ch); fos.flush(); fos.close(); c32.check(b, 0, ch); if(ch < chunkSize) { //If a chunk size is smaller than default b = Arrays.copyOf(b, ch); } Chunk chunk = new Chunk(fileID, String.valueOf(chunkCount), String.valueOf(globalIndex), String.valueOf(ch)); chunk.setWeakHash(String.valueOf(c32.getValue())); chunk.setStrongHash(DeduplicationServiceImpl.getStrongHash(hashingAlgorithm, b)); chunk.setContent(FileUtils.getBytesFromFile(fname)); chunks.add(chunk); chunkCount++; globalIndex += b.length; } fis.close(); log.debug(chunkCount + " created of " + (chunkSize/1000) + "KB in " + (System.currentTimeMillis() - time) + " miliseconds"); return chunks; } /** * Retrieves the chunks of the file system and stores them in a vector of bytes * @param path Path of the folder with the chunks * @param initalNameOfCHunk The initial name of the chunks * @param to Path to save the file restored */ public static void restoreFile(String path, String initalNameOfCHunk, String to) { Vector<byte[]> chunks = new Vector<byte[]>(); int i = 0; while((new File(path + initalNameOfCHunk + "." + i)).exists()) { chunks.addElement(FileUtils.getBytesFromFile(path + initalNameOfCHunk + "." + i)); i++; } log.debug(chunks.size() + " chunks restored"); new File(to.substring(0, to.lastIndexOf("\\"))).mkdir(); write(chunks, to); } /** * Computes the hashes of all chunks in a directory * @param path Directory where the chunks are * @param initalNameOfCHunk Initial name of the chunks * @return Collection of hashes */ public static ArrayList<Integer> computeHashes(String path, String initalNameOfCHunk) { ArrayList<Integer> hashes = new ArrayList<Integer>(); log.debug("Computing hashes..."); long time = System.currentTimeMillis(); Checksum32 c32 = new Checksum32(); int i = 0; while((new File(path + initalNameOfCHunk + "." + i)).exists()) { byte[] chunk = FileUtils.getBytesFromFile(path + initalNameOfCHunk + "." + i); c32.check(chunk, 0, chunk.length); hashes.add(c32.getValue()); i++; } log.debug("Computed hashes of " + i + " chunks in " + (System.currentTimeMillis() - time) + " miliseconds"); return hashes; } /** * Write a Vector<byte[]> in a file * @param aInput Chunks to write in a file * @param newFile Name of the file to create */ public static void write(Vector<byte[]> aInput, String newFile) { try { OutputStream output = null; try { output = new BufferedOutputStream(new FileOutputStream(newFile)); for(byte[] chunk: aInput) { output.write(chunk); } } finally { output.close(); } } catch(FileNotFoundException ex){ log.error("File not found."); } catch(IOException ex){ log.error(ex); } } }