RSync.java example

Explorer
TomP2P-master
/*
 * Copyright 2013 Maxat Pernebayev, Thomas Bocek
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package net.tomp2p.synchronization;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import net.tomp2p.utils.Utils;

/**
 * Synchronization class is responsible for efficient and optimal
 * synchronization of data resources between responsible peer and replica peers.
 * If one of replicas goes offline, the responsible peer transfers the value
 * completely to the new replica peer. In case the values at responsible peer
 * and replica peer are the same, then no data is transmitted. If the values are
 * different, then only differences are sent to the replica peer.
 * 
 * @author Maxat Pernebayev
 * @author Thomas Bocek
 * 
 */
final public class RSync {

	/**
	 * It returns an array of weak and strong checksums for the value.
	 * 
	 * @param value
	 *            The value
	 * @param size
	 *            The offset size
	 * @return The array of checksums
	 * @throws NoSuchAlgorithmException
	 */
	public static List<Checksum> checksums(final byte[] value, final int blockSize) {
		final int numberOfBlocks = (value.length + blockSize - 1) / blockSize;
		final ArrayList<Checksum> checksums = new ArrayList<Checksum>(numberOfBlocks);
		final RollingChecksum adler = new RollingChecksum();

		for (int i = 0; i < numberOfBlocks; i++) {
			int remaining = Math.min(blockSize, value.length - (i * blockSize));
			adler.reset().update(value, i * blockSize, remaining);

			final int weakChecksum = adler.value();
			final byte[] strongChecksum = Utils.makeMD5Hash(value, i * blockSize, remaining);
			checksums.add(new Checksum(weakChecksum, strongChecksum));
		}
		return checksums;
	}

	/**
	 * It checks whether a match is found or not. If it is found returns
	 * reference otherwise -1.
	 * 
	 * @param wcs
	 *            The weak checksum of offset
	 * @param offset
	 *            The offset
	 * @param checksums
	 *            The checksums
	 * @return either the reference or -1
	 */
	private static int matches(int wcs, byte[] buffer, int offset, int length, List<Checksum> checksums) {
		int checksumSize = checksums.size();
		//TODO: hashing might be a better idea, for now it works
		for (int i = 0; i < checksumSize; i++) {
			int weakChecksum = checksums.get(i).weakChecksum();
			if (weakChecksum == wcs) {
				byte[] md5 = Utils.makeMD5Hash(buffer, offset, length);
				byte[] strongChecksum = checksums.get(i).strongChecksum();
				if (Arrays.equals(strongChecksum, md5)) {
					return i;
				}
			}
		}
		// no match found, content is different
		return -1;
	}

	/**
	 * It returns the sequence of instructions each of which contains either
	 * reference to a block or literal data.
	 * 
	 * @param array
	 *            The value at responsible peer
	 * @param checksums
	 *            The array of checksums
	 * @param blockSize
	 *            The block size
	 * @return The sequence of instructions
	 */
	public static List<Instruction> instructions(byte[] array, List<Checksum> checksums, int blockSize) {

		final List<Instruction> result = new ArrayList<Instruction>(checksums.size());
		final RollingChecksum adler = new RollingChecksum();
		final int length = array.length;

		int offset = 0;
		int lastRefFound = 0;
		int remaining = Math.min(blockSize, length - offset);

		adler.update(array, offset, remaining);

		for (;;) {
			final int wcs = adler.value();
			final int reference = matches(wcs, array, offset, remaining, checksums);
			if (reference != -1) {
				if (offset > lastRefFound) {
					result.add(new Instruction(new RArray(array, lastRefFound, offset - lastRefFound)));
				}
				result.add(new Instruction(reference));

				offset += remaining;
				lastRefFound = offset;
				remaining = Math.min(blockSize, length - offset);
				if (remaining == 0) {
					break;
				}
				adler.reset().update(array, offset, remaining);
			} else {
				offset++;
				if (blockSize > length - offset) {
					break;
				}
				adler.updateRolling(array);
			}
		}

		if (length > lastRefFound) {
			result.add(new Instruction(new RArray(array, lastRefFound, length - lastRefFound)));
		}

		return result;
	}

	/**
	 * It reconstructs the copy of responsible peer's value using instructions
	 * and the replica's value.
	 * 
	 * @param value
	 *            The value at replica
	 * @param instructions
	 *            The sequence of instructions
	 * @param blockSize
	 *            The offset size
	 * @return The value which is identical to the responsible peer's value
	 */
	public static ByteBuf reconstruct(byte[] value, List<Instruction> instructions, int blockSize) {
		ByteBuf result = Unpooled.buffer();
		for (Instruction instruction : instructions) {
			int ref = instruction.reference();
			if (ref != -1) {
				int offset = blockSize * ref;
				int remaining = Math.min(blockSize, value.length - offset);
				result.writeBytes(value, offset, remaining);
			} else if (instruction.literal().hasDataBuffer()) {
				result.writeBytes(instruction.literal().dataBuffer());
			} else {
				result.writeBytes(instruction.literal().array(), instruction.literal().offset(), instruction.literal().length());
			}
		}
		return result;
	}
	
	/**
	 * Variation of Adler as used in Rsync. Inspired by:
	 * 
	 * <pre>
	 * https://github.com/epeli/rollsum/blob/master/ref/adler32.py
	 * http://stackoverflow.com/questions/9699315/differences-in-calculation-of-adler32-rolling-checksum-python
	 * http://de.wikipedia.org/wiki/Adler-32
	 * http://developer.classpath.org/doc/java/util/zip/Adler32-source.html
	 * </pre>
	 * 
	 * @author Thomas Bocek
	 * 
	 */
	public static class RollingChecksum {

		private int a = 1;
		private int b = 0;
		private int length;
		private int offset;

		/**
		 * Resets the checksum to its initial state 1.
		 * 
		 * @return this class
		 */
		public RollingChecksum reset() {
			a = 1;
			b = 0;
			return this;
		}

		/**
		 * Iterates over the array and calculates a variation of Adler.
		 * 
		 * @param array
		 *            The array for the checksum calculation
		 * @param offset
		 *            The offset of the array
		 * @param length
		 *            The length of the data to iterate over (the length of the
		 *            sliding window). Once this is set,
		 *            {@link #updateRolling(byte[])} will use the same value
		 * @return this class
		 */
		public RollingChecksum update(final byte[] array, final int offset, final int length) {
			for (int i = 0; i < length; i++) {
				a = (a + (array[i + offset] & 0xff)) & 0xffff;
				b = (b + a) & 0xffff;
			}
			this.length = length;
			this.offset = offset;
			return this;
		}

		/**
		 * @return The calculated checksum
		 */
		public int value() {
			return (b << 16) | a;
		}

		/**
		 * Sets the checksum to this value.
		 * 
		 * @param checksum
		 *            The checksum to set
		 * @return this class
		 */
		public RollingChecksum value(final int checksum) {
			a = checksum & 0xffff;
			b = checksum >>> 16;
			return this;
		}

		/**
		 * Slide the window of the array by 1.
		 * 
		 * @param array
		 *            The array for the checksum calculation
		 * @param offset
		 *            The offset of the array
		 * @return this class
		 */
		public RollingChecksum updateRolling(final byte[] array) {
			final int removeIndex = offset;
			final int addIndex = offset + length;
			offset++;
			a = (a - (array[removeIndex] & 0xff) + (array[addIndex] & 0xff)) & 0xffff;
			b = (b - (length * (array[removeIndex] & 0xff)) + a - 1) & 0xffff;
			return this;
		}
	}
}