// Copyright 2016 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.archivepatcher.generator.bsdiff; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.charset.StandardCharsets; // TODO(andrewhayden) clean up the various generatePatch(...) methods, there are too many. /** * A helper class that handles the main BsDiff I/O and patch generation, by calling into the main * algorithm implementation in {@link BsDiff} */ public class BsDiffPatchWriter { static final int DEFAULT_MINIMUM_MATCH_LENGTH = 16; /** * Write a patch entry. * * @param newData * @param oldData * @param newPosition the first byte in |newData| to write either raw or as a diff against * |oldData|. * @param oldPosition the first byte in |oldData| to diff against |newData|. Ignored if * |diffLength| is empty. * @param diffLength the number of bytes to diff between |newData| and |oldData| starting at * |newPosition| and |oldPosition| respectively. * @param extraLength the number of bytes from |newData| to write starting at |newPosition + * diffLength|. * @param oldPositionOffsetForNextEntry the offset between |oldPosition| for the next entry and * |oldPosition| + |diffLength| for this entry. * @param outputStream the output stream to write the patch entry to. * @throws IOException if unable to read or write data */ private static void writeEntry( RandomAccessObject newData, RandomAccessObject oldData, int newPosition, int oldPosition, int diffLength, int extraLength, int oldPositionOffsetForNextEntry, OutputStream outputStream) throws IOException { // Write control data BsUtil.writeFormattedLong(diffLength, outputStream); BsUtil.writeFormattedLong(extraLength, outputStream); BsUtil.writeFormattedLong(oldPositionOffsetForNextEntry, outputStream); newData.seek(newPosition); oldData.seek(oldPosition); // Write diff data for (int i = 0; i < diffLength; ++i) { // TODO(hartmanng): test using a small buffer to insulate read() calls (and write() for that // matter). outputStream.write(newData.readUnsignedByte() - oldData.readUnsignedByte()); } if (extraLength > 0) { // This seek will throw an IOException sometimes, if we try to seek to the byte after // the end of the RandomAccessObject. newData.seek(newPosition + diffLength); // Write extra data for (int i = 0; i < extraLength; ++i) { // TODO(hartmanng): same as above - test buffering readByte(). outputStream.write(newData.readByte()); } } } /** * Generate a BsDiff patch given a Matcher. * * @param oldData the old blob * @param newData the new blob * @param matcher a Matcher to find binary matches between oldData and newData * @param outputStream the outputStream for the new generated patch * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ // Visible for testing only static void generatePatchWithMatcher( RandomAccessObject oldData, RandomAccessObject newData, Matcher matcher, OutputStream outputStream) throws IOException, InterruptedException { // Compute the differences, writing ctrl as we go int lastNewPosition = 0; int lastOldPosition = 0; int newPosition = 0; int oldPosition = 0; while (newPosition < newData.length()) { if (Thread.interrupted()) { throw new InterruptedException(); } Matcher.NextMatch nextMatch = matcher.next(); if (nextMatch.didFindMatch) { newPosition = nextMatch.newPosition; oldPosition = nextMatch.oldPosition; } else { newPosition = (int) newData.length(); } // Extend the current match (|newPosition|, |oldPosition|) backward such that 50% of the bytes // match. We have written diff / extra data up till |lastNewPosition| so we cannot extend // further back than |lastNewPosition|. int backwardExtension = 0; if (newPosition < newData.length()) { int score = 0; int bestScore = 0; for (int i = 1; newPosition - i >= lastNewPosition && oldPosition >= i; ++i) { oldData.seek(oldPosition - i); newData.seek(newPosition - i); if (oldData.readByte() == newData.readByte()) { ++score; } else { --score; } if (score > bestScore) { bestScore = score; backwardExtension = i; } } } // Extend the previous match (|lastNewPosition|, |lastOldPosition|) forward such that 50% of // the bytes match. (|lastNewPosition|, |lastOldPosition|) were extended backward in the // previous iteration of the loop. int forwardExtension = 0; { int score = 0; int bestScore = 0; oldData.seek(lastOldPosition); newData.seek(lastNewPosition); for (int i = 0; lastNewPosition + i < newPosition && lastOldPosition + i < oldData.length(); ++i) { if (oldData.readByte() == newData.readByte()) { ++score; } else { --score; } if (score > bestScore) { bestScore = score; forwardExtension = i + 1; } } } // Adjust |backwardExtension| and |forwardExtension| such that the extended matches do // not intersect in |newData|. They can intersect in |oldData|. int overlap = (lastNewPosition + forwardExtension) - (newPosition - backwardExtension); if (overlap > 0) { int score = 0; int bestScore = 0; int backwardExtensionDecrement = 0; for (int i = 0; i < overlap; ++i) { newData.seek(lastNewPosition + forwardExtension - overlap + i); oldData.seek(lastOldPosition + forwardExtension - overlap + i); if (newData.readByte() == oldData.readByte()) { ++score; } newData.seek(newPosition - backwardExtension + i); oldData.seek(oldPosition - backwardExtension + i); if (newData.readByte() == oldData.readByte()) { --score; } if (score > bestScore) { bestScore = score; backwardExtensionDecrement = i + 1; } } forwardExtension -= overlap - backwardExtensionDecrement; backwardExtension -= backwardExtensionDecrement; } // Write an entry with: // - The diff between |newData| and |oldData| for the previous extended match: // oldData[lastOldPosition ... lastOldPosition + forwardExtension - 1] and // newData[lastNewPosition ... lastNewPosition + forwardExtension - 1]. // - The bytes in |newData| between |lastNewPosition| and |newPosition| which are part of // neither the previous extended match or the new extended match: // newData[lastNewPosition + forwardExtension ... newPosition - backwardExtension - 1] int oldPositionOffset = 0; if (newPosition < newData.length()) { // The offset from the byte after the last byte of the previous match in |newData| to the // first byte of the new match in |oldData|. oldPositionOffset = (oldPosition - backwardExtension) - (lastOldPosition + forwardExtension); } // The number of bytes in |newData| between |lastNewPosition| and |newPosition| which are part // of neither the previous extended match or the new extended match. int newNoMatchLength = (newPosition - backwardExtension) - (lastNewPosition + forwardExtension); writeEntry( newData, oldData, lastNewPosition, lastOldPosition, forwardExtension, newNoMatchLength, oldPositionOffset, outputStream); lastNewPosition = newPosition - backwardExtension; lastOldPosition = oldPosition - backwardExtension; } } /** * Generate a diff between the old data and the new, writing to the specified stream. Uses {@link * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length. * * @param oldData the old data * @param newData the new data * @param outputStream where output should be written * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final RandomAccessObject oldData, final RandomAccessObject newData, final OutputStream outputStream, final RandomAccessObjectFactory randomAccessObjectFactory) throws IOException, InterruptedException { generatePatch( oldData, newData, outputStream, randomAccessObjectFactory, DEFAULT_MINIMUM_MATCH_LENGTH); } /** * Generate a diff between the old data and the new, writing to the specified stream. Uses * in-memory byte array storage for ancillary allocations and {@link * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length. * * @param oldData the old data * @param newData the new data * @param outputStream where output should be written * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final byte[] oldData, final byte[] newData, final OutputStream outputStream) throws IOException, InterruptedException { generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH); } /** * Generate a diff between the old data and the new, writing to the specified stream. Uses * in-memory byte array storage for ancillary allocations. * * @param oldData the old data * @param newData the new data * @param outputStream where output should be written * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the * oldData and newData. This can have a significant effect on both the generated patch size * and the amount of time and memory required to apply the patch. * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final byte[] oldData, final byte[] newData, final OutputStream outputStream, final int minimumMatchLength) throws IOException, InterruptedException { try (RandomAccessObject oldDataRAO = new RandomAccessObject.RandomAccessByteArrayObject(oldData); RandomAccessObject newDataRAO = new RandomAccessObject.RandomAccessByteArrayObject(newData); ) { generatePatch( oldDataRAO, newDataRAO, outputStream, new RandomAccessObjectFactory.RandomAccessByteArrayObjectFactory(), minimumMatchLength); } } /** * Generate a diff between the old data and the new, writing to the specified stream. Uses * file-based storage for ancillary operations and {@link #DEFAULT_MINIMUM_MATCH_LENGTH} as the * match length. * * @param oldData a file containing the old data * @param newData a file containing the new data * @param outputStream where output should be written * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final File oldData, final File newData, final OutputStream outputStream) throws IOException, InterruptedException { generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH); } /** * Generate a diff between the old data and the new, writing to the specified stream. Uses * file-based storage for ancillary allocations. * * @param oldData a file containing the old data * @param newData a file containing the new data * @param outputStream where output should be written * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the * oldData and newData. This can have a significant effect on both the generated patch size * and the amount of time and memory required to apply the patch. * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final File oldData, final File newData, final OutputStream outputStream, final int minimumMatchLength) throws IOException, InterruptedException { try (RandomAccessFile oldDataRAF = new RandomAccessFile(oldData, "r"); RandomAccessFile newDataRAF = new RandomAccessFile(newData, "r"); RandomAccessObject oldDataRAO = new RandomAccessObject.RandomAccessMmapObject(oldDataRAF, "r"); RandomAccessObject newDataRAO = new RandomAccessObject.RandomAccessMmapObject(newDataRAF, "r"); ) { generatePatch( oldDataRAO, newDataRAO, outputStream, new RandomAccessObjectFactory.RandomAccessMmapObjectFactory("rw"), minimumMatchLength); } // Due to a bug in the JVM (http://bugs.java.com/view_bug.do?bug_id=6417205), we need to call // gc() and runFinalization() explicitly to get rid of any MappedByteBuffers we may have used // during patch generation. System.gc(); System.runFinalization(); } /** * Generate a diff between the old data and the new, writing to the specified stream. * * @param oldData the old data * @param newData the new data * @param outputStream where output should be written * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the * oldData and newData. This can have a significant effect on both the generated patch size * and the amount of time and memory required to apply the patch. * @throws IOException if unable to read or write data * @throws InterruptedException if any thread interrupts this thread */ public static void generatePatch( final RandomAccessObject oldData, final RandomAccessObject newData, final OutputStream outputStream, final RandomAccessObjectFactory randomAccessObjectFactory, final int minimumMatchLength) throws IOException, InterruptedException { // Write header (signature + new file length) outputStream.write("ENDSLEY/BSDIFF43".getBytes(StandardCharsets.US_ASCII)); BsUtil.writeFormattedLong(newData.length(), outputStream); // Do the suffix search. try (final RandomAccessObject groupArray = new DivSuffixSorter(randomAccessObjectFactory).suffixSort(oldData)) { BsDiffMatcher matcher = new BsDiffMatcher(oldData, newData, groupArray, minimumMatchLength); generatePatchWithMatcher(oldData, newData, matcher, outputStream); } } }