/* ** ByteSeqSpecifier.java * * Copyright The National Archives 2005-2006. All rights reserved. * See Licence.txt for full licence details. * * Developed by: * Tessella Support Services plc * 3 Vineyard Chambers * Abingdon, OX14 3PX * United Kingdom * http://www.tessella.com * * Tessella/NPD/4826 * PRONOM 4 * * $Id: ByteSeqSpecifier.java,v 1.7 2006/03/13 15:15:28 linb Exp $ * * $Log: ByteSeqSpecifier.java,v $ * Revision 1.7 2006/03/13 15:15:28 linb * Changed copyright holder from Crown Copyright to The National Archives. * Added reference to licence.txt * Changed dates to 2005-2006 * * Revision 1.6 2006/02/13 09:26:16 gaur * Fixed bug in searching files from EOF, after first STS round * * Revision 1.5 2006/02/09 15:04:37 gaur * Corrected formatting * * Revision 1.4 2006/02/07 17:16:22 linb * - Change fileReader to ByteReader in formal parameters of methods * - use new static constructors * - Add detection of if a filePath is a URL or not * * Revision 1.3 2006/02/07 11:30:04 gaur * Added support for endianness of signature * * Revision 1.2 2006/02/03 16:54:41 gaur * We now allow general wildcards of arbitrary endianness: e.g., [!~A1B1:C1D1] * * Revision 1.1 2006/02/02 17:17:04 gaur * Initial version. Functionality not yet complete, but should be sufficient to emulate the old behaviour. * */ package uk.gov.nationalarchives.droid.signatureFile; import uk.gov.nationalarchives.droid.binFileReader.ByteReader; /** * Defines the permissible values to be taken by a specific sequence of bytes. For example, it might specify that * two bytes in succession must be between 8080 and 808F (inclusive) * * @author Richard Gault, Tessella */ public class ByteSeqSpecifier { // Private members private byte[] minSeq; // The minimum (inclusive) value which the sequence can take: 80, 80 in the example in the header (except that we take off 128 before storing a value in the array, since bytes are unsigned) private byte[] maxSeq; // The maximum (inclusive) value which the sequence can take: 80, 8F in the example in the header private boolean negate; // If true, negates the sense of the test (in the example in the header, it would specify that the two bytes must be outside the range 8080-808f) /* Getter */ public int getNumBytes() { return minSeq.length; } // Will always be the same as maxSeq.length /** * Creates a new instance of ByteSeqSpecifier * * @param asciiRep A StringBuffer whose initial portion will be an ASCII representation of the bytes specifier. This will be * altered so that this initial portion is removed. */ public ByteSeqSpecifier(StringBuffer asciiRep) throws Exception { String specifier; // The string of characters defining the bytes specifier (excluding any square brackets) // First off, handle the case of a simple specifier: A2, for example. if (asciiRep.charAt(0) != '[') { specifier = asciiRep.substring(0, 2); asciiRep.delete(0, 2); } else { // We have a non-trivial byte sequence Specifier. Extract it from the front of asciiRep specifier = asciiRep.substring(1, asciiRep.indexOf("]")); asciiRep.delete(0, specifier.length() + 2); } negate = false; // Does the specifier begin with a ! (indicating negation)? Remove it if so. while (specifier.charAt(0) == '!' || specifier.charAt(0) == '~') { if (specifier.charAt(0) == '!') { negate = !negate; } specifier = specifier.substring(1); } // Does the specifier contain a : (indicating a range)? If so, set minRage and maxRange to be the strings on either side. // If not, set them both to be the same: the whole of specifier. String minRange; String maxRange; int colonPos = specifier.indexOf(':'); if (colonPos >= 0) { minRange = specifier.substring(0, colonPos); maxRange = specifier.substring(colonPos + 1); } else { minRange = specifier; maxRange = specifier; } // Sanity check that minRange and maxRange are the same length if (minRange.length() != maxRange.length()) { throw new Exception("Invalid internal signature supplied"); } // We may now assume that both minRange and maxRange contain pairs of characters representing concrete bytes. Extract and // store them in our two arrays int seqLength = minRange.length() / 2; minSeq = new byte[seqLength]; maxSeq = new byte[seqLength]; for (int i = 0; i < seqLength; i++) { int byteVal = Integer.parseInt(minRange.substring(2 * i, 2 * (i + 1)), 16); minSeq[i] = (byte) (byteVal + Byte.MIN_VALUE); byteVal = Integer.parseInt(maxRange.substring(2 * i, 2 * (i + 1)), 16); maxSeq[i] = (byte) (byteVal + Byte.MIN_VALUE); } } /** * Determines whether or not a given portion of a binary file matches the sequence of bytes we specify. * * @param file The file we're currently testing * @param startPos The position of the first byte in the file to examine * @param direction +1 (left to right) or -1 (right to left). The overall direction which our caller is searching in * @param bigEndian True iff the signature we are matching is big-endian * @return true iff the portion matches * <p/> * Note: In an ideal world, we would hold bigEndian as a private member, set up on construction. However, the framework * used during parsing of the XML file does not lend itself to easily fetching information from a grandparent * element. Consequently, we parse the byte sequence specifier in ignorance of its endianness, and wait until * we try to match against a specific byte sequence (here) to find out how minSeq and maxSeq should be interpreted. */ public boolean matchesByteSequence(ByteReader file, long startPos, int direction, boolean bigEndian) { try { // We have to perform the comparison from big-end to little-end. Consequently, if we're reading // from right to left but using big-endian-ness, or if we're reading from left-to-right but using // little-endian-ness, we have to search through our sequence backwards -- that is, left-to-right // in the former case, or right-to-left in the latter. if (!bigEndian && direction == 1) { direction = -1; startPos += this.getNumBytes() - 1; } else if (bigEndian && direction == -1) { direction = 1; startPos = startPos - this.getNumBytes() + 1; } int arrayPos = (direction == 1) ? 0 : this.getNumBytes() - 1; // Loop through the sequence, checking to ensure that the contents of the binary file >= the minimum sequence for (int fileOffset = 0; 0 <= arrayPos && arrayPos < this.getNumBytes(); fileOffset += direction, arrayPos += direction) { // Read the corresponding byte from the file. Because this is stored in 2s complement form, we need to // convert it to the same form that minSeq is stored in int fileByte = file.getByte(startPos + fileOffset); if (fileByte < 0) { fileByte += 256; } fileByte += Byte.MIN_VALUE; if (fileByte < minSeq[arrayPos]) { // We're outside the allowed range. return negate; } else if (fileByte > minSeq[arrayPos]) { // The whole of the sequence is definitely greater than minSeq. Go on and see if it's less than maxSeq. break; } } // Repeat the previous loop, but this time checking to ensure that the contents of the binary file <= the maximum sequence arrayPos = (direction == 1) ? 0 : this.getNumBytes() - 1; for (int fileOffset = 0; arrayPos >= 0 && arrayPos < this.getNumBytes(); fileOffset += direction, arrayPos += direction) { int fileByte = file.getByte(startPos + fileOffset); if (fileByte < 0) { fileByte += 256; } fileByte += Byte.MIN_VALUE; if (fileByte > maxSeq[arrayPos]) { return negate; } else if (fileByte < maxSeq[arrayPos]) { break; } } return !negate; } catch (Exception e) { // This is most likely to occur if we run off the end of the file. (In practice, this method shouldn't be called // unless we have enough bytes to read, but this is belt and braces.) return false; } } }