/*
 * Copyright The National Archives 2005-2006.  All rights reserved.
 * See Licence.txt for full licence details.
 *
 * Developed by:
 * Tessella Support Services plc
 * 3 Vineyard Chambers
 * Abingdon, OX14 3PX
 * United Kingdom
 * http://www.tessella.com
 *
 * Tessella/NPD/4305
 * PRONOM 4
 *
 * $Id: SubSequence.java,v 1.8 2006/03/13 15:15:29 linb Exp $
 *
 * $Log: SubSequence.java,v $
 * Revision 1.8  2006/03/13 15:15:29  linb
 * Changed copyright holder from Crown Copyright to The National Archives.
 * Added reference to licence.txt
 * Changed dates to 2005-2006
 *
 * Revision 1.7  2006/02/13 10:29:40  gaur
 * Fixed bug in searching a short file for a byte sequence at a large offset from BOF
 *
 * Revision 1.6  2006/02/13 09:26:16  gaur
 * Fixed bug in searching files from EOF, after first STS round
 *
 * Revision 1.5  2006/02/09 15:04:37  gaur
 * Corrected formatting
 *
 * Revision 1.4  2006/02/07 17:16:23  linb
 * - Change fileReader to ByteReader in formal parameters of methods
 * - use new static constructors
 * - Add detection of if a filePath is a URL or not
 *
 * Revision 1.3  2006/02/07 11:30:04  gaur
 * Added support for endianness of signature
 *
 *
 * $History: SubSequence.java $
 *
 * *****************  Version 6  *****************
 * User: Walm         Date: 29/09/05   Time: 9:16
 * Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
 * Bug fix in response to JIRA issue PRON-29.
 * changed startPosInFile to an array + some changes to the way start
 * position options are dealt with.
 *
 * *****************  Version 5  *****************
 * User: Walm         Date: 17/05/05   Time: 12:47
 * Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
 * added more error trapping
 *
 * *****************  Version 4  *****************
 * User: Walm         Date: 5/04/05    Time: 18:08
 * Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
 * review headers
 *
 */
package uk.gov.nationalarchives.droid.signatureFile;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import uk.gov.nationalarchives.droid.MessageDisplay;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
import uk.gov.nationalarchives.droid.xmlReader.SimpleElement;

/**
 * Holds a subsequence for a byte sequence.
 * Also contains most of the logic for identifying files.
 * <p/>
 * A subsequence consists of a main byte sequence plus optional left and right
 * fragments. The fragments are collected while the signature file is parsed
 * and must be re-ordered with {@link #prepareSeqFragments()} before any of the
 * search methods is used.
 *
 * @author Martin Waller
 * @version 4.0.0
 */
public class SubSequence extends SimpleElement {

    private final Log log = LogFactory.getLog(this.getClass());

    private String reference;
    private int parentSignature;
    private boolean bigEndian = true;
    private ByteSequence parentByteSequence;

    int position;
    int minSeqOffset = 0;
    int maxSeqOffset = 0;
    int minFragLength;
    String sequence;
    // Shift table indexed by (signed byte value + 128); see getShift / setShift.
    long[] shiftFunction = new long[256];
    // Raw fragments in parse order; set to null by prepareSeqFragments().
    List<LeftFragment> leftFragments = new ArrayList<LeftFragment>();
    List<RightFragment> rightFragments = new ArrayList<RightFragment>();
    byte[] byteSequence;
    // Fragments grouped by position: element (p - 1) holds the alternatives for position p.
    List<List<SideFragment>> orderedLeftFragments = new ArrayList<List<SideFragment>>();
    List<List<SideFragment>> orderedRightFragments = new ArrayList<List<SideFragment>>();
    static boolean showProgress = false;

    /**
     * @return the byte sequence that this subsequence belongs to
     */
    public ByteSequence getByteSequence() {
        return parentByteSequence;
    }

    public void setByteSequence(ByteSequence byteSequence) {
        this.parentByteSequence = byteSequence;
    }

    /**
     * Is this an EOF subsequence at position 1?
     * If this subsequence does not match then
     * we can reject the entire signature.
     *
     * @return true if the reference is "EOFoffset" (ignoring case) and the position is 1
     */
    public boolean isEOF() {
        return reference.equalsIgnoreCase("EOFoffset") && (position == 1);
    }

    /**
     * Is this a BOF subsequence at position 1?
     * If this subsequence does not match then
     * we can reject the entire signature.
     *
     * @return true if the reference is "BOFoffset" (ignoring case) and the position is 1
     */
    public boolean isBOF() {
        return reference.equalsIgnoreCase("BOFoffset") && (position == 1);
    }

    public void setBigEndian(boolean bigEndian) {
        this.bigEndian = bigEndian;
    }

    public boolean isBigEndian() {
        return bigEndian;
    }

    public void setReference(String reference) {
        this.reference = reference;
    }

    /**
     * Get the id of the internal
     * signature that this sequence belongs to.
     *
     * @return the parent signature id
     */
    public int getParentSignature() {
        return parentSignature;
    }

    public void setParentSignature(int parentSignature) {
        this.parentSignature = parentSignature;
    }

    /* setters */

    public void addLeftFragment(LeftFragment lf) {
        leftFragments.add(lf);
    }

    public void addRightFragment(RightFragment lf) {
        rightFragments.add(lf);
    }

    public void setPosition(int position) {
        this.position = position;
    }

    /**
     * Stores one entry of the shift table.
     * <p/>
     * The shift byte is given as an unsigned value (0..255) and is stored at the index
     * of the corresponding signed byte value plus 128, which is the index that
     * {@link #getShift(byte)} reads. Values outside 0..255 are ignored.
     *
     * @param theShift the shift byte and the shift value to store for it
     */
    public void setShift(Shift theShift) {
        int theShiftByte = theShift.getShiftByte();
        if (theShiftByte >= 0 && theShiftByte < 128) {
            this.shiftFunction[theShiftByte + 128] = theShift.getShiftValue();
        } else if (theShiftByte >= 128 && theShiftByte < 256) {
            this.shiftFunction[theShiftByte - 128] = theShift.getShiftValue();
        }
    }

    /**
     * Sets every entry of the shift table to the same value.
     *
     * @param theValue decimal string holding the default shift
     * @throws NumberFormatException if the value is not a valid long
     */
    public void setDefaultShift(String theValue) {
        // Parse once instead of once per table entry.
        long defaultShift = Long.parseLong(theValue);
        Arrays.fill(this.shiftFunction, defaultShift);
    }

    /**
     * Sets the main sequence from a string of hexadecimal digit pairs.
     * <p/>
     * If the string has an odd length an error is logged and the trailing
     * character is ignored.
     *
     * @param seq the sequence as hexadecimal text, two characters per byte
     */
    public void setSequence(String seq) {
        this.sequence = seq;
        int seqLength = seq.length() / 2;
        if (2 * seqLength != seq.length()) {
            log.error("A problem - sequence of odd length was found: " + seq);
        }
        byteSequence = new byte[seqLength];
        for (int i = 0; i < seqLength; i++) {
            int byteVal = Integer.parseInt(seq.substring(2 * i, 2 * (i + 1)), 16);
            // The narrowing cast maps 128..255 onto -128..-1.
            byteSequence[i] = (byte) byteVal;
        }
    }

    /**
     * Sets the minimum offset; the maximum offset is raised to match if it would otherwise be smaller.
     */
    public void setMinSeqOffset(int theOffset) {
        this.minSeqOffset = theOffset;
        if (this.maxSeqOffset < this.minSeqOffset) {
            this.maxSeqOffset = this.minSeqOffset;
        }
    }

    /**
     * Sets the maximum offset; a value below the current minimum is replaced by the minimum.
     */
    public void setMaxSeqOffset(int theOffset) {
        this.maxSeqOffset = theOffset;
        if (this.maxSeqOffset < this.minSeqOffset) {
            this.maxSeqOffset = this.minSeqOffset;
        }
    }

    public void setMinFragLength(int theLength) {
        this.minFragLength = theLength;
    }

    /**
     * Sets a property from its attribute name and string value.
     * Unknown attribute names are reported through {@link MessageDisplay}.
     *
     * @param name  attribute name
     * @param value attribute value; must parse as an int for the known attributes
     */
    public void setAttributeValue(String name, String value) {
        if (name.equals("Position")) {
            setPosition(Integer.parseInt(value));
        } else if (name.equals("SubSeqMinOffset")) {
            setMinSeqOffset(Integer.parseInt(value));
        } else if (name.equals("SubSeqMaxOffset")) {
            setMaxSeqOffset(Integer.parseInt(value));
        } else if (name.equals("MinFragLength")) {
            setMinFragLength(Integer.parseInt(value));
        } else {
            MessageDisplay.unknownAttributeWarning(name, this.getElementName());
        }
    }

    /* getters */

    /**
     * @param leftFrag true for left fragments, false for right fragments
     * @return the number of fragment positions on the requested side
     */
    public int getNumFragmentPositions(boolean leftFrag) {
        if (leftFrag) {
            return this.orderedLeftFragments.size();
        } else {
            return this.orderedRightFragments.size();
        }
    }

    /**
     * @param leftFrag    true for left fragments, false for right fragments
     * @param thePosition 1-based fragment position
     * @return the number of alternative fragments at that position
     */
    public int getNumAlternativeFragments(boolean leftFrag, int thePosition) {
        if (leftFrag) {
            return this.orderedLeftFragments.get(thePosition - 1).size();
        } else {
            return this.orderedRightFragments.get(thePosition - 1).size();
        }
    }

    /**
     * @param leftFrag    true for left fragments, false for right fragments
     * @param thePosition 1-based fragment position
     * @param theIndex    0-based index of the alternative within the position
     * @return the requested fragment
     */
    public SideFragment getFragment(boolean leftFrag, int thePosition, int theIndex) {
        if (leftFrag) {
            return this.orderedLeftFragments.get(thePosition - 1).get(theIndex);
        } else {
            return this.orderedRightFragments.get(thePosition - 1).get(theIndex);
        }
    }

    /**
     * @param theByteValue a (signed) byte value
     * @return the shift stored for that byte value
     */
    public long getShift(byte theByteValue) {
        // this.shiftFunction is a long[256] array indexed by signed value + 128
        return this.shiftFunction[theByteValue + 128];
    }

    public String getSequence() {
        return sequence;
    }

    public byte getByte(int theIndex) {
        return byteSequence[theIndex];
    }

    public int getNumBytes() {
        return byteSequence.length;
    }

    /**
     * @return the raw left fragments, or null once {@link #prepareSeqFragments()} has run
     */
    public List<LeftFragment> getLeftFragments() {
        return leftFragments;
    }

    /**
     * @return the raw right fragments, or null once {@link #prepareSeqFragments()} has run
     */
    public List<RightFragment> getRightFragments() {
        return rightFragments;
    }

    public LeftFragment getRawLeftFragment(int theIndex) {
        return leftFragments.get(theIndex);
    }

    public RightFragment getRawRightFragment(int theIndex) {
        return rightFragments.get(theIndex);
    }

    public int getPosition() {
        return position;
    }

    public int getMinSeqOffset() {
        return minSeqOffset;
    }

    public int getMaxSeqOffset() {
        return maxSeqOffset;
    }

    public int getMinFragLength() {
        return minFragLength;
    }

    /**
     * Re-orders the left and right sequence fragments in increasing position order.
     * This method must be called after the signature file has been parsed and
     * before running any file identifications.
     * <p/>
     * The raw fragment lists are set to null afterwards, so this method can only be
     * called once and the raw fragment getters are unusable after it.
     */
    public void prepareSeqFragments() {
        /* Left fragments */
        // Determine the number of fragment positions (the highest position in use)
        int numFrags = 0;
        for (LeftFragment fragment : leftFragments) {
            numFrags = Math.max(numFrags, fragment.getPosition());
        }
        // Initialise one list of alternatives per position
        for (int i = 0; i < numFrags; i++) {
            orderedLeftFragments.add(new ArrayList<SideFragment>());
        }
        // Add fragments to the new structure
        for (LeftFragment fragment : leftFragments) {
            orderedLeftFragments.get(fragment.getPosition() - 1).add(fragment);
        }
        // Clear out unnecessary info
        this.leftFragments = null;

        /* Right fragments */
        numFrags = 0;
        for (RightFragment fragment : rightFragments) {
            numFrags = Math.max(numFrags, fragment.getPosition());
        }
        for (int i = 0; i < numFrags; i++) {
            orderedRightFragments.add(new ArrayList<SideFragment>());
        }
        for (RightFragment fragment : rightFragments) {
            orderedRightFragments.get(fragment.getPosition() - 1).add(fragment);
        }
        this.rightFragments = null;
    }

    /**
     * Searches for this subsequence after the current file marker position in the file.
     * Moves the file marker to the end of this subsequence.
     * <p/>
     * Unless the early size check fails, the file marker is first advanced by the
     * minimum sequence offset; it stays there if the subsequence is not found.
     *
     * @param targetFile   the binary file to be identified
     * @param reverseOrder true if file is being searched from right to left
     * @param bigEndian    True iff our parent signature is big-endian
     * @return true if the main sequence and all its fragments were found
     */
    public boolean isFoundAfterFileMarker(ByteReader targetFile, boolean reverseOrder, boolean bigEndian) {
        boolean subSeqFound = false;
        try {
            // Index of the last byte in the file
            long fileSize = targetFile.getNumBytes() - 1;
            int searchDirection = reverseOrder ? -1 : 1;

            // Get the current file marker
            long startPosInFile = targetFile.getFileMarker();
            // Add the minimum offset before start of sequence and update the file marker accordingly
            startPosInFile = startPosInFile + (long) (searchDirection * this.getMinSeqOffset());
            if (fileSize < startPosInFile - 1) {
                // We're looking for a sequence of bytes at an offset which is longer than the file itself
                return false;
            }
            targetFile.setFileMarker(startPosInFile);

            // Start searching for main sequence after the minimum length of the relevant fragments
            startPosInFile = startPosInFile + (long) (searchDirection * this.getMinFragLength());
            int numSeqBytes = this.getNumBytes();
            int byteLoopStart = reverseOrder ? numSeqBytes - 1 : 0;
            int byteLoopEnd = reverseOrder ? 0 : numSeqBytes - 1;

            try {
                while (!subSeqFound) {
                    boolean missMatchFound = false;

                    // Start by checking the last byte in the window on the file.
                    // If this byte is different from the last byte in the subsequence,
                    // then we may shift the window according to the value of this byte.
                    // In practice, this saves us from unnecessarily checking file bytes to calculate the shift.
                    byte lastByte = targetFile.getByte(startPosInFile + byteLoopEnd);
                    if (byteSequence[byteLoopEnd] != lastByte) {
                        startPosInFile += (this.shiftFunction[128 + lastByte] - 1);
                        if ((startPosInFile < 0L) || (startPosInFile > fileSize)) {
                            break;
                        }
                    } else {
                        // The last bytes match, so check the rest of the window.
                        for (int iByte = byteLoopStart;
                             (!missMatchFound) && (iByte <= numSeqBytes - 1) && (iByte >= 0);
                             iByte += searchDirection) {
                            missMatchFound =
                                    (byteSequence[iByte] != targetFile.getByte(startPosInFile + iByte - byteLoopStart));
                        }

                        if (!missMatchFound) {
                            // Subsequence was found at position startPosInFile in the file.
                            // Now search for fragments between original fileMarker and startPosInFile.
                            if (reverseOrder) {
                                long[] rightFragEndArray = bytePosForRightFragments(targetFile,
                                        startPosInFile + 1, targetFile.getFileMarker(), 1, 0, bigEndian);
                                if (rightFragEndArray.length == 0) {
                                    missMatchFound = true;
                                } else {
                                    long[] leftFragEndArray = bytePosForLeftFragments(targetFile,
                                            0, startPosInFile - numSeqBytes, -1, 0, bigEndian);
                                    if (leftFragEndArray.length == 0) {
                                        missMatchFound = true;
                                    } else {
                                        targetFile.setFileMarker(leftFragEndArray[0] - 1L);
                                        subSeqFound = true;
                                    }
                                }
                            } else {
                                // Search is in forward direction
                                long[] leftFragEndArray = bytePosForLeftFragments(targetFile,
                                        targetFile.getFileMarker(), startPosInFile - 1L, -1, 0, bigEndian);
                                if (leftFragEndArray.length == 0) {
                                    missMatchFound = true;
                                } else {
                                    long[] rightFragEndArray = bytePosForRightFragments(targetFile,
                                            startPosInFile + numSeqBytes, targetFile.getNumBytes() - 1L,
                                            1, 0, bigEndian);
                                    if (rightFragEndArray.length == 0) {
                                        missMatchFound = true;
                                    } else {
                                        targetFile.setFileMarker(rightFragEndArray[0] + 1L);
                                        subSeqFound = true;
                                    }
                                }
                            }
                        }

                        if (missMatchFound) {
                            // If a mismatch is found, then shift the window by a shift calculated from the value
                            // of the file byte occurring one place after the window position.
                            startPosInFile += this.shiftFunction[128
                                    + targetFile.getByte(startPosInFile + (long) (searchDirection * numSeqBytes))];
                            if ((startPosInFile < 0L) || (startPosInFile > fileSize)) {
                                break;
                            }
                        }
                    }
                }
            } catch (IndexOutOfBoundsException e) {
                // This only happens when the end of the file is reached.
                // This exception is allowed to be thrown to avoid repeatedly checking if the index is valid
                // and to hence improve the performance of DROID
            }
        } catch (IndexOutOfBoundsException e) {
            // This is thrown if targetFile is a URLByteReader, the embedded HeapByteBuffer will check for each access
            // and throw java.lang.IndexOutOfBoundsException if we are on or past the limit
        }
        return subSeqFound;
    }

    /**
     * Interpret the bytes in a file as an offset.
     * <p/>
     * The next <code>indirectOffsetLength()</code> bytes after <code>indirectOffsetLocation()</code> are interpreted
     * as an unsigned offset according to the endianness of this subsequence. If the parent byte
     * sequence's reference ends with "EOFoffset", the location is counted back from the last byte of the file.
     *
     * @param targetFile the file to read the offset bytes from
     * @return the offset read from the file
     * @throws ArithmeticException if the value read does not fit into an int
     */
    private int getIndirectOffset(ByteReader targetFile) {
        long offsetLocation = this.getByteSequence().getIndirectOffsetLocation();
        if (this.getByteSequence().getReference().endsWith("EOFoffset")) {
            offsetLocation = targetFile.getNumBytes() - offsetLocation - 1;
        }
        int offsetLength = this.getByteSequence().getIndirectOffsetLength();
        boolean mostSignificantFirst = this.isBigEndian();

        // In the case of indirect BOF or indirect EOF bytesequences,
        // we need to read the file to get the offset.
        long offset = 0L;
        long power = 1L;
        for (int k = 0; k < offsetLength; k++) {
            // Always consume the least significant byte first: that is the last byte
            // for a big-endian value and the first byte for a little-endian one.
            int i = mostSignificantFirst ? offsetLength - 1 - k : k;
            int byteValue = targetFile.getByte(offsetLocation + i) & 0xFF;
            if (byteValue != 0) {
                if (power > Integer.MAX_VALUE) {
                    throw new ArithmeticException("Indirect offset does not fit into an int");
                }
                offset += power * byteValue;
                if (offset > Integer.MAX_VALUE) {
                    throw new ArithmeticException("Indirect offset does not fit into an int");
                }
            }
            // Stop growing the multiplier once it is out of int range so it cannot overflow the long.
            if (power <= Integer.MAX_VALUE) {
                power *= 256L;
            }
        }
        return (int) offset;
    }

    /**
     * Searches for this subsequence at the start of the current file.
     * Moves the file marker to the end of this subsequence.
     * <p/>
     * For a reverse search the "start" is the end of the file. The file marker is only
     * changed when the subsequence is found.
     *
     * @param targetFile   the binary file to be identified
     * @param reverseOrder true if file is being searched from right to left
     * @param bigEndian    True iff our parent signature is big-endian
     * @return true if the main sequence and all its fragments were found
     */
    public boolean isFoundAtStartOfFile(ByteReader targetFile, boolean reverseOrder, boolean bigEndian) {
        try {
            int searchDirection = reverseOrder ? -1 : 1;
            int seqMinOffset = this.getMinSeqOffset();
            int seqMaxOffset = this.getMaxSeqOffset();

            // Get any indirect offset
            if (this.reference.startsWith("Indirect")) {
                try {
                    int indirectOffset = this.getIndirectOffset(targetFile);
                    seqMinOffset += indirectOffset;
                    seqMaxOffset += indirectOffset;
                } catch (Exception e) {
                    // If an exception is thrown, we can assume that the file did not match the indirect offset
                    // eg. the indirect offset found could be too large to be held in an int scope
                    return false;
                }
            }

            long firstPos = reverseOrder ? targetFile.getNumBytes() - seqMinOffset - 1 : seqMinOffset;
            boolean subseqFound = true;
            // The fragments met first are the right ones in a reverse search, the left ones otherwise
            boolean leftFrag = !reverseOrder;

            // Match initial fragment
            long[] startPosInFile;
            if (reverseOrder) {
                startPosInFile = bytePosForRightFragments(targetFile, 0, firstPos, -1,
                        (seqMaxOffset - seqMinOffset), bigEndian);
            } else {
                startPosInFile = bytePosForLeftFragments(targetFile, firstPos, targetFile.getNumBytes() - 1, 1,
                        (seqMaxOffset - seqMinOffset), bigEndian);
            }
            int numOptions = startPosInFile.length;
            if (numOptions == 0) {
                subseqFound = false;
            } else {
                for (int i = 0; i < numOptions; i++) {
                    startPosInFile[i] += (long) searchDirection;
                }
            }

            // Match main sequence
            if (subseqFound) {
                // Move startPosInFile according to min offset of last fragment looked at
                int minOffset = 0;
                int maxOffset = 0;
                if (this.getNumFragmentPositions(leftFrag) > 0) {
                    minOffset = this.getFragment(leftFrag, 1, 0).getMinOffset();
                    maxOffset = this.getFragment(leftFrag, 1, 0).getMaxOffset();
                    for (int i = 0; i < numOptions; i++) {
                        startPosInFile[i] += (long) (minOffset * searchDirection);
                    }
                }

                // Add new possible values for startPosInFile to allow for difference between maxOffset and minOffset
                int offsetRange = maxOffset - minOffset;
                if (offsetRange > 0) {
                    int numCandidates = numOptions * (offsetRange + 1);
                    long[] newStartPosInFile = new long[numCandidates];
                    for (int i = 0; i <= offsetRange; i++) {
                        for (int j = 0; j < numOptions; j++) {
                            newStartPosInFile[j + i * numOptions] = startPosInFile[j] + (long) (i * searchDirection);
                        }
                    }
                    // Sort ascending and squeeze out duplicates in place
                    Arrays.sort(newStartPosInFile);
                    int newNumOptions = 1;
                    for (int i = 1; i < numCandidates; i++) {
                        if (newStartPosInFile[i] > newStartPosInFile[newNumOptions - 1]) {
                            newStartPosInFile[newNumOptions] = newStartPosInFile[i];
                            newNumOptions++;
                        }
                    }

                    // Now copy these back to the startPosInFile array (sorted in searchDirection).
                    // The expansion can yield more candidates than the old array can hold,
                    // so a correctly sized array is allocated first.
                    numOptions = newNumOptions;
                    startPosInFile = new long[numOptions];
                    if (searchDirection > 0) {
                        System.arraycopy(newStartPosInFile, 0, startPosInFile, 0, numOptions);
                    } else {
                        // Reverse order copy
                        for (int i = 0; i < numOptions; i++) {
                            startPosInFile[i] = newStartPosInFile[numOptions - 1 - i];
                        }
                    }
                }

                // Check that the end of the file is not going to be reached.
                // The candidates are ordered in search direction, so everything from the
                // first out-of-range candidate onwards is dropped.
                int numSeqBytes = this.getNumBytes();
                long numBytesInFile = targetFile.getNumBytes();
                if (reverseOrder) {
                    // Cutoff if startPosInFile is too close to start of file
                    for (int i = 0; i < numOptions; i++) {
                        if (startPosInFile[i] < ((long) numSeqBytes - 1L)) {
                            numOptions = i;
                            break;
                        }
                    }
                } else {
                    // Cutoff if startPosInFile is too close to end of file
                    for (int i = 0; i < numOptions; i++) {
                        if (startPosInFile[i] > (numBytesInFile - (long) numSeqBytes)) {
                            numOptions = i;
                            break;
                        }
                    }
                }

                // Compare sequence with file contents at each candidate position
                int byteLoopStart = reverseOrder ? numSeqBytes - 1 : 0;
                for (int iOption = 0; iOption < numOptions; iOption++) {
                    long tempFileMarker = startPosInFile[iOption];
                    boolean provSeqMatch = true;

                    // Check whether the file and signature sequences match
                    for (int iByte = byteLoopStart;
                         (provSeqMatch) && (iByte <= numSeqBytes - 1) && (iByte >= 0);
                         iByte += searchDirection) {
                        provSeqMatch = (byteSequence[iByte] == targetFile.getByte(tempFileMarker));
                        tempFileMarker += searchDirection;
                    }

                    if (!provSeqMatch) {
                        // No match: mark the candidate for removal
                        startPosInFile[iOption] = -2L;
                    } else {
                        // Success: a match was found - update the startPosInFile
                        startPosInFile[iOption] = tempFileMarker;
                    }
                }

                // Check the startPosInFile array: remove -2 values, reorder and remove duplicates
                Arrays.sort(startPosInFile, 0, numOptions);
                int newNumOptions = 0;
                long[] newStartPosInFile = new long[numOptions];
                if (numOptions > 0) {
                    if (startPosInFile[0] >= -1L) {
                        newStartPosInFile[0] = startPosInFile[0];
                        newNumOptions = 1;
                    }
                }
                for (int i = 1; i < numOptions; i++) {
                    if (startPosInFile[i] > startPosInFile[i - 1]) {
                        newStartPosInFile[newNumOptions] = startPosInFile[i];
                        newNumOptions++;
                    }
                }

                if (newNumOptions == 0) {
                    subseqFound = false;
                } else {
                    numOptions = newNumOptions;
                    if (searchDirection < 0) {
                        // For right to left search direction, reorder in reverse
                        for (int iOption = 0; iOption < numOptions; iOption++) {
                            startPosInFile[iOption] = newStartPosInFile[numOptions - 1 - iOption];
                        }
                    } else {
                        // For left to right search direction, copy over as is
                        System.arraycopy(newStartPosInFile, 0, startPosInFile, 0, numOptions);
                    }
                }
            }

            // Match remaining sequence fragment; the first candidate that works wins
            long newValueStartPosInFile = 0L;
            if (subseqFound) {
                subseqFound = false;
                for (int i = 0; i < numOptions && !subseqFound; i++) {
                    if (reverseOrder) {
                        long[] fragEndArray = bytePosForLeftFragments(targetFile,
                                0L, startPosInFile[i], -1, 0, bigEndian);
                        if (fragEndArray.length != 0) {
                            subseqFound = true;
                            newValueStartPosInFile = fragEndArray[0] - 1L; //take away -1???
                        }
                    } else {
                        long[] fragEndArray = bytePosForRightFragments(targetFile,
                                startPosInFile[i], targetFile.getNumBytes() - 1L, 1, 0, bigEndian);
                        if (fragEndArray.length != 0) {
                            subseqFound = true;
                            newValueStartPosInFile = fragEndArray[0] + 1L; //take away +1????
                        }
                    }
                }
            }

            // Update the file marker
            if (subseqFound) {
                targetFile.setFileMarker(newValueStartPosInFile);
            }
            return subseqFound;

        } catch (IndexOutOfBoundsException e) {
            // If an indirect offset points to a place that is after the end of the file,
            // then this exception is thrown and it can be assumed that the signature is not compliant
            return false;
        }
    }

    /**
     * Searches for the left fragments of this subsequence between the given byte
     * positions in the file. Returns the possible positions of the last byte taken up
     * by the identified fragments, ordered in the search direction, or an empty array
     * if no match was found.
     *
     * @param targetFile      the binary file to be identified
     * @param leftBytePos     left-most byte position of allowed search window on file
     * @param rightBytePos    right-most byte position of allowed search window on file
     * @param searchDirection 1 for a left to right search, -1 for right to left
     * @param offsetRange     range of possible start positions in the direction of searchDirection
     * @param bigEndian       True iff our parent signature is big-endian
     * @return the candidate end positions; empty if the fragments were not found
     */
    private long[] bytePosForLeftFragments(ByteReader targetFile, long leftBytePos, long rightBytePos,
                                           int searchDirection, int offsetRange, boolean bigEndian) {
        return bytePosForFragments(targetFile, leftBytePos, rightBytePos,
                searchDirection, offsetRange, bigEndian, true);
    }

    /**
     * Searches for the right fragments of this subsequence between the given byte
     * positions in the file. Returns the possible positions of the last byte taken up
     * by the identified fragments, ordered in the search direction, or an empty array
     * if no match was found.
     *
     * @param targetFile      the binary file to be identified
     * @param leftBytePos     left-most byte position of allowed search window on file
     * @param rightBytePos    right-most byte position of allowed search window on file
     * @param searchDirection 1 for a left to right search, -1 for right to left
     * @param offsetRange     range of possible start positions in the direction of searchDirection
     * @param bigEndian       True iff our parent signature is big-endian
     * @return the candidate end positions; empty if the fragments were not found
     */
    private long[] bytePosForRightFragments(ByteReader targetFile, long leftBytePos, long rightBytePos,
                                            int searchDirection, int offsetRange, boolean bigEndian) {
        return bytePosForFragments(targetFile, leftBytePos, rightBytePos,
                searchDirection, offsetRange, bigEndian, false);
    }

    /**
     * Shared implementation of the left and right fragment searches.
     * <p/>
     * Walks through the fragment positions on one side in the order in which a search in
     * the given direction meets them, and tracks every distinct file position at which
     * the chain of fragments matched so far can end.
     *
     * @param targetFile      the binary file to be identified
     * @param leftBytePos     left-most byte position of allowed search window on file
     * @param rightBytePos    right-most byte position of allowed search window on file
     * @param searchDirection 1 for a left to right search, -1 for right to left
     * @param offsetRange     range of possible start positions in the direction of searchDirection
     * @param bigEndian       True iff our parent signature is big-endian
     * @param leftFrag        true to search the left fragments, false for the right fragments
     * @return the candidate end positions ordered in search direction; empty if nothing matched
     */
    private long[] bytePosForFragments(ByteReader targetFile, long leftBytePos, long rightBytePos,
                                       int searchDirection, int offsetRange, boolean bigEndian,
                                       boolean leftFrag) {
        boolean forward = (searchDirection == 1);
        int numFragPos = this.getNumFragmentPositions(leftFrag);

        // A forward search starts at the left edge of the window, a backward one at the right edge
        long startPos = forward ? leftBytePos : rightBytePos;

        // Left fragment positions are visited in the opposite order to the search direction,
        // right fragment positions in the same order.
        int fragStep = leftFrag ? -searchDirection : searchDirection;
        int posLoopStart = (fragStep < 0) ? numFragPos : 1;

        // Now set up the array so that it can potentially hold all possibilities
        int totalNumOptions = offsetRange + 1;
        for (int iFragPos = 1; iFragPos <= numFragPos; iFragPos++) {
            totalNumOptions = totalNumOptions * this.getNumAlternativeFragments(leftFrag, iFragPos);
        }
        long[] markerPos = new long[totalNumOptions];
        for (int iOffset = 0; iOffset <= offsetRange; iOffset++) {
            markerPos[iOffset] = startPos + iOffset * searchDirection;
        }
        int numOptions = 1 + offsetRange;

        boolean seqNotFound = false;
        for (int iFragPos = posLoopStart;
             (!seqNotFound) && (iFragPos <= numFragPos) && (iFragPos >= 1);
             iFragPos += fragStep) {
            int numAltFrags = this.getNumAlternativeFragments(leftFrag, iFragPos);
            // Array to store possible end positions after this fragment position has been examined
            long[] tempEndPos = new long[numAltFrags * numOptions];
            int numEndPos = 0;

            for (int iOption = 0; iOption < numOptions; iOption++) {
                // Will now look for all matching alternative sequences at the current end positions
                for (int iAlt = 0; iAlt < numAltFrags; iAlt++) {
                    long tempFragEnd;
                    if (forward) {
                        tempFragEnd = this.endBytePosForSeqFrag(targetFile, markerPos[iOption], rightBytePos,
                                leftFrag, searchDirection, iFragPos, iAlt, bigEndian);
                    } else {
                        tempFragEnd = this.endBytePosForSeqFrag(targetFile, leftBytePos, markerPos[iOption],
                                leftFrag, searchDirection, iFragPos, iAlt, bigEndian);
                    }
                    if (tempFragEnd > -1L) {
                        // A match has been found
                        tempEndPos[numEndPos] = tempFragEnd + searchDirection;
                        numEndPos += 1;
                    }
                }
            }

            if (numEndPos == 0) {
                seqNotFound = true;
            } else {
                numOptions = 0;
                for (int iOption = 0; iOption < numEndPos; iOption++) {
                    // Eliminate any repeated end positions
                    boolean addEndPos = true;
                    for (int iMarker = 0; iMarker < numOptions; iMarker++) {
                        if (markerPos[iMarker] == tempEndPos[iOption]) {
                            addEndPos = false;
                            break;
                        }
                    }
                    if (addEndPos) {
                        markerPos[numOptions] = tempEndPos[iOption];
                        numOptions++;
                    }
                }
            }
        }

        // Prepare array to be returned
        if (seqNotFound) {
            // No possible positions found, return 0 length array
            return new long[0];
        }

        // Convert values to negative temporarily so that reverse sort order can be
        // obtained for a right to left search direction
        if (searchDirection < 0) {
            for (int iOption = 0; iOption < numOptions; iOption++) {
                markerPos[iOption] = -markerPos[iOption];
            }
        }
        Arrays.sort(markerPos, 0, numOptions);
        // Convert values back to positive now that a reverse sort order has been obtained
        if (searchDirection < 0) {
            for (int iOption = 0; iOption < numOptions; iOption++) {
                markerPos[iOption] = -markerPos[iOption];
            }
        }

        // Copy to a new array which has precisely the correct length
        long[] outArray = new long[numOptions];
        System.arraycopy(markerPos, 0, outArray, 0, numOptions);
        // Correct the value: step back from "one past the end" to the last byte used
        for (int iOption = 0; iOption < numOptions; iOption++) {
            outArray[iOption] -= (long) searchDirection;
        }
        return outArray;
    }

    /**
     * Searches for the specified fragment sequence
     * between the leftmost and rightmost byte positions that are given.
     * Returns the end position of the found sequence or -1 if it is not found.
     *
     * @param targetFile      The file that is being reviewed for identifier
     * @param leftEndBytePos  leftmost position in file at which to search
     * @param rightEndBytePos rightmost position in file at which to search
     * @param leftFrag        flag to indicate whether looking at left or right fragments
     * @param searchDirection direction in which search is carried out (1 for left to right, -1 for right to left)
     * @param fragPos         position of left/right sequence fragment to use
     * @param fragIndex       index of fragment within the position (where alternatives exist)
     * @param bigEndian       True iff our parent signature is big-endian
     * @return the file position of the last byte of the matched fragment, or -1 if there is no match
     */
    private long endBytePosForSeqFrag(ByteReader targetFile, long leftEndBytePos, long rightEndBytePos,
                                      boolean leftFrag, int searchDirection, int fragPos, int fragIndex,
                                      boolean bigEndian) {
        long endPosInFile = -1L;
        long searchDirectionL = (long) searchDirection;

        // Read in values
        SideFragment fragment = this.getFragment(leftFrag, fragPos, fragIndex);
        int numBytes = fragment.getNumBytes();
        int minOffset;
        int maxOffset;
        if ((leftFrag && (searchDirection == -1)) || (!leftFrag && (searchDirection == 1))) {
            // Left fragments searched right to left, or right fragments searched left to right:
            // use this fragment's own offsets
            minOffset = fragment.getMinOffset();
            maxOffset = fragment.getMaxOffset();
        } else if (fragPos < this.getNumFragmentPositions(leftFrag)) {
            // Otherwise use the offsets of the first alternative at the next position
            minOffset = this.getFragment(leftFrag, fragPos + 1, 0).getMinOffset();
            maxOffset = this.getFragment(leftFrag, fragPos + 1, 0).getMaxOffset();
        } else {
            minOffset = 0;
            maxOffset = 0;
        }

        // Set up start and end positions for searches taking into account min and max offsets
        long startPosInFile;
        long lastStartPosInFile;
        if (searchDirection == -1) {
            startPosInFile = rightEndBytePos - (long) minOffset;
            long lastStartPosInFile1 = leftEndBytePos + (long) numBytes - 1L;
            long lastStartPosInFile2 = rightEndBytePos - (long) maxOffset;
            lastStartPosInFile = Math.max(lastStartPosInFile1, lastStartPosInFile2);
        } else {
            startPosInFile = leftEndBytePos + (long) minOffset;
            long lastStartPosInFile1 = rightEndBytePos - (long) numBytes + 1L;
            long lastStartPosInFile2 = leftEndBytePos + (long) maxOffset;
            lastStartPosInFile = Math.min(lastStartPosInFile1, lastStartPosInFile2);
        }

        // Keep searching until either the sequence fragment is found or until the end of the
        // search area has been reached.
        int numSpecifiers = fragment.getNumByteSeqSpecifiers();
        boolean subSeqFound = false;
        while ((!subSeqFound) && ((searchDirectionL) * (lastStartPosInFile - startPosInFile) >= 0L)) {
            boolean missMatchFound = false;
            long tempFileMarker = startPosInFile;

            // Compare each byte sequence specifier with the file, in search direction
            for (int i = (searchDirection == 1) ? 0 : numSpecifiers - 1;
                 !missMatchFound && 0 <= i && i < numSpecifiers;
                 i += searchDirection) {
                missMatchFound = !fragment.getByteSeqSpecifier(i)
                        .matchesByteSequence(targetFile, tempFileMarker, searchDirection, bigEndian);
                if (!missMatchFound) {
                    tempFileMarker += searchDirection * fragment.getByteSeqSpecifier(i).getNumBytes();
                }
            }

            if (!missMatchFound) {
                // Subsequence fragment was found in the file
                subSeqFound = true;
                endPosInFile = tempFileMarker - searchDirectionL;
            } else {
                startPosInFile += searchDirectionL;
            }
        }
        return endPosInFile; // this is -1 unless subSeqFound = true
    }

    @Override
    public String toString() {
        return position + " seq=<" + sequence + ">"
                + "LLL" + orderedLeftFragments + "LLL"
                + "RRR" + orderedRightFragments + "RRR";
    }
}