/*
* Copyright The National Archives 2005-2006. All rights reserved.
* See Licence.txt for full licence details.
*
* Developed by:
* Tessella Support Services plc
* 3 Vineyard Chambers
* Abingdon, OX14 3PX
* United Kingdom
* http://www.tessella.com
*
* Tessella/NPD/4305
* PRONOM 4
*
* $Id: FFSignatureFile.java,v 1.6 2006/03/13 15:15:29 linb Exp $
*
* $Log: FFSignatureFile.java,v $
* Revision 1.6 2006/03/13 15:15:29 linb
* Changed copyright holder from Crown Copyright to The National Archives.
* Added reference to licence.txt
* Changed dates to 2005-2006
*
* Revision 1.5 2006/02/07 17:16:22 linb
* - Change fileReader to ByteReader in formal parameters of methods
* - use new static constructors
* - Add detection of if a filePath is a URL or not
*
* Revision 1.4 2006/02/07 12:34:57 gaur
* Removed restriction on priority relationships so that they can be applied between any combination of generic and specific signatures (second recommit because of missing logging)
*
*
* $History: FFSignatureFile.java $
*
* ***************** Version 7 *****************
* User: Walm Date: 19/04/05 Time: 18:24
* Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
* Provide initial values for version and dateCreated
*
* ***************** Version 6 *****************
* User: Walm Date: 18/03/05 Time: 12:39
* Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
* add some more exception handling
*
* ***************** Version 5 *****************
* User: Walm Date: 15/03/05 Time: 14:39
* Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
* fileReader class now holds reference to identificationFile object
*
* ***************** Version 4 *****************
* User: Mals Date: 14/03/05 Time: 15:08
* Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
* Takes into account of IdentificationFile objects in checkExtension
*
* ***************** Version 3 *****************
* User: Mals Date: 14/03/05 Time: 14:30
* Updated in $/PRONOM4/FFIT_SOURCE/signatureFile
* runFileIdentification accepts IdentificationFile parameter
*
*
* * *****************************************
* S. Morrissey For JHOVE2 Date 09/12/2009
* refactored to use IAnalysisController for constants,
* and AnalysisControllerUtil for static methods
*
*
*/
package uk.gov.nationalarchives.droid.signatureFile;
import java.util.ArrayList;
import java.util.List;
import uk.gov.nationalarchives.droid.FileFormatHit;
import uk.gov.nationalarchives.droid.JHOVE2IAnalysisController;
import uk.gov.nationalarchives.droid.MessageDisplay;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
import uk.gov.nationalarchives.droid.xmlReader.SimpleElement;
/**
 * Holds the details of a signature file and uses them to identify binary files.
 *
 * <p>After the signature file data has been read into this object (see
 * {@link #setFileFormatCollection} and {@link #setInternalSignatureCollection}),
 * {@link #prepareForUse()} must be called once before
 * {@link #runFileIdentification(ByteReader)} is used.
 *
 * @author Martin Waller
 * @version 4.0.0
 */
public class FFSignatureFile extends SimpleElement {

    /** Prefix of a byte sequence reference that is anchored to the beginning of the file. */
    private static final String BOF_REFERENCE_PREFIX = "BOF";

    /** Prefix of a byte sequence reference that is anchored to the end of the file. */
    private static final String EOF_REFERENCE_PREFIX = "EOF";

    private String version = "";
    private String dateCreated = "";
    private FileFormatCollection FFcollection;
    private InternalSignatureCollection intSigs;

    /* setters */

    /**
     * Sets the collection of file formats described by this signature file.
     *
     * @param coll the file format collection
     */
    public void setFileFormatCollection(FileFormatCollection coll) {
        this.FFcollection = coll;
    }

    /**
     * Sets the collection of internal signatures described by this signature file.
     *
     * @param col3 the internal signature collection
     */
    public void setInternalSignatureCollection(InternalSignatureCollection col3) {
        this.intSigs = col3;
    }

    private void setVersion(String vers) {
        this.version = vers;
    }

    private void setDateCreated(String dc) {
        this.dateCreated = dc;
    }

    /**
     * Stores the value of a named attribute.
     *
     * <p>{@code "Version"} (stored trimmed) and {@code "DateCreated"} (stored as given)
     * are recognised; any other name is reported through
     * {@link MessageDisplay#unknownAttributeWarning}.
     *
     * @param name  the attribute name
     * @param value the attribute value
     */
    public void setAttributeValue(String name, String value) {
        if (name.equals("Version")) {
            setVersion(value.trim());
        } else if (name.equals("DateCreated")) {
            setDateCreated(value);
        } else {
            MessageDisplay.unknownAttributeWarning(name, this.getElementName());
        }
    }

    /* getters */

    /**
     * @return the number of internal signatures held
     */
    public int getNumInternalSignatures() {
        return this.intSigs.getInternalSignatures().size();
    }

    /**
     * @return the list of internal signatures, as held by the underlying collection
     */
    public List<InternalSignature> getSignatures() {
        return intSigs.getInternalSignatures();
    }

    /**
     * @param theIndex zero-based position in the internal signature list
     * @return the internal signature at that position
     */
    public InternalSignature getInternalSignature(int theIndex) {
        return intSigs.getInternalSignatures().get(theIndex);
    }

    /**
     * @return the number of file formats held
     */
    public int getNumFileFormats() {
        return this.FFcollection.getFileFormats().size();
    }

    /**
     * @param theIndex zero-based position in the file format list
     * @return the file format at that position
     */
    public FileFormat getFileFormat(int theIndex) {
        return (FileFormat) FFcollection.getFileFormats().get(theIndex);
    }

    /**
     * @return the signature file version (empty string until set)
     */
    public String getVersion() {
        return version;
    }

    /**
     * @return the signature file creation date (empty string until set)
     */
    public String getDateCreated() {
        return dateCreated;
    }

    /**
     * This method must be run after the signature file data has been read
     * and before the FFSignatureFile class is used.
     * It points internal signatures to the fileFormat objects they identify,
     * and it ensures that the sequence fragments and the byte sequences are
     * in the correct order.
     */
    public void prepareForUse() {
        this.setAllSignatureFileFormats();
        this.reorderAllSequenceFragments();
        this.reorderByteSequences();
    }

    /**
     * Points all internal signatures to the fileFormat objects they identify.
     *
     * <p>For every internal signature ID listed by a file format, the first internal
     * signature with that ID has the file format added to it.
     */
    private void setAllSignatureFileFormats() {
        List<InternalSignature> signatures = this.getSignatures();
        for (int iFormat = 0; iFormat < this.getNumFileFormats(); iFormat++) {
            FileFormat format = this.getFileFormat(iFormat);
            for (int iFileSig = 0; iFileSig < format.getNumInternalSignatures(); iFileSig++) {
                int wantedID = format.getInternalSignatureID(iFileSig);
                // find the first internal signature with a matching ID
                for (InternalSignature sig : signatures) {
                    if (sig.getID() == wantedID) {
                        sig.addFileFormat(format);
                        break;
                    }
                }
            }
        }
    }

    /**
     * Run prepareSeqFragments on all subSequences within all ByteSequences within all internalSignatures.
     */
    private void reorderAllSequenceFragments() {
        for (InternalSignature sig : this.getSignatures()) {
            for (int iBS = 0; iBS < sig.getNumByteSequences(); iBS++) {
                ByteSequence seq = sig.getByteSequence(iBS);
                for (int iSS = 0; iSS < seq.getNumSubSequences(); iSS++) {
                    seq.getSubSequence(iSS).prepareSeqFragments();
                }
            }
        }
    }

    /**
     * Ensure that the BOFs and EOFs are searched for before the variable position byte sequences.
     *
     * <p>Each byte sequence of each internal signature is placed in exactly one of three
     * groups according to the prefix of its reference (BOF, EOF, anything else); the
     * signature's byte sequences are then reset to BOF group, EOF group, remaining group,
     * with the original relative order kept inside each group.
     */
    private void reorderByteSequences() {
        for (InternalSignature sig : this.getSignatures()) {
            List<ByteSequence> bofSequences = new ArrayList<ByteSequence>();
            List<ByteSequence> eofSequences = new ArrayList<ByteSequence>();
            List<ByteSequence> variableSequences = new ArrayList<ByteSequence>();
            for (int iBS = 0; iBS < sig.getNumByteSequences(); iBS++) {
                ByteSequence seq = sig.getByteSequence(iBS);
                String reference = seq.getReference();
                // The branches must be mutually exclusive: a BOF sequence that also fell
                // through to the final else would be listed twice in the reordered result.
                if (reference.startsWith(BOF_REFERENCE_PREFIX)) {
                    bofSequences.add(seq);
                } else if (reference.startsWith(EOF_REFERENCE_PREFIX)) {
                    eofSequences.add(seq);
                } else {
                    variableSequences.add(seq);
                }
            }
            List<ByteSequence> reordered = new ArrayList<ByteSequence>(sig.getNumByteSequences());
            reordered.addAll(bofSequences);
            reordered.addAll(eofSequences);
            reordered.addAll(variableSequences);
            sig.resetByteSequences(reordered);
        }
    }

    /**
     * Identify the current file.
     *
     * <p>Records a positive hit for every file format of every internal signature the
     * file complies with, removes hits that are outranked by another hit, checks the
     * file extension against the remaining hits (or uses it to find tentative hits when
     * there are none), and finally marks the file as unidentified if no hit remains.
     *
     * <p>The method is synchronized on this instance.
     *
     * @param targetFile The binary file to be identified
     */
    public synchronized void runFileIdentification(ByteReader targetFile) {
        List<InternalSignature> signatureList = getSignatures();
        // record all positive identifications
        for (InternalSignature internalSig : signatureList) {
            if (internalSig.isFileCompliant(targetFile)) {
                // File matches this internal signature
                targetFile.setPositiveIdent();
                for (int i = 0; i < internalSig.getNumFileFormats(); i++) {
                    FileFormatHit fileHit = new FileFormatHit(internalSig.getFileFormat(i),
                            JHOVE2IAnalysisController.HIT_TYPE_POSITIVE_GENERIC_OR_SPECIFIC,
                            internalSig.isSpecific(), "");
                    targetFile.addHit(fileHit);
                }
            }
        }
        // remove any hits for which there is a higher priority hit
        if (targetFile.getNumHits() > 1) {
            this.removeLowerPriorityHits(targetFile);
        }
        // carry out file extension checking
        this.checkExtension(targetFile);
        // if there are still no hits then classify as unidentified
        if (targetFile.getNumHits() == 0) {
            targetFile.setNoIdent();
        }
    }

    /**
     * Remove any hits for which there is a higher priority hit.
     *
     * <p>For every format ID that some hit's file format has priority over, the first hit
     * with that format ID is marked; marked hits are then removed from the highest index
     * down so that the indexes of hits still to be removed stay valid.
     *
     * @param targetFile The binary file to be identified
     */
    private void removeLowerPriorityHits(ByteReader targetFile) {
        final int numHits = targetFile.getNumHits();
        // outranked[k] is true when hit k must be removed; marking is idempotent, so a hit
        // that is outranked by several other hits is still removed only once
        boolean[] outranked = new boolean[numHits];
        for (int i = 0; i < numHits; i++) {
            for (int j = 0; j < targetFile.getHit(i).getFileFormat().getNumHasPriorityOver(); j++) {
                int formatID = targetFile.getHit(i).getFileFormat().getHasPriorityOver(j);
                // loop through hits to find the first one for this file format
                for (int k = 0; k < numHits; k++) {
                    if (targetFile.getHit(k).getFileFormat().getID() == formatID) {
                        outranked[k] = true;
                        break;
                    }
                }
            }
        }
        // Delete hits in decreasing index order
        for (int k = numHits - 1; k >= 0; k--) {
            if (outranked[k]) {
                targetFile.removeHit(k);
            }
        }
    }

    /**
     * Returns the extension of a file name, or {@code null} if it has none.
     *
     * <p>A name has an extension when it contains a dot that is not its last character
     * and that is not followed by a {@code /} or {@code \} path separator.
     *
     * @param fileName the file name (may include a path)
     * @return the text after the last dot, or {@code null} when there is no extension
     */
    private static String extensionOf(String fileName) {
        int dotPos = fileName.lastIndexOf(".");
        boolean hasExtension = dotPos >= 0
                && dotPos != fileName.length() - 1
                && fileName.lastIndexOf("/") <= dotPos
                && fileName.lastIndexOf("\\") <= dotPos;
        return hasExtension ? fileName.substring(dotPos + 1) : null;
    }

    /**
     * Determines the file extension.
     * If the file has got some positive hits, then check these against this extension.
     * If the file has not got any positive hits, then look for tentative hits
     * based on the extension only.
     * If the file has no extension, every existing hit is given the file extension warning.
     *
     * @param targetFile The binary file to be identified
     */
    private void checkExtension(ByteReader targetFile) {
        String fileExtension = extensionOf(targetFile.getFileName());

        if (fileExtension == null) {
            // if the file does not have an extension then add warning to all its hits
            for (int iHit = 0; iHit < targetFile.getNumHits(); iHit++) {
                targetFile.getHit(iHit).setIdentificationWarning(MessageDisplay.FILEEXTENSIONWARNING);
            }
            return;
        }

        if (targetFile.getNumHits() > 0) {
            // for each file format which is a hit, check that it expects the given
            // extension - if not give a warning
            for (int iHit = 0; iHit < targetFile.getNumHits(); iHit++) {
                if (!(targetFile.getHit(iHit).getFileFormat().hasMatchingExtension(fileExtension))) {
                    targetFile.getHit(iHit).setIdentificationWarning(MessageDisplay.FILEEXTENSIONWARNING);
                }
            }
        } else {
            // no positive hits have been found, so search for tentative hits:
            // loop through all file formats with no internal signature
            for (int iFormat = 0; iFormat < this.getNumFileFormats(); iFormat++) {
                FileFormat format = this.getFileFormat(iFormat);
                if (format.getNumInternalSignatures() == 0
                        && format.hasMatchingExtension(fileExtension)) {
                    // add this as a tentative hit
                    FileFormatHit fileHit = new FileFormatHit(format,
                            JHOVE2IAnalysisController.HIT_TYPE_TENTATIVE, false, "");
                    targetFile.addHit(fileHit);
                    targetFile.setTentativeIdent();
                }
            }
        }
    }
}