package filetools.tiff;
import java.awt.BorderLayout;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import org.apache.commons.io.FilenameUtils;
import org.apache.sanselan.ImageReadException;
import org.apache.sanselan.Sanselan;
import org.apache.sanselan.common.IImageMetadata;
import org.apache.sanselan.formats.tiff.TiffDirectory;
import org.apache.sanselan.formats.tiff.TiffField;
import org.apache.sanselan.formats.tiff.TiffImageMetadata;
import org.apache.sanselan.formats.tiff.constants.TiffDirectoryConstants;
public class TiffFileAnalysis {
public static String examinedFolder;
static PrintWriter xmlsummary;
static int problematicTiffs;
public static ArrayList<TiffTagZbw> listTiffTags = new ArrayList<TiffTagZbw>();
public static void main(String args[]) throws IOException, ImageReadException {
try {
examinedFolder = utilities.BrowserDialogs.chooseFolder();
if (examinedFolder != null) {
int examinedTiffs = 0;
JFrame f = new JFrame();
JButton but = new JButton("... Program is running ... ");
f.add(but, BorderLayout.PAGE_END);
f.pack();
f.setVisible(true);
problematicTiffs = 0;
String name = JOptionPane.showInputDialog(null, "Please choose a name for the XML Outputfile.", "Enter String Mask", JOptionPane.PLAIN_MESSAGE);
;
String outputfile = examinedFolder + "//" + name + ".xml";
File outputfileFile = new File(outputfile);
int eingabe;
if (outputfileFile.exists()) {
eingabe = JOptionPane.showConfirmDialog(null, name + ".xml already exists in folder. Choosing \"Ja\" closes program. Choosing \"Nein\" overwrites existing File and start analysis.", "Confirmation", JOptionPane.YES_NO_OPTION, JOptionPane.PLAIN_MESSAGE);
}
else {
eingabe = 1;
}
if (eingabe == 1) {
PrintWriter xmlsummary = new PrintWriter(new FileWriter(outputfile));
ArrayList<File> files = utilities.ListsFiles.getPaths(new File(examinedFolder), new ArrayList<File>());
String xmlVersion = "xml version='1.0'";
String xmlEncoding = "encoding='ISO-8859-1'";
String xsltStyleSheet = "<?xml-stylesheet type=\"text/xsl\" href=\"TiffTagStyle.xsl\"?>";
// TODO: create Stylesheet in the same folder
output.XslStyleSheets.TiffTagAnalysisCustomizedXsl();
xmlsummary.println("<?" + xmlVersion + " " + xmlEncoding + "?>");
xmlsummary.println(xsltStyleSheet);
xmlsummary.println("<TiffTagAnalysis>");
for (int i = 0; i < files.size(); i++) {
String tiffExtension = "tif";
String extension = FilenameUtils.getExtension(files.get(i).getCanonicalPath()).toLowerCase();
if (extension.equals(tiffExtension)) {
if (filetools.GenericFileAnalysis.testFileHeaderTiff(files.get(i))) {
xmlsummary.println("<TiffFile>");
xmlsummary.println("<FilePath>" + files.get(i).toString() + "</FilePath>");
analyseTiffTags(files.get(i), xmlsummary);
examinedTiffs++;
xmlsummary.println("</TiffFile>");
}
else {
System.out.println("This file purports to be a tiff-file. It has a .TIF-extension, but no lacks the magic number.");
}
// how to get a certain tiff tag:
// TiffField tileWidthField =
// tiffDirectory.findField(TiffTagConstants.TIFF_TAG_BITS_PER_SAMPLE);
}
}
// How often does each TiffTag occur?
int tifftagscount = listTiffTags.size();
// save all tifftags in ArrayList<String>
ArrayList<String> alltifftags = new ArrayList<String>();
for (int i = 0; i < tifftagscount; i++) {
alltifftags.add(listTiffTags.get(i).tiffTagName);
}
Collections.sort(alltifftags);
ArrayList<String> origintifftags = new ArrayList<String>();
for (int i = 0; i < alltifftags.size(); i++) {
// function for this
origintifftags.add(alltifftags.get(i));
}
// get rid of redundant entries
int n = 0;
while (n < alltifftags.size() - 1) {
if (alltifftags.get(n).equals(alltifftags.get(n + 1))) {
alltifftags.remove(n);
} else {
n++;
}
}
xmlsummary.println("<AnalysisSummary>");
xmlsummary.println("<DifferentTiffTagsInSample>" + alltifftags.size() + "</DifferentTiffTagsInSample>");
// how often does each Tiff Tag occur?
int j = 0;
int temp;
for (n = 0; n < alltifftags.size(); n++) {
temp = 0;
for (j = 0; j < origintifftags.size(); j++) {
if (alltifftags.get(n).equals(origintifftags.get(j))) {
temp++;
}
}
xmlsummary.println("<DifferentTiffTags>");
xmlsummary.println("<TiffTag>" + alltifftags.get(n) + "</TiffTag>");
for (int i = 0; i < listTiffTags.size(); i++) {
if (listTiffTags.get(i).tiffTagName.equals(alltifftags.get(n))) {
xmlsummary.println("<SourceOfTag>" + listTiffTags.get(i).tiffTagKind + "</SourceOfTag>");
xmlsummary.println("<Description>" + listTiffTags.get(i).tiffTagDescription + "</Description>");
break; // very important, otherwise the
// description would be put ot as often
// as each tiff tag occurs in sample
}
}
xmlsummary.println("<Occurance>" + temp + "</Occurance>");
xmlsummary.println("</DifferentTiffTags>");
}
xmlsummary.println("<ProblematicTiffs>" + problematicTiffs + "</ProblematicTiffs>");
xmlsummary.println("<ExaminedTiffs>" + examinedTiffs + "</ExaminedTiffs>");
xmlsummary.println("</AnalysisSummary>");
xmlsummary.println("</TiffTagAnalysis>");
xmlsummary.close();
f.dispose();
}
else {
JOptionPane.showMessageDialog(null, "You have closed the program. No analysis was done.", "Information", JOptionPane.INFORMATION_MESSAGE);
f.dispose();
}
}
} catch (FileNotFoundException e) {
}
}
public static void analyseTiffTags(File file, PrintWriter xmlsummary) throws IOException, ImageReadException {
try {
IImageMetadata metadata = Sanselan.getMetadata(file);
TiffDirectory tiffDirectory = ((TiffImageMetadata) metadata).findDirectory(TiffDirectoryConstants.DIRECTORY_TYPE_ROOT);
@SuppressWarnings("unchecked")
ArrayList<TiffField> allEntries = tiffDirectory.getDirectoryEntrys();
tiffDirectory.dump();
xmlsummary.println("<TiffTagsCount>" + allEntries.size() + "</TiffTagsCount>");
xmlsummary.println("<TiffTags>");
xmlsummary.println("<FileName>" + file.getName() + "</FileName>");
for (int i = 0; i < allEntries.size(); i++) {
// replace all the different separators with ','
String editentry = allEntries.get(i).toString().replace(" (", ",");
editentry = editentry.replace("): ", ",");
editentry = editentry.replace(": ", ",");
editentry = editentry.replace(")", "");
String[] parts = editentry.split(",");
TiffTagZbw temp = new TiffTagZbw();
temp.decTiffTag = Integer.parseInt(parts[0]);
temp.hexValue = parts[1];
temp.tiffTagName = parts[2];
temp.tiffTagContent = (parts[3] + parts[4]);
/*
* xmlsummary.println("<decValue>" + temp.decTiffTag +
* "</decValue>"); xmlsummary.println("<hexValue>" +
* temp.hexValue + "</hexValue>");
* xmlsummary.println("<TiffTagName>" + temp.tiffTagName +
* "</TiffTagName>"); xmlsummary.println("<TiffTagContent>" +
* temp.tiffTagContent + "</TiffTagContent>");
*/
int privateTag = 32768;
int reusabletagbeginn = 65000;
int reusabletagend = 65535;
if (temp.decTiffTag > privateTag) {
if (temp.decTiffTag == 33432) {
xmlsummary.println("<Copyright>" + temp.tiffTagContent + "</Copyright>");
temp.tiffTagDescription = "Copyright notice.";
temp.tiffTagKind = "Baseline";
}
else if (temp.decTiffTag == 34665) {
xmlsummary.println("<ExifIfd>" + temp.tiffTagContent + "</ExifIfd>");
temp.tiffTagDescription = "A pointer to the Exif IFD. Private";
temp.tiffTagKind = "Private";
}
else if (temp.decTiffTag == 34675) {
xmlsummary.println("<InterColorProfile>" + temp.tiffTagContent + "</InterColorProfile>");
temp.tiffTagDescription = "ICC profile data.";
temp.tiffTagKind = "TIFF/EP spec, p. 47 Exif private IFD";
}
else if (temp.decTiffTag == 33723) {
xmlsummary.println("<IPTC_NAA>" + temp.tiffTagContent + "</IPTC_NAA>");
temp.tiffTagDescription = "IPTC-NAA (International Press Telecommunications Council-Newspaper Association of America) metadata.";
temp.tiffTagKind = "TIFF/EP spec, p. 33";
}
else {
xmlsummary.println("<UnknownTiffTag>" + temp.tiffTagName + "</UnknownTiffTag>");
temp.tiffTagKind = "Private";
}
}
else if ((reusabletagbeginn < temp.decTiffTag) && (temp.decTiffTag < reusabletagend)) {
xmlsummary.println("<ReusableTiffTag>");
xmlsummary.println("<UnknownTiffTag>" + temp.tiffTagName + "</UnknownTiffTag>");
temp.tiffTagKind = "Reusable";
JOptionPane.showMessageDialog(null, "Found a reusable Tiff Tag:" + temp.tiffTagName + temp.tiffTagContent, "Information", JOptionPane.INFORMATION_MESSAGE);
xmlsummary.println("</ReusableTiffTag>");
}
// TODO: if dec value > 32768, it is a private tag
// TODO: reusable range <65000 & < 65535
switch (temp.decTiffTag) {
case 255:
xmlsummary.println("<SubFileType>" + temp.tiffTagContent + "</SubFileType>");
temp.tiffTagDescription = "A general indication of the kind of data contained in this subfile.";
temp.tiffTagKind = "Baseline";
break;
case 256:
xmlsummary.println("<ImageWidth>" + temp.tiffTagContent + "</ImageWidth>");
temp.tiffTagDescription = "The number of columns in the image, i.e., the number of pixels per row.";
temp.tiffTagKind = "Baseline";
break;
case 257:
xmlsummary.println("<ImageLength>" + temp.tiffTagContent + "</ImageLength>");
temp.tiffTagDescription = "The number of rows of pixels in the image.";
temp.tiffTagKind = "Baseline";
break;
case 259:
xmlsummary.println("<Compression>" + temp.tiffTagContent + "</Compression>");
temp.tiffTagDescription = "Compression scheme used on the image data.";
temp.tiffTagKind = "Baseline";
break;
case 262:
xmlsummary.println("<Photometric>" + temp.tiffTagContent + "</Photometric>");
temp.tiffTagDescription = "The color space of the image data.";
temp.tiffTagKind = "Baseline";
break;
case 273:
xmlsummary.println("<StripOffSets>" + temp.tiffTagContent + "</StripOffSets>");
temp.tiffTagDescription = "For each strip, the byte offset of that strip.";
temp.tiffTagKind = "Baseline";
break;
case 277:
xmlsummary.println("<SamplesPerPixel>" + temp.tiffTagContent + "</SamplesPerPixel>");
temp.tiffTagDescription = "The number of components per pixel.";
temp.tiffTagKind = "Baseline";
break;
case 278:
xmlsummary.println("<RowsPerStrip>" + temp.tiffTagContent + "</RowsPerStrip>");
temp.tiffTagDescription = "The number of rows per strip.";
temp.tiffTagKind = "Baseline";
break;
case 279:
xmlsummary.println("<StripByteCounts>" + temp.tiffTagContent + "</StripByteCounts>");
temp.tiffTagDescription = "For each strip, the number of bytes in the strip after compression.";
temp.tiffTagKind = "Baseline";
break;
case 282:
xmlsummary.println("<XResolution>" + temp.tiffTagContent + "</XResolution>");
temp.tiffTagDescription = "The number of pixels per ResolutionUnit in the ImageWidth direction.";
temp.tiffTagKind = "Baseline";
break;
case 283:
xmlsummary.println("<YResolution>" + temp.tiffTagContent + "</YResolution>");
temp.tiffTagDescription = "The number of pixels per ResolutionUnit in the ImageLength direction.";
temp.tiffTagKind = "Baseline";
break;
case 296:
xmlsummary.println("<ResolutionUnit>" + temp.tiffTagContent + "</ResolutionUnit>");
temp.tiffTagDescription = "The unit of measurement for XResolution and YResolution.";
temp.tiffTagKind = "Baseline";
break;
case 258:
xmlsummary.println("<BitsPerSample>" + temp.tiffTagContent + "</BitsPerSample>");
temp.tiffTagDescription = "Number of bits per component.";
temp.tiffTagKind = "Baseline";
break;
// nice to have
case 280:
xmlsummary.println("<MinSampleValue>" + temp.tiffTagContent + "</MinSampleValue>");
temp.tiffTagDescription = "The minimum component value used.";
temp.tiffTagKind = "Extended";
break;
case 281:
xmlsummary.println("<MaxSampleValue>" + temp.tiffTagContent + "</MaxSampleValue>");
temp.tiffTagDescription = "The maximum component value used.";
break;
case 290:
xmlsummary.println("<GrayResponseUnit>" + temp.tiffTagContent + "</GrayResponseUnit>");
temp.tiffTagDescription = "The precision of the information contained in the GrayResponseCurve.";
temp.tiffTagKind = "Baseline";
break;
case 291:
xmlsummary.println("<GrayResponseCurve>" + temp.tiffTagContent + "</GrayResponseCurve>");
temp.tiffTagDescription = "For grayscale data, the optical density of each possible pixel value.";
temp.tiffTagKind = "Baseline";
break;
case 305:
xmlsummary.println("<Software>" + temp.tiffTagContent + "</Software>");
temp.tiffTagDescription = "Name and version number of the software package(s) used to create the image.";
temp.tiffTagKind = "Baseline";
break;
case 306:
xmlsummary.println("<DateTime>" + temp.tiffTagContent + "</DateTime>");
temp.tiffTagDescription = "Date ad time of image creation.";
temp.tiffTagKind = "Baseline";
break;
case 315:
xmlsummary.println("<Artist>" + temp.tiffTagContent + "</Artist>");
temp.tiffTagDescription = "Person who created the image.";
temp.tiffTagKind = "Baseline";
break;
case 320:
xmlsummary.println("<ColorMap>" + temp.tiffTagContent + "</ColorMap>");
temp.tiffTagDescription = "A color map for palette color images.";
temp.tiffTagKind = "Baseline";
break;
// not in Baseline, but nice to have for Digital Preservation
case 338:
xmlsummary.println("<ExtraSamples>" + temp.tiffTagContent + "</ExtraSamples>");
temp.tiffTagDescription = "Description of extra components.";
temp.tiffTagKind = "Baseline";
break;
// other known Tiff Tags
case 292:
xmlsummary.println("<T4Option>" + temp.tiffTagContent + "</T4Option>");
temp.tiffTagDescription = "Options for Group 3 Fax compression. Extended";
temp.tiffTagKind = "Extended";
break;
case 293:
xmlsummary.println("<T6Option>" + temp.tiffTagContent + "</T6Option>");
temp.tiffTagDescription = "Options for Group 4 Fax compression. Extended";
temp.tiffTagKind = "Extended";
break;
case 297:
xmlsummary.println("<PageNumber>" + temp.tiffTagContent + "</PageNumber>");
temp.tiffTagDescription = "The page number of the page from which this image was scanned. Extended";
temp.tiffTagKind = "Extended";
break;
case 266:
xmlsummary.println("<FillOrder>" + temp.tiffTagContent + "</FillOrder>");
temp.tiffTagDescription = "The logical order of bits within a byte.";
temp.tiffTagKind = "Baseline";
break;
case 274:
xmlsummary.println("<Orientation>" + temp.tiffTagContent + "</Orientation>");
temp.tiffTagDescription = "The orientation of the image with respect to the rows and columns.";
temp.tiffTagKind = "Baseline";
break;
case 284:
xmlsummary.println("<PlanarConfiguration>" + temp.tiffTagContent + "</PlanarConfiguration>");
temp.tiffTagDescription = "How the components of each pixel are stored.";
temp.tiffTagKind = "Baseline";
break;
case 317:
xmlsummary.println("<Predictor>" + temp.tiffTagContent + "</Predictor>");
temp.tiffTagDescription = "A mathematical operator that is applied to the image data before an encoding scheme is applied.";
temp.tiffTagKind = "Extended";
break;
case 318:
xmlsummary.println("<WhitePoint>" + temp.tiffTagContent + "</WhitePoint>");
temp.tiffTagDescription = "The chromaticity of the white point of the image.";
temp.tiffTagKind = "Extended";
break;
case 319:
xmlsummary.println("<PrimaryChromaticities>" + temp.tiffTagContent + "</PrimaryChromaticities>");
temp.tiffTagDescription = "The chromaticities of the primaries of the image.";
temp.tiffTagKind = "Extended";
break;
case 529:
xmlsummary.println("<YCbCrCoefficients>" + temp.tiffTagContent + "</YCbCrCoefficients>");
temp.tiffTagDescription = "The transformation from RGB to YCbCr image data.";
temp.tiffTagKind = "Extended";
break;
case 532:
xmlsummary.println("<ReferenceBlackWhite>" + temp.tiffTagContent + "</ReferenceBlackWhite>");
temp.tiffTagDescription = "Specifies a pair of headroom and footroom image data values (codes) for each pixel component.";
temp.tiffTagKind = "Extended";
break;
case 263:
xmlsummary.println("<Threshholding>" + temp.tiffTagContent + "</Threshholding>");
temp.tiffTagKind = "Baseline";
break;
case 339:
xmlsummary.println("<SampleFormat>" + temp.tiffTagContent + "</SampleFormat>");
temp.tiffTagDescription = "Specifies how to interpret each data sample in a pixel.";
temp.tiffTagKind = "Extended";
break;
case 512:
xmlsummary.println("<JPEGProc>" + temp.tiffTagContent + "</JPEGProc>");
temp.tiffTagDescription = "Old-style JPEG compression field. TechNote2 invalidates this part of the specification.";
temp.tiffTagKind = "Extended";
break;
case 519:
xmlsummary.println("<JPEGQTables>" + temp.tiffTagContent + "</JPEGQTables>");
temp.tiffTagDescription = "Old-style JPEG compression field. TechNote2 invalidates this part of the specification.";
temp.tiffTagKind = "Extended";
break;
case 520:
xmlsummary.println("<JPEGDCTables>" + temp.tiffTagContent + "</JPEGDCTables>");
temp.tiffTagDescription = "Old-style JPEG compression field. TechNote2 invalidates this part of the specification.";
temp.tiffTagKind = "Extended";
break;
case 521:
xmlsummary.println("<JPEGACTables>" + temp.tiffTagContent + "</JPEGACTables>");
temp.tiffTagDescription = "Old-style JPEG compression field. TechNote2 invalidates this part of the specification.";
temp.tiffTagKind = "Extended";
break;
case 270:
xmlsummary.println("<ImageDescription>" + temp.tiffTagContent + "</ImageDescription>");
temp.tiffTagDescription = "A string that describes the subject of the image.";
temp.tiffTagKind = "Baseline";
break;
default:
// xmlsummary.println("<UnknownTiffTag>" + temp.tiffTagName
// + "</UnknownTiffTag>");
String unknownTiffTag = temp.tiffTagName;
unknownTiffTag = unknownTiffTag.replace(" ", ""); // get rid
// of
// spaces
// because
// XML
// cannot
// deal
// with
// them
xmlsummary.println("<" + unknownTiffTag + ">" + temp.tiffTagContent + "</" + unknownTiffTag + ">");
}
listTiffTags.add(temp); // used to be placed before all the
// cases,
// but the description should be
// included
}
xmlsummary.println("</TiffTags>");
}
catch (Exception e) {
xmlsummary.println("<ErrorMessage>" + e + "</ErrorMessage>");
problematicTiffs++;
}
// how to get a certain tiff tag:
// TiffField tileWidthField =
// tiffDirectory.findField(TiffTagConstants.TIFF_TAG_BITS_PER_SAMPLE);
}
}