/**
* Copyright 2008 The University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.unc.lib.dl.util;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Gregory Jansen
*
*/
public class MetsDirDiff {
private static int fileCount = 0;
private static int refCount = 0;
private static File dir = null;
private static int canonicalPathTrim = 0;
private static void usage() {
System.out
.println("Usage: java -cp cdla-common.jar edu.unc.lib.dl.util.MetsDirDiff [options...] <mets file.xml> <directory path>");
System.out.println(" <mets file.xml>\tfull path to a mets.xml file");
System.out
.println(" <directory path>\tfull path to the directory (within which METS file pointers are relative)");
System.out
.println("Options:\tMETS file and directory arguments can be in any order.\n --nopath\t match filenames and total numbers only (for checking http references)");
System.exit(0);
}
/**
* @param args
*/
/**
* @param args
*/
public static void main(String[] args) {
if (args.length < 2) {
usage();
}
File metsFile = null;
boolean nopath = false;
for (int i = 0; i < args.length; i++) {
if (args[i].endsWith(".xml") && new File(args[i]).exists()) {
metsFile = new File(args[i]);
} else if ("--nopath".equals(args[i].trim())) {
nopath = true;
} else if(new File(args[i]).exists() && new File(args[i]).isDirectory()){
dir = new File(args[i]);
} else {
System.out.println("Unrecognized option: "+args[i]);
usage();
}
}
if (metsFile == null || dir == null) {
throw new RuntimeException("You must supply both a mets file (ending in .xml) and a directory path");
}
if (!metsFile.exists()) {
throw new RuntimeException("The METS file " + metsFile.getAbsolutePath() + " does not exist.");
}
if (!dir.exists()) {
throw new RuntimeException("The directory path " + dir.getAbsolutePath() + " does not exist.");
}
try {
canonicalPathTrim = dir.getCanonicalPath().length() + 1;
} catch (IOException e) {
throw new RuntimeException(e);
}
Set<String> files = new HashSet<String>();
try {
addFolder(dir, files, nopath);
} catch (IOException e) {
throw new RuntimeException(e);
}
Set<String> references = new HashSet<String>();
Pattern hrefGrabber = Pattern.compile("xlink:href=\"file://(.*)\"|xlink:href=\"(.*)\"");
if (nopath) {
System.out.println("Running with --nopath option. Paths are not checked.");
}
System.out.println("Unmatched file pointers in " + metsFile.getPath() + ":");
try(LineNumberReader fr = new LineNumberReader(new FileReader(metsFile))) {
for (String line = fr.readLine(); line != null; line = fr.readLine()) {
Matcher grabbed = hrefGrabber.matcher(line);
String path = null;
if (grabbed.find()) {
if (grabbed.group(1) != null) {
path = grabbed.group(1);
} else if (grabbed.group(2) != null) {
path = grabbed.group(2);
}
refCount++;
if (nopath) {
path = path.substring(path.lastIndexOf('/') + 1);
}
references.add(path);
if (!files.contains(path)) {
System.out.println("line " + fr.getLineNumber() + ": " + path);
}
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
System.out.println("Unmatched files in " + dir.getPath() + ":");
files.removeAll(references);
for (String f : files) {
System.out.println(f);
}
if (fileCount == refCount) {
System.out.println("There are " + refCount + " METS file pointers and " + fileCount + " files.");
} else {
System.out.println("WARNING: There are " + refCount + " METS file pointers and " + fileCount + " files.");
}
}
private static void addFolder(File folder, Set<String> index, boolean nopath) throws IOException {
for (File f : folder.listFiles()) {
if (f.isDirectory()) {
addFolder(f, index, nopath);
} else if (f.isFile()) {
fileCount++;
if (nopath) {
// put in filename only
index.add(f.getName());
} else {
// path with normalized separators
index.add(f.getCanonicalPath().substring(canonicalPathTrim).replace('\\', '/'));
}
}
}
}
}