/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import org.archive.io.WriterPoolSettings;
import org.archive.io.arc.ARCConstants;
import org.archive.io.arc.ARCWriter;
import org.archive.util.ArchiveUtils;
/**
 * Builds ARC files from a directory of per-record component files.
 *
 * <p>Every record is identified by a key and is made up of three files in the
 * source directory, all sharing that key as their base name:
 * <ul>
 * <li><code>KEY.meta</code> - its first line holds five space-separated
 * fields: URI, IP, 14-digit fetch timestamp, content type and body length
 * in bytes;</li>
 * <li><code>KEY.body</code> - the record payload, whose size must equal the
 * length declared in the .meta file;</li>
 * <li><code>KEY.sh</code> - must be present, but its content is not read by
 * this class.</li>
 * </ul>
 *
 * <p>Records are written in the natural (String) sort order of their keys.
 * Can be used programmatically via
 * {@link #directoryToArc(File, File, String)} or from the command line via
 * {@link #main(String[])}.
 *
 * @author brad
 * @version $Date$, $Revision$
 */
public class ARCCreator {
    private static final Logger LOGGER =
        Logger.getLogger(ARCCreator.class.getName());

    /** ARC file name prefix used by main() when none is given. */
    private static final String DEFAULT_PREFIX = "test-arc";

    private static final String META_SUFFIX = ".meta";
    private static final String BODY_SUFFIX = ".body";
    private static final String SCRIPT_SUFFIX = ".sh";

    /** Component bookkeeping for the directory being processed, by key. */
    private final HashMap<String,RecordComponents> components =
        new HashMap<String,RecordComponents>();

    /**
     * Returns the bookkeeping entry for key, creating and registering it on
     * first use.
     */
    private RecordComponents getRecordComponents(final String key) {
        RecordComponents rc = components.get(key);
        if (rc == null) {
            rc = new RecordComponents(key);
            components.put(key, rc);
        }
        return rc;
    }

    /** Returns name with the trailing suffix removed. */
    private static String stripSuffix(String name, String suffix) {
        return name.substring(0, name.length() - suffix.length());
    }

    /**
     * Registers one source file as a component (.meta, .body or .sh) of the
     * record whose key is the file name minus that suffix.
     *
     * @param file file found in the source directory
     * @throws RuntimeException if file is not a regular file, or its name
     *         has none of the recognized suffixes
     */
    private void addFile(File file) {
        if (!file.isFile()) {
            throw new RuntimeException("file " + file.getAbsolutePath() +
                    " is not a regular file");
        }
        String name = file.getName();
        if (name.endsWith(META_SUFFIX)) {
            getRecordComponents(stripSuffix(name, META_SUFFIX)).noteMeta();
        } else if (name.endsWith(BODY_SUFFIX)) {
            getRecordComponents(stripSuffix(name, BODY_SUFFIX)).noteBody();
        } else if (name.endsWith(SCRIPT_SUFFIX)) {
            getRecordComponents(stripSuffix(name, SCRIPT_SUFFIX)).noteScript();
        } else {
            throw new RuntimeException("No key for file " +
                    file.getAbsolutePath());
        }
    }

    /**
     * Reads all component files (.meta, .body, .sh) in srcDir, and writes
     * one or more ARC files in tgtDir with names beginning with prefix.
     * Records are written in sorted key order. The writer is closed whether
     * or not writing succeeds.
     *
     * @param srcDir directory holding the component files
     * @param tgtDir directory the ARC file(s) are created in
     * @param prefix leading part of the generated ARC file name(s)
     * @throws IOException if srcDir cannot be listed, a .meta file is
     *         malformed, a body size does not match its .meta length, or
     *         reading/writing fails
     * @throws RuntimeException if srcDir holds an entry that is not a
     *         regular file or has an unrecognized suffix, or if a record
     *         is missing one of its three components
     */
    public void directoryToArc(File srcDir, File tgtDir, String prefix)
            throws IOException {
        File[] sources = srcDir.listFiles();
        if (sources == null) {
            // listFiles() yields null (not an empty array) when the path is
            // not a directory or cannot be read.
            throw new IOException("Unable to list files in " +
                    srcDir.getAbsolutePath());
        }
        LOGGER.info("Found " + sources.length + " files in " + srcDir);

        // Forget anything left over from a previous call on this instance,
        // otherwise stale keys would be looked up in the new srcDir.
        components.clear();
        for (File source : sources) {
            addFile(source);
        }
        LOGGER.info("Associated " + sources.length + " files in " + srcDir);

        // sort keys and write them all:
        String[] keys =
            components.keySet().toArray(new String[components.size()]);
        Arrays.sort(keys);

        File[] target = {tgtDir};
        ARCWriter writer = new ARCWriter(new AtomicInteger(),
                getSettings(true, prefix, Arrays.asList(target)));
        boolean finished = false;
        try {
            for (String key : keys) {
                RecordComponents rc = components.get(key);
                rc.writeRecord(writer, srcDir);
                LOGGER.info("Wrote record keyed " + rc.key);
            }
            finished = true;
        } finally {
            if (finished) {
                writer.close();
            } else {
                // Already failing: release the writer, but do not let a
                // close() failure hide the exception that got us here.
                try {
                    writer.close();
                } catch (IOException e) {
                    LOGGER.warning("Failed closing arc writer after error: "
                            + e);
                }
            }
        }
        // NOTE(review): presumably getFile() can be null when no record was
        // ever written (empty srcDir) - confirm against ARCWriter.
        File arcFile = writer.getFile();
        if (arcFile != null) {
            LOGGER.info("Closed arc file named " +
                    arcFile.getAbsolutePath());
        }
    }

    /**
     * Builds the settings object handed to the ARCWriter. Only compression,
     * name prefix and output directories vary; the remaining values are
     * fixed.
     */
    private WriterPoolSettings getSettings(final boolean isCompressed,
            final String prefix, final List<File> arcDirs) {
        return new WriterPoolSettings() {
            public List<File> calcOutputDirs() {
                return arcDirs;
            }
            @SuppressWarnings({ "unchecked", "rawtypes" })
            public List getMetadata() {
                // no extra metadata is supplied for the generated ARCs
                return null;
            }
            public String getPrefix() {
                return prefix;
            }
            public boolean getCompress() {
                return isCompressed;
            }
            public long getMaxFileSizeBytes() {
                return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE;
            }
            public String getTemplate() {
                return "${prefix}-${timestamp17}-${serialno}";
            }
            public boolean getFrequentFlushes() {
                return false;
            }
            public int getWriteBufferSize() {
                return 4096;
            }
        };
    }

    /**
     * Command-line entry point. Exits with status 1 on bad usage and
     * status 2 if an IOException occurs while creating the ARC.
     *
     * @param args srcDir tgtDir [arc_prefix]
     */
    public static void main(String[] args) {
        if ((args.length < 2) || (args.length > 3)) {
            System.err.println("USAGE: srcDir tgtDir [arc_prefix]");
            System.exit(1);
        }
        File srcDir = new File(args[0]);
        File tgtDir = new File(args[1]);
        String prefix = (args.length == 3) ? args[2] : DEFAULT_PREFIX;

        ARCCreator creator = new ARCCreator();
        try {
            creator.directoryToArc(srcDir, tgtDir, prefix);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(2);
        }
    }

    /**
     * Tracks which of the three component files have been seen for one
     * record key, and knows how to write the assembled record.
     */
    private static final class RecordComponents {
        private final String key;
        private boolean meta;
        private boolean body;
        private boolean script;

        /**
         * constructor
         *
         * @param key base name shared by this record's component files
         */
        public RecordComponents(final String key) {
            super();
            this.key = key;
        }

        /** @return true once .meta, .body and .sh have all been seen */
        private boolean isComplete() {
            return meta && body && script;
        }

        /**
         * notes that the .meta file has been seen
         */
        public void noteMeta() { meta = true; }

        /**
         * notes that the .body file has been seen
         */
        public void noteBody() { body = true; }

        /**
         * notes that the .sh file has been seen
         */
        public void noteScript() { script = true; }

        /**
         * Reads the first line of file, always closing the file afterwards.
         *
         * @return the first line, or null if the file is empty
         */
        private static String readFirstLine(File file) throws IOException {
            RandomAccessFile raFile = new RandomAccessFile(file, "r");
            try {
                return raFile.readLine();
            } finally {
                raFile.close();
            }
        }

        /**
         * checks that all required files have been seen for this record, then
         * reads and parses the metafile, then write()s the record on the
         * writer.
         *
         * @param writer ARC writer the record is appended to
         * @param componentDir directory holding this record's files
         * @throws IOException if the .meta file is empty or malformed, the
         *         body size differs from the declared length, or I/O fails
         * @throws RuntimeException if a component file was never seen
         */
        public void writeRecord(ARCWriter writer, File componentDir)
                throws IOException {
            if (!isComplete()) {
                throw new RuntimeException("Missing components for key " + key +
                        " in directory " + componentDir.getAbsolutePath());
            }

            File metaFile = new File(componentDir, key + META_SUFFIX);
            String metaLine = readFirstLine(metaFile);
            if (metaLine == null) {
                throw new IOException("No meta info in " +
                        metaFile.getAbsolutePath());
            }
            // expected layout: URI IP TIMESTAMP14 TYPE LENGTH
            String[] metaParts = metaLine.split(" ");
            if (metaParts.length != 5) {
                throw new IOException("Should be 5 elements in " +
                        metaFile.getAbsolutePath());
            }
            String uri = metaParts[0];
            String ip = metaParts[1];
            long fetchTS;
            try {
                fetchTS = ArchiveUtils.parse14DigitDate(metaParts[2]).getTime();
            } catch (ParseException e) {
                throw new IOException("unparseable metaline timestamp in " +
                        metaFile.getAbsolutePath(), e);
            }
            String type = metaParts[3];
            int length;
            try {
                length = Integer.parseInt(metaParts[4]);
            } catch (NumberFormatException e) {
                // report like every other malformed-meta problem rather than
                // leaking an unchecked exception
                throw new IOException("unparseable metaline length in " +
                        metaFile.getAbsolutePath(), e);
            }

            File bodyFile = new File(componentDir, key + BODY_SUFFIX);
            if (bodyFile.length() != length) {
                throw new IOException("byte mismatch in meta length and body " +
                        "byte size for " + bodyFile.getAbsolutePath());
            }
            FileInputStream fis = new FileInputStream(bodyFile);
            try {
                writer.write(uri, type, ip, fetchTS, length, fis);
            } finally {
                // NOTE(review): assumes write() has finished reading the
                // stream by the time it returns - confirm against ARCWriter.
                fis.close();
            }
        }
    }
}