/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.util;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.archive.io.WriterPoolSettings;
import org.archive.io.arc.ARCConstants;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPoolSettings;
import org.archive.uid.RecordIDGenerator;
import org.archive.uid.UUIDGenerator;
import org.archive.util.anvl.ANVLRecord;
public class WARCHeader {
private void writeHeaderRecord(File target, File fieldsSrc, String id)
throws IOException {
WARCWriter writer = null;
BufferedOutputStream bos =
new BufferedOutputStream(new FileOutputStream(target));
FileInputStream is = new FileInputStream(fieldsSrc);
ANVLRecord ar = ANVLRecord.load(is);
List<String> metadata = new ArrayList<String>(1);
metadata.add(ar.toString());
writer = new WARCWriter(new AtomicInteger(),bos,target,getSettings(true, null, null, metadata));
// Write a warcinfo record with description about how this WARC
// was made.
writer.writeWarcinfoRecord(target.getName(), "Made from "
+ id + " by "
+ this.getClass().getName());
}
private WARCWriterPoolSettings getSettings(final boolean isCompressed,
final String prefix, final List<File> arcDirs, final List metadata) {
return new WARCWriterPoolSettings() {
public List<File> calcOutputDirs() {
return arcDirs;
}
@SuppressWarnings({ "unchecked", "rawtypes" })
public List getMetadata() {
return metadata;
}
public String getPrefix() {
return prefix;
}
public boolean getCompress() {
return isCompressed;
}
public long getMaxFileSizeBytes() {
return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE;
}
public String getTemplate() {
return "${prefix}-${timestamp17}-${serialno}";
}
public boolean getFrequentFlushes() {
return false;
}
public int getWriteBufferSize() {
return 4096;
}
public RecordIDGenerator getRecordIDGenerator() {
return new UUIDGenerator();
}
};
}
public static void main(String[] args) {
if (args.length != 3) {
System.err.println("USAGE: tgtWarc fieldsSrc id");
System.err.println("\ttgtWarc is the path to the target WARC.gz");
System.err.println("\tfieldsSrc is the path to the text of the record");
System.err.println("\t\tmake sure each line is terminated by \\r\\n");
System.err.println("\t\tand that the file ends with a blank, \\r\\n terminiated line");
System.err.println("\tid is the XXX in:");
System.err.println("\t\tContent-Description: Made from XXX by org.archive.wayback.util.WARCHeader");
System.err.println("\t\tof the header record... header...");
System.exit(1);
}
File target = new File(args[0]);
File fieldSrc = new File(args[1]);
String id = args[2];
WARCHeader header = new WARCHeader();
try {
header.writeHeaderRecord(target, fieldSrc, id);
} catch (IOException e) {
e.printStackTrace();
System.exit(2);
}
}
}