package org.archive.resource.warc.record;
import java.io.IOException;
import java.io.InputStream;
import org.archive.format.http.HttpHeaderParser;
import org.archive.format.http.HttpHeaders;
import org.archive.format.http.HttpParseException;
import org.archive.resource.MetaData;
import org.archive.resource.ResourceConstants;
import org.archive.resource.Resource;
import org.archive.resource.ResourceContainer;
import org.archive.resource.ResourceFactory;
import org.archive.resource.ResourceParseException;
import org.archive.util.StreamCopy;
/**
 * {@link ResourceFactory} that produces {@link WARCMetaDataResource} instances
 * by running an {@link HttpHeaderParser} over the supplied stream.
 *
 * <p>A single parser instance is created in the constructor and reused for
 * every call to {@link #getResource}.
 * NOTE(review): whether {@code HttpHeaderParser} is safe to share across
 * threads is not visible from this file - confirm before using one factory
 * concurrently.
 */
public class WARCMetaDataResourceFactory implements ResourceFactory, ResourceConstants {
	/** Parser reused across all {@link #getResource} invocations. */
	HttpHeaderParser parser;

	/** Creates a factory backed by a fresh {@link HttpHeaderParser}. */
	public WARCMetaDataResourceFactory() {
		this.parser = new HttpHeaderParser();
	}

	/**
	 * Parses the stream into a set of header-style fields and wraps the result
	 * in a {@link WARCMetaDataResource}.
	 *
	 * <p>Side effects on the metadata, in order:
	 * <ol>
	 * <li>{@code PAYLOAD_CONTENT_TYPE} on {@code parentMetaData} is set to
	 * {@code PAYLOAD_TYPE_WARC_META_FIELDS};</li>
	 * <li>a child named {@code WARC_META_FIELDS_METADATA} is created under
	 * {@code parentMetaData};</li>
	 * <li>{@code WARC_META_FIELDS_CORRUPT} is set to {@code true} on the child
	 * when the parsed headers report themselves as corrupt;</li>
	 * <li>{@code PAYLOAD_SLOP_BYTES} on the child receives the value returned by
	 * {@code StreamCopy.readToEOF} (presumably the count of bytes left in
	 * the stream after parsing - confirm against StreamCopy);</li>
	 * <li>{@code PAYLOAD_LENGTH} on the child receives the value returned by
	 * the parser (presumably the number of bytes it consumed - confirm
	 * against HttpHeaderParser).</li>
	 * </ol>
	 *
	 * @param is stream to parse; it is read to EOF but not closed here
	 * @param parentMetaData metadata of the enclosing record, updated in place
	 * @param container container handed through to the created resource
	 * @return the resource wrapping the child metadata and parsed fields
	 * @throws ResourceParseException if an {@link HttpParseException} is raised
	 *         inside the parsing sequence; the original exception is the cause
	 * @throws IOException if reading from the stream fails
	 */
	public Resource getResource(InputStream is, MetaData parentMetaData,
			ResourceContainer container)
			throws ResourceParseException, IOException {

		final HttpHeaders fields = new HttpHeaders();
		try {
			// Tag the parent first so the content type is recorded even if
			// parsing below fails.
			parentMetaData.putString(PAYLOAD_CONTENT_TYPE,
					PAYLOAD_TYPE_WARC_META_FIELDS);
			final MetaData fieldsMetaData =
					parentMetaData.createChild(WARC_META_FIELDS_METADATA);

			final int parsedLength = parser.doParse(is, fields);
			final boolean corrupt = fields.isCorrupt();
			if (corrupt) {
				fieldsMetaData.putBoolean(WARC_META_FIELDS_CORRUPT, true);
			}

			// Drain whatever the parser left behind and record how much it was.
			final long slopBytes = StreamCopy.readToEOF(is);
			fieldsMetaData.putLong(PAYLOAD_SLOP_BYTES, slopBytes);
			fieldsMetaData.putLong(PAYLOAD_LENGTH, parsedLength);

			return new WARCMetaDataResource(fieldsMetaData, container, fields);
		} catch (HttpParseException parseFailure) {
			// Translate to the factory's declared exception, keeping the cause.
			throw new ResourceParseException(parseFailure);
		}
	}
}