/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.resourcestore.resourcefile;

import java.io.IOException;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.httpclient.Header;
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveRecord;
import org.archive.io.arc.ARCRecord;
import org.archive.wayback.core.Resource;
import org.archive.wayback.replay.HttpHeaderOperation;

/**
 * Resource implementation backed by a single ARCRecord. The record's HTTP
 * headers and ARC header fields are copied into one flat metadata table,
 * each group under its own key prefix, when {@link #parseHeaders()} is
 * called.
 */
public class ArcResource extends Resource {
	/**
	 * String prefix for ARC file related metadata namespace of keys within
	 * metaData Properties bag.
	 */
	private static final String ARC_META_PREFIX = "arcmeta.";

	/**
	 * String prefix for HTTP Header related metadata namespace of keys within
	 * metaData Properties bag.
	 */
	private static final String HTTP_HEADER_PREFIX = "httpheader.";

	/**
	 * object for ARCRecord
	 */
	ARCRecord arcRecord = null;

	/**
	 * object for ARCReader -- need to hold on to this in order to call close()
	 * to release filehandle after completing access to this record.
	 * Optional: may be null, in which case close() only closes the record.
	 */
	ArchiveReader arcReader = null;

	/**
	 * flag to indicate if the ARCRecord skipHTTPHeader() has been called
	 */
	boolean parsedHeader = false;

	/**
	 * Expandable property bag for holding metadata associated with this
	 * resource. Note that Hashtable rejects null keys and null values.
	 */
	Hashtable<String,String> metaData = new Hashtable<String,String>();

	/**
	 * Constructor
	 *
	 * @param rec the ARC record backing this resource; it is also installed
	 *            as this resource's input stream
	 * @param reader the reader the record came from, closed by close();
	 *            may be null
	 */
	public ArcResource(final ARCRecord rec, final ArchiveReader reader) {
		super();
		arcRecord = rec;
		arcReader = reader;
		setInputStream(rec);
	}

	/**
	 * Parse the headers on the underlying ARC record, and copy all HTTP
	 * headers and all ARC record header fields into metaData. Only the first
	 * successful call does any work; later calls return immediately.
	 *
	 * @throws IOException if skipping the record's HTTP header fails
	 */
	public void parseHeaders() throws IOException {
		if (parsedHeader) {
			return;
		}
		arcRecord.skipHttpHeader();

		// copy all HTTP headers to metaData, prefixing with
		// HTTP_HEADER_PREFIX
		Header[] headers = arcRecord.getHttpHeaders();
		if (headers != null) {
			for (Header header : headers) {
				String name = header.getName();
				// Hashtable.put() throws on a null value, so store the empty
				// string instead, the same way ARC header fields are handled
				// below.
				String value = header.getValue();
				if (value == null) {
					value = "";
				}
				metaData.put(HTTP_HEADER_PREFIX + name, value);

				boolean isTransferEncoding = name.toUpperCase().contains(
						HttpHeaderOperation.HTTP_TRANSFER_ENC_HEADER);
				if (isTransferEncoding && value.toUpperCase().contains(
						HttpHeaderOperation.HTTP_CHUNKED_ENCODING_HEADER)) {
					setChunkedEncoding();
				}
			}
		}

		// copy all ARC record header fields to metaData, prefixing with
		// ARC_META_PREFIX
		Map<String,Object> headerMetaMap =
			arcRecord.getMetaData().getHeaderFields();
		for (Map.Entry<String,Object> field : headerMetaMap.entrySet()) {
			Object value = field.getValue();
			String metaValue = (value == null) ? "" : value.toString();
			metaData.put(ARC_META_PREFIX + field.getKey(), metaValue);
		}

		// only mark as parsed once everything above succeeded, so a failed
		// attempt can be retried
		parsedHeader = true;
	}

	/**
	 * @param prefix key prefix to select on
	 * @return a new Map of all elements in metaData starting with 'prefix'.
	 * keys in the returned Map have 'prefix' removed.
	 */
	public Map<String,String> filterMeta(String prefix) {
		HashMap<String,String> matching = new HashMap<String,String>();
		for (Enumeration<String> e = metaData.keys(); e.hasMoreElements();) {
			String key = e.nextElement();
			if (key.startsWith(prefix)) {
				String finalKey = key.substring(prefix.length());
				String value = metaData.get(key);
				matching.put(finalKey, value);
			}
		}
		return matching;
	}

	/**
	 * @return a Map containing all HTTP header fields copied into metaData
	 * by parseHeaders(), keyed by header name without the internal prefix
	 */
	public Map<String,String> getHttpHeaders() {
		return filterMeta(HTTP_HEADER_PREFIX);
	}

	/**
	 * @return a Map containing all ARC Meta fields copied into metaData by
	 * parseHeaders(), keyed by field name without the internal prefix
	 */
	public Map<String,String> getARCMetadata() {
		return filterMeta(ARC_META_PREFIX);
	}

	/**
	 * (non-Javadoc)
	 * @see org.archive.io.arc.ARCRecord#getStatusCode()
	 * @return int HTTP status code returned with this document.
	 */
	public int getStatusCode() {
		return arcRecord.getStatusCode();
	}

	/**
	 * @return the ARCRecord underlying this Resource.
	 */
	public ArchiveRecord getArcRecord() {
		return arcRecord;
	}

	/**
	 * Closes the underlying record and then, if one was supplied, the reader.
	 * The reader is closed even when closing the record throws, so that its
	 * file handle is always released; if both calls throw, the reader's
	 * exception is the one that propagates.
	 *
	 * @see org.archive.io.arc.ARCRecord#close()
	 * @throws IOException if closing the record or the reader fails
	 */
	public void close() throws IOException {
		try {
			arcRecord.close();
		} finally {
			if (arcReader != null) {
				arcReader.close();
			}
		}
	}

	/**
	 * @return byte length claimed in ARC record metadata line.
	 */
	public long getRecordLength() {
		return arcRecord.getMetaData().getLength();
	}
}