/**
* JHOVE2 - Next-generation architecture for format-aware characterization
*
* Copyright (c) 2009 by The Regents of the University of California,
* Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
* Junior University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* o Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* o Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* o Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jhove2.module.display.util;
import java.io.IOException;
import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jdom.JDOMException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* A wrapper for the data needed to build the containerMD section
* of the manifest. Data are gathered during the parse of the JHove2
* characterization result analysis and aggregated.
*/
public final class ContainerMDWrapper
{
    /** Prefix handed to {@code ContainerElement.toString(String)} when serializing elements. */
    public static final String CONTAINER_PREFIX = "containerMD";
    /** URI identifying the containerMD schema (not referenced elsewhere in this class). */
    public static final String CONTAINER_URI = "http://bibnum.bnf.fr/ns/containerMD-v1";
    /** Strict, UTC parser/formatter for raw "yyyyMMddHHmmss" timestamps. */
    public static final DateFormat rawDateFormat = new MtSafeDateFormat("yyyyMMddHHmmss");
    /** Strict, UTC formatter producing "yyyy-MM-dd'T'HH:mm:ss'Z'" timestamps. */
    public static final DateFormat dateFormat = new MtSafeDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

    private static final Logger log = Logger.getLogger(ContainerMDWrapper.class.getName());
    /** Captures the protocol (group 1) and the host (group 2) of an entry source name. */
    private static final Pattern hostExtractor = Pattern.compile("^([a-zA-Z]*):/{0,3}([.[^/#?:]]*)(?:.*)");
    /** Smallest raw "yyyyMMddHHmmss" value accepted as a date. */
    private static final long MIN_VALID_DATE = 19700101000000L;

    /** Encoding elements, keyed by encoding method. */
    public Map<String,ContainerElement> encodings;
    /** Format elements, keyed by format name. */
    public Map<String,ContainerElement> formats;
    /** Declared MIME type elements, keyed by MIME type. */
    public Map<String,ContainerElement> declaredMimeTypes;
    /** Host elements, keyed by host name. */
    public Map<String,ContainerElement> hosts;
    /** Response elements, keyed by "protocolName|protocolVersion|codeResponse". */
    public Map<String,ContainerElement> responses;

    /** Earliest raw date seen so far, or -1 when none was recorded. */
    private long firstDateTime = -1L;
    /** Latest raw date seen so far, or -1 when none was recorded. */
    private long lastDateTime = -1L;
    /** Smallest entry size seen so far; {@code Long.MAX_VALUE} means "none yet". */
    private long minimumSize = Long.MAX_VALUE;
    private long maximumSize = 0L;
    private long globalSize = 0L;

    /** Permitted attributes for the containerMD elements */
    protected enum AttributeName
    {
        NUMBER("number"),
        NAME("name"),
        SIZE("size"),
        TYPE("type"),
        METHOD("method"),
        ORDER("order"),
        PROTOCOL_NAME("protocolName"),
        PROTOCOL_VERSION("protocolVersion"),
        GLOBALSIZE("globalSize") ;

        /** Attribute name as written in the serialized element. */
        public final String key;

        private AttributeName(String key) {
            this.key = key;
        }

        @Override
        public String toString() {
            return this.key;
        }
    }

    /**
     * Creates a new ContainerMDWrapper instance with empty aggregation maps.
     */
    public ContainerMDWrapper()
    {
        this.formats = new HashMap<String,ContainerElement>();
        this.declaredMimeTypes = new HashMap<String,ContainerElement>();
        this.encodings = new HashMap<String,ContainerElement>();
        this.hosts = new HashMap<String,ContainerElement>();
        this.responses = new HashMap<String,ContainerElement>();
        // java.util.logging formats with MessageFormat: placeholders must be indexed.
        log.log(Level.FINE, "{0}|ContainerMDWrapper 0x{1}: new",
                new Object[] {
                    Thread.currentThread().getName(),
                    Integer.toHexString(System.identityHashCode(this))
                });
    }

    /**
     * Aggregates the data of one container entry: sizes, dates, host,
     * response, declared MIME type and format.
     *
     * @param sourceName      the entry source name (typically a URL); a
     *                        <code>null</code> value skips host and response
     *                        aggregation.
     * @param size            the entry size; negative values are not added
     *                        to any size statistics.
     * @param dateTime        the entry date as "yyyyMMddHHmmss" digits; blank,
     *                        unparsable or too small values are ignored.
     * @param format          the identified format, ignored when
     *                        <code>null</code> or empty.
     * @param mimeType        the declared MIME type, ignored when
     *                        <code>null</code> or empty.
     * @param protocolVersion the protocol version of the response.
     * @param codeResponse    the response code.
     */
    public void addEntry(String sourceName, long size, String dateTime,
                         String format, String mimeType,
                         String protocolVersion, String codeResponse) {
        if (log.isLoggable(Level.FINEST)) {
            // Size is rendered up front so MessageFormat does not apply
            // locale-specific digit grouping to it.
            log.log(Level.FINEST, "addEntry: {0} ({1}, {2}, {3})",
                    new Object[] { sourceName, Long.toString(size),
                                   dateTime, mimeType });
        }
        if (size >= 0L) {
            this.setMaximumSize(size);
            this.setMinimumSize(size);
            this.setGlobalSize(size);
        }
        if (! isBlank(dateTime)) {
            long rawDate = -1L;
            try {
                rawDate = Long.parseLong(dateTime);
            }
            catch (NumberFormatException e) {
                /* Reported below together with out-of-range values. */
            }
            if (rawDate >= MIN_VALID_DATE) {
                this.setFirstDateTime(rawDate);
                this.setLastDateTime(rawDate);
            }
            else {
                // A value below MIN_VALID_DATE would otherwise become the
                // permanent first date and make getFirstDateTime() fail.
                log.log(Level.WARNING, "Invalid ARC entry date ({0}) for {1}",
                        new Object[] { dateTime, sourceName });
            }
        }
        if (sourceName != null) {
            Matcher m = hostExtractor.matcher(sourceName);
            if (m.matches()) {
                String protocol = m.group(1);
                String hostName = m.group(2);
                this.handleHost(hostName, size);
                this.handleResponse(protocolVersion, protocol, codeResponse, size);
            }
        }
        if ((mimeType != null) && (mimeType.length() != 0)) {
            this.handleDeclaredMimeType(mimeType, size);
        }
        if ((format != null) && (format.length() != 0)) {
            this.handleFormat(format, size);
        }
    }

    /**
     * Checks whether hosts have been handled.
     * @return true | false
     */
    public boolean hasHosts()
    {
        return !hosts.isEmpty();
    }

    /**
     * Checks whether responses have been handled.
     * @return true | false
     */
    public boolean hasResponses()
    {
        return !responses.isEmpty();
    }

    /**
     * Checks whether encodings have been handled.
     * @return true | false
     */
    public boolean hasEncodings()
    {
        return !encodings.isEmpty();
    }

    /**
     * Checks whether formats have been handled.
     * @return true | false
     */
    public boolean hasFormats() {
        return !formats.isEmpty();
    }

    /**
     * Checks whether declared mimeTypes have been handled.
     * @return true | false
     */
    public boolean hasDeclaredMimeTypes() {
        return !declaredMimeTypes.isEmpty();
    }

    /**
     * Records a size as the maximum size if it exceeds the current one.
     * @param size the candidate size.
     */
    public void setMaximumSize(long size) {
        if (this.maximumSize < size) {
            this.maximumSize = size;
        }
    }

    /**
     * Gets maximumSize
     * @return the maximum size as a <code>String</code>
     */
    public String getMaximumSize() {
        return Long.toString(this.maximumSize);
    }

    /**
     * Records a size as the minimum size if it is non-negative and lower
     * than the current one.
     * @param size the candidate size.
     */
    public void setMinimumSize(long size) {
        if ((size >= 0L) && (this.minimumSize > size)) {
            this.minimumSize = size ;
        }
    }

    /**
     * Adds a size to the global size; negative sizes are ignored.
     * @param size the size to add.
     */
    public void setGlobalSize(long size) {
        if (size >= 0L) {
            this.globalSize += size;
        }
    }

    /**
     * Gets the global size
     * @return <code>long</code>
     */
    public long getGlobalSize() {
        return this.globalSize;
    }

    /**
     * Gets minimumSize
     * @return the minimum size as a <code>String</code>, "0" when no size
     *         has been recorded yet.
     */
    public String getMinimumSize() {
        log.log(Level.FINE, "{0}|{1}",
                new Object[] { Thread.currentThread().getName(), this });
        return Long.toString(
                (this.minimumSize == Long.MAX_VALUE)? 0L: this.minimumSize);
    }

    /**
     * Records a raw date as the first date if none is set yet or if it is
     * earlier than the current one.
     * @param dateTime the raw "yyyyMMddHHmmss" date.
     */
    public void setFirstDateTime(long dateTime) {
        if ((firstDateTime == -1L) || (firstDateTime > dateTime)) {
            firstDateTime = dateTime;
        }
    }

    /**
     * Gets firstDateTime
     * @return the first date formatted as "yyyy-MM-dd'T'HH:mm:ss'Z'"
     * @throws ParseException if the recorded value is not a valid date.
     * @throws IllegalArgumentException if no valid date has been recorded.
     */
    public String getFirstDateTime() throws ParseException {
        return formatDateTime(longToDate(this.firstDateTime));
    }

    /**
     * Records a raw date as the last date if it is later than the current one.
     * @param dateTime the raw "yyyyMMddHHmmss" date.
     */
    public void setLastDateTime(long dateTime) {
        if (lastDateTime < dateTime) {
            lastDateTime = dateTime;
        }
    }

    /**
     * Gets lastDateTime
     * @return the last date formatted as "yyyy-MM-dd'T'HH:mm:ss'Z'"
     * @throws ParseException if the recorded value is not a valid date.
     * @throws IllegalArgumentException if no valid date has been recorded.
     */
    public String getLastDateTime() throws ParseException {
        return formatDateTime(longToDate(this.lastDateTime));
    }

    /**
     * Returns containerMD encoding elements formatted into XML.
     * @return <code>String</code>
     */
    public String getEncodings() throws JDOMException, IOException
    {
        return toXml(this.encodings.values());
    }

    /**
     * Returns containerMD declared mimeTypes elements formatted into XML.
     * @return <code>String</code>
     */
    public String getDeclaredMimeTypes() throws JDOMException, IOException
    {
        return toXml(this.declaredMimeTypes.values());
    }

    /**
     * Returns containerMD format elements formatted into XML.
     * @return <code>String</code>
     */
    public String getFormats() throws JDOMException, IOException {
        return toXml(this.formats.values());
    }

    /**
     * Returns containerMD host elements formatted into XML.
     * @return <code>String</code>
     */
    public String getHosts() throws JDOMException, IOException
    {
        return toXml(this.hosts.values());
    }

    /**
     * Returns containerMD response elements formatted into XML.
     * @return <code>String</code>
     */
    public String getResponses() throws JDOMException, IOException
    {
        return toXml(this.responses.values());
    }

    /**
     * Parses a raw "yyyyMMddHHmmss" long into a date.
     * @param date the raw date.
     * @return the parsed <code>Date</code>, or <code>null</code> if the
     *         value is below the minimum valid date.
     * @throws ParseException if the digits do not form a valid date.
     */
    protected Date longToDate(long date) throws ParseException {
        Date d = null;
        if (date >= MIN_VALID_DATE) {
            d = rawDateFormat.parse(String.valueOf(date));
        }
        return d;
    }

    /**
     * Formats a given date into a long "yyyyMMddHHmmss"
     * @param date the date to convert.
     * @return Long
     */
    protected Long dateToLong(Date date) throws ParseException
    {
        String stringDate = rawDateFormat.format( date );
        return Long.valueOf(stringDate);
    }

    /**
     * Formats a given date into "yyyy-MM-dd'T'HH:mm:ss'Z'".
     *
     * @param date the date to format.
     * @return <code>String</code>
     * @throws ParseException declared for callers; not thrown by this
     *         implementation.
     * @throws IllegalArgumentException if <code>date</code> is
     *         <code>null</code>.
     */
    protected String formatDateTime(Date date) throws ParseException {
        if (date == null) {
            throw new IllegalArgumentException("Invalid date: " + date);
        }
        return dateFormat.format(date);
    }

    /**
     * Concatenates the serialized form of the given elements.
     *
     * @param elts the elements to serialize.
     * @return the concatenation of each element's
     *         <code>toString(CONTAINER_PREFIX)</code> result.
     */
    public String toXml(Collection<ContainerElement> elts)
                                        throws JDOMException, IOException {
        StringBuilder stringElements = new StringBuilder();
        for (ContainerElement e : elts) {
            stringElements.append( e.toString(CONTAINER_PREFIX) );
        }
        return stringElements.toString();
    }

    /**
     * Handles distinct encodings: registers the encoding the first time its
     * method is seen, numbering it by registration order.
     * @param type   the encoding type.
     * @param method the encoding method, used as the distinctness key.
     */
    public void handleEncoding(String type, String method)
    {
        if( !encodings.containsKey( method ))
        {
            ContainerElement container = new ContainerElement("encoding");
            container.getAttributes().put(AttributeName.TYPE, type);
            container.getAttributes().put(AttributeName.METHOD, method);
            container.getAttributes().put(AttributeName.ORDER,
                                    Integer.valueOf(encodings.size()+1));
            encodings.put(method,container);
        }
    }

    /**
     * Handles distinct formats
     * @param format the format name.
     * @param size   the entry size; negative values count as 0.
     */
    public void handleFormat(String format, long size)
    {
        long counted = countableSize(size);
        if (! countExisting(formats, format, counted)) {
            ContainerElement container = new ContainerElement("format");
            container.getAttributes().put(AttributeName.NAME, format);
            container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
            container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(counted));
            formats.put(format, container );
        }
    }

    /**
     * Handles distinct declared mimeTypes
     * @param mimeType the declared MIME type.
     * @param size     the entry size; negative values count as 0.
     */
    public void handleDeclaredMimeType(String mimeType,long size)
    {
        long counted = countableSize(size);
        if (! countExisting(declaredMimeTypes, mimeType, counted)) {
            ContainerElement container = new ContainerElement("declaredMimeType", mimeType);
            container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
            container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(counted));
            declaredMimeTypes.put(mimeType, container );
        }
    }

    /**
     * Handles distinct hosts
     * @param host the host name.
     * @param size the entry size; negative values count as 0.
     */
    public void handleHost(String host, long size)
    {
        long counted = countableSize(size);
        if (! countExisting(hosts, host, counted)) {
            ContainerElement container = new ContainerElement("host", host);
            container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
            container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(counted));
            hosts.put(host, container );
        }
    }

    /**
     * Handles distinct response
     * @param protocolVersion the protocol version.
     * @param protocolName    the protocol name.
     * @param codeResponse    the response code.
     * @param size            the entry size; negative values count as 0.
     */
    public void handleResponse(String protocolVersion, String protocolName, String codeResponse, long size)
    {
        String key = protocolName + '|' + protocolVersion + '|' + codeResponse;
        long counted = countableSize(size);
        if (! countExisting(responses, key, counted)) {
            ContainerElement container = new ContainerElement("response", codeResponse);
            container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
            container.getAttributes().put(AttributeName.PROTOCOL_NAME, protocolName );
            container.getAttributes().put(AttributeName.PROTOCOL_VERSION, protocolVersion );
            container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(counted));
            responses.put(key, container );
        }
    }

    /**
     * Maps unknown (negative) sizes to 0 so that per-element totals follow
     * the same rule as {@link #setGlobalSize(long)}.
     */
    private static long countableSize(long size) {
        return Math.max(size, 0L);
    }

    /**
     * Increments the occurrence counter and the cumulated size of the
     * element registered under <code>key</code>, if any.
     *
     * @return <code>true</code> if an element was found and updated,
     *         <code>false</code> if the caller has to create it.
     */
    private static boolean countExisting(Map<String,ContainerElement> elements,
                                         String key, long size) {
        ContainerElement container = elements.get(key);
        if (container == null) {
            return false;
        }
        Map<AttributeName,Object> attrs = container.getAttributes();
        ((AtomicInteger)attrs.get(AttributeName.NUMBER)).incrementAndGet();
        ((AtomicLong)attrs.get(AttributeName.GLOBALSIZE)).addAndGet(size);
        return true;
    }

    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(512);
        buf.append("ContainerMDWrapper 0x")
           .append(Integer.toHexString(System.identityHashCode(this)))
           .append(" { ");
        buf.append("minimumSize=").append(this.minimumSize).append(", ");
        buf.append("maximumSize=").append(this.maximumSize).append(", ");
        buf.append("globalSize=").append(this.globalSize).append(", ");
        buf.append("firstDateTime=").append(this.firstDateTime).append(", ");
        buf.append("lastDateTime=").append(this.lastDateTime).append(", ");
        buf.append("encodings=").append(this.encodings).append(", ");
        buf.append("MIME types=").append(this.declaredMimeTypes).append(", ");
        buf.append("formats=").append(this.formats).append(", ");
        buf.append("hosts=").append(this.hosts).append(", ");
        buf.append("responses=").append(this.responses);
        return buf.append(" }").toString();
    }

    /**
     * Check if a string is <code>null</code>, empty or contains only
     * whitespace characters.
     *
     * @param s the string to check, may be <code>null</code>.
     *
     * @return <code>true</code> if the string is <code>null</code>,
     *         empty ("") or contains only whitespace characters.
     */
    public static boolean isBlank(String s) {
        return ((s == null) || (s.trim().length() == 0));
    }

    /*
     * Date formatter whose parse(String) and format(...) entry points are
     * synchronized on the instance, configured as non-lenient and UTC.
     */
    private final static class MtSafeDateFormat extends SimpleDateFormat {
        /**
         * UID.
         */
        private static final long serialVersionUID = -8797209035403605920L;

        public MtSafeDateFormat(String pattern) {
            super(pattern);
            this.setLenient(false);
            this.setTimeZone(TimeZone.getTimeZone("UTC"));
        }

        @Override
        public synchronized Date parse(String source) throws ParseException {
            return super.parse(source);
        }

        @Override
        public synchronized StringBuffer format(Date date,
                                                StringBuffer toAppendTo,
                                                FieldPosition pos) {
            return super.format(date, toAppendTo, pos);
        }
    }
}