/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.metadata;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.TimeZone;
/**
 * A multi-valued metadata container.
 * <p>
 * Every metadata name maps to an array of string values, kept in the order
 * in which the values were added. The container is backed by a plain
 * {@link HashMap} and the instance methods perform no synchronization.
 */
public class Metadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
        Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys {

    /**
     * A map of all metadata attributes.
     */
    private final Map<String, String[]> metadata;

    /**
     * The ISO-8601 format string we use for Dates.
     * All dates are represented as UTC
     */
    private static final DateFormat iso8601Format =
            createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", "UTC");

    /**
     * Some parsers will have the date as a ISO-8601 string
     * already, and will set that into the Metadata object.
     * So we can return Date objects for these, this is the
     * list (in preference order) of the various ISO-8601
     * variants that we try when processing a date based
     * property.
     */
    private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
        // yyyy-mm-ddThh...
        iso8601Format,                                         // UTC/Zulu
        createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null),      // With timezone
        createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null),       // Without timezone
        // yyyy-mm-dd hh...
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", "UTC"),   // UTC/Zulu
        createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null),      // With timezone
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null),       // Without timezone
    };

    /**
     * Creates a date format using US date format symbols.
     *
     * @param format   {@link SimpleDateFormat} pattern
     * @param timezone time zone ID to apply to the format, or
     *                 <code>null</code> to keep the format's default time zone
     * @return the configured date format
     */
    private static DateFormat createDateFormat(String format, String timezone) {
        SimpleDateFormat sdf =
                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
        if (timezone != null) {
            sdf.setTimeZone(TimeZone.getTimeZone(timezone));
        }
        return sdf;
    }

    /**
     * Parses the given date string. This method is synchronized to prevent
     * concurrent access to the thread-unsafe date formats.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date date string
     * @return parsed date, or <code>null</code> if the date can't be parsed
     */
    private static synchronized Date parseDate(String date) {
        // Java doesn't like timezones in the form ss+hh:mm
        // It only likes the hhmm form, without the colon
        int n = date.length();
        // The length guard keeps the charAt() calls in range; strings that
        // are too short to carry a "+hh:mm" suffix simply fail to parse below.
        if (n >= 6 && date.charAt(n - 3) == ':'
                && (date.charAt(n - 6) == '+' || date.charAt(n - 6) == '-')) {
            date = date.substring(0, n - 3) + date.substring(n - 2);
        }

        // Try several different ISO-8601 variants
        for (DateFormat format : iso8601InputFormats) {
            try {
                return format.parse(date);
            } catch (ParseException ignored) {
                // Not this variant, try the next one
            }
        }
        return null;
    }

    /**
     * Returns a ISO 8601 representation of the given date. This method is
     * synchronized to prevent concurrent access to the thread-unsafe date
     * formats.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string
     */
    private static synchronized String formatDate(Date date) {
        return iso8601Format.format(date);
    }

    /**
     * Constructs a new, empty metadata.
     */
    public Metadata() {
        metadata = new HashMap<String, String[]>();
    }

    /**
     * Returns true if named value is multivalued.
     *
     * @param name
     *          name of metadata
     * @return true if named value is multivalued, false if single value or null
     */
    public boolean isMultiValued(final String name) {
        String[] values = metadata.get(name);
        return values != null && values.length > 1;
    }

    /**
     * Returns an array of the names contained in the metadata.
     *
     * @return Metadata names
     */
    public String[] names() {
        return metadata.keySet().toArray(new String[metadata.size()]);
    }

    /**
     * Get the value associated to a metadata name. If many values are associated
     * to the specified name, then the first one is returned.
     *
     * @param name
     *          of the metadata.
     * @return the value associated to the specified metadata name, or
     *         <code>null</code> if the name is not set
     */
    public String get(final String name) {
        String[] values = metadata.get(name);
        if (values == null) {
            return null;
        }
        return values[0];
    }

    /**
     * Returns the value (if any) of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @return property value, or <code>null</code> if the property is not set
     */
    public String get(Property property) {
        return get(property.getName());
    }

    /**
     * Returns the value of the identified Integer based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @return property value as a Integer, or <code>null</code> if the
     *         property is not a simple integer property, is not set, or
     *         is not a valid Integer
     */
    public Integer getInt(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            return null;
        }

        String v = get(property);
        if (v == null) {
            return null;
        }
        try {
            return Integer.valueOf(v);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Returns the value of the identified Date based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @return property value as a Date, or <code>null</code> if the
     *         property is not a simple date property, is not set, or
     *         is not a valid Date
     */
    public Date getDate(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            return null;
        }

        String v = get(property);
        if (v == null) {
            return null;
        }
        return parseDate(v);
    }

    /**
     * Get the values associated to a metadata name.
     *
     * @param name
     *          of the metadata.
     * @return the values associated to a metadata name, or an empty array
     *         if the name is not set
     */
    public String[] getValues(final String name) {
        return _getValues(name);
    }

    /**
     * Looks up the values stored for a name, substituting an empty array
     * when the name is not set so that callers never see <code>null</code>.
     *
     * @param name metadata name
     * @return the stored values array, or a new empty array
     */
    private String[] _getValues(final String name) {
        String[] values = metadata.get(name);
        if (values == null) {
            values = new String[0];
        }
        return values;
    }

    /**
     * Add a metadata name/value mapping. Add the specified value to the list of
     * values associated to the specified metadata name.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void add(final String name, final String value) {
        String[] values = metadata.get(name);
        if (values == null) {
            set(name, value);
        } else {
            // Grow the array by one and append the new value at the end
            String[] newValues = new String[values.length + 1];
            System.arraycopy(values, 0, newValues, 0, values.length);
            newValues[newValues.length - 1] = value;
            metadata.put(name, newValues);
        }
    }

    /**
     * Copy All key-value pairs from properties. Any values previously
     * associated with a copied name are replaced.
     *
     * @param properties
     *          properties to copy from
     */
    @SuppressWarnings("unchecked")
    public void setAll(Properties properties) {
        Enumeration<String> names =
                (Enumeration<String>) properties.propertyNames();
        while (names.hasMoreElements()) {
            String name = names.nextElement();
            metadata.put(name, new String[] { properties.getProperty(name) });
        }
    }

    /**
     * Set metadata name/value. Associate the specified value to the specified
     * metadata name. If some previous values were associated to this name, they
     * are removed.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void set(String name, String value) {
        metadata.put(name, new String[] { value });
    }

    /**
     * Sets the value of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @param value    property value
     */
    public void set(Property property, String value) {
        set(property.getName(), value);
    }

    /**
     * Sets the integer value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple
     *         integer property
     */
    public void set(Property property, int value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
        }
        set(property.getName(), Integer.toString(value));
    }

    /**
     * Sets the real or rational value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple real or simple rational property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple
     *         real or simple rational property
     */
    public void set(Property property, double value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.REAL
                && property.getValueType() != Property.ValueType.RATIONAL) {
            throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
        }
        set(property.getName(), Double.toString(value));
    }

    /**
     * Sets the date value of the identified metadata property. The date is
     * stored as an ISO 8601 string in UTC.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @param date     property value
     * @throws PropertyTypeException if the property is not a simple
     *         date property
     */
    public void set(Property property, Date date) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
        }
        set(property.getName(), formatDate(date));
    }

    /**
     * Remove a metadata and all its associated values.
     *
     * @param name
     *          metadata name to remove
     */
    public void remove(String name) {
        metadata.remove(name);
    }

    /**
     * Returns the number of metadata names in this metadata.
     *
     * @return number of metadata names
     */
    public int size() {
        return metadata.size();
    }

    /**
     * Compares this metadata with another object. Two metadata objects are
     * equal when they contain the same names and, for each name, the same
     * values in the same order.
     *
     * @param o object to compare with
     * @return true if <code>o</code> is a Metadata with equal content
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Metadata)) {
            return false;
        }

        Metadata other = (Metadata) o;
        if (other.size() != size()) {
            return false;
        }

        for (Map.Entry<String, String[]> entry : metadata.entrySet()) {
            // A name missing on the other side yields an empty array, which
            // never equals the (non-empty) array stored on this side.
            String[] otherValues = other._getValues(entry.getKey());
            // Arrays.equals is null-safe for individual values
            if (!Arrays.equals(entry.getValue(), otherValues)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns a hash code consistent with {@link #equals(Object)}: it is
     * computed from the names and their value arrays, and does not depend
     * on the iteration order of the underlying map.
     *
     * @return hash code of this metadata
     */
    @Override
    public int hashCode() {
        int hash = 0;
        for (Map.Entry<String, String[]> entry : metadata.entrySet()) {
            String name = entry.getKey();
            int nameHash = (name == null) ? 0 : name.hashCode();
            // Summing the per-entry hashes keeps the result order-independent
            hash += nameHash ^ Arrays.hashCode(entry.getValue());
        }
        return hash;
    }

    /**
     * Returns all name/value pairs as a single string, each pair written
     * as <code>name=value</code> followed by a space.
     *
     * @return string representation of this metadata
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        for (Map.Entry<String, String[]> entry : metadata.entrySet()) {
            String name = entry.getKey();
            for (String value : entry.getValue()) {
                buf.append(name).append("=").append(value).append(" ");
            }
        }
        return buf.toString();
    }

}