/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.mime;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Internet media type, made up of a type, a subtype and an optional set of
 * parameters, for example <code>text/plain; charset=UTF-8</code>.
 * <p>
 * Instances are immutable: all fields are final and the parameter map is an
 * unmodifiable sorted map. The type, the subtype and the parameter names are
 * trimmed and converted to lower case on construction; parameter values are
 * stored as given.
 */
public final class MediaType implements Comparable<MediaType>, Serializable {

    /**
     * Serial version UID.
     */
    private static final long serialVersionUID = -3831000556189036392L;

    /**
     * Shared empty parameter map used by all parameterless media types.
     * Must be declared before the public constants below, whose
     * initialization goes through the constructors that read it.
     */
    private static final SortedMap<String, String> NO_PARAMETERS =
        Collections.unmodifiableSortedMap(new TreeMap<String, String>());

    /** Characters that are backslash-escaped inside a quoted parameter value. */
    private static final Pattern SPECIAL =
        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");

    /** Characters whose presence forces a parameter value to be quoted. */
    private static final Pattern SPECIAL_OR_WHITESPACE =
        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]");

    // TIKA-350: handle charset as first element in content-type
    // See http://www.ietf.org/rfc/rfc2045.txt for valid mime-type characters.
    // A token is any character except whitespace, control characters and the
    // "tspecials". The POSIX class Cntrl is used for the control characters;
    // the regex control escape (backslash-c) would instead consume the
    // following character and match one single control character only.
    private static final String VALID_MIMETYPE_CHARS =
        "[^\\p{Cntrl}\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]";

    private static final String MIME_TYPE_PATTERN_STRING =
        "(" + VALID_MIMETYPE_CHARS + "+)"
        + "\\s*/\\s*" + "(" + VALID_MIMETYPE_CHARS + "+)";

    /** Matches "type/subtype" optionally followed by "; parameters". */
    private static final Pattern CONTENT_TYPE_PATTERN = Pattern.compile(
            "(?is)\\s*" + MIME_TYPE_PATTERN_STRING + "\\s*($|;.*)");

    /** Matches the non-standard "charset=xxx; type/subtype" form. */
    private static final Pattern CONTENT_TYPE_CHARSET_FIRST_PATTERN =
        Pattern.compile(
            "(?i)\\s*(charset\\s*=\\s*[^\\p{Cntrl};\\s]+)\\s*;\\s*"
            + MIME_TYPE_PATTERN_STRING);

    public static final MediaType OCTET_STREAM = application("octet-stream");

    public static final MediaType TEXT_PLAIN = text("plain");

    public static final MediaType APPLICATION_XML = application("xml");

    public static final MediaType APPLICATION_ZIP = application("zip");

    /**
     * Creates a parameterless <code>application/*</code> media type.
     *
     * @param type subtype name, for example "xml"
     * @return the media type <code>application/&lt;type&gt;</code>
     */
    public static MediaType application(String type) {
        return new MediaType("application", type);
    }

    /**
     * Creates a parameterless <code>audio/*</code> media type.
     *
     * @param type subtype name
     * @return the media type <code>audio/&lt;type&gt;</code>
     */
    public static MediaType audio(String type) {
        return new MediaType("audio", type);
    }

    /**
     * Creates a parameterless <code>image/*</code> media type.
     *
     * @param type subtype name
     * @return the media type <code>image/&lt;type&gt;</code>
     */
    public static MediaType image(String type) {
        return new MediaType("image", type);
    }

    /**
     * Creates a parameterless <code>text/*</code> media type.
     *
     * @param type subtype name, for example "plain"
     * @return the media type <code>text/&lt;type&gt;</code>
     */
    public static MediaType text(String type) {
        return new MediaType("text", type);
    }

    /**
     * Creates a parameterless <code>video/*</code> media type.
     *
     * @param type subtype name
     * @return the media type <code>video/&lt;type&gt;</code>
     */
    public static MediaType video(String type) {
        return new MediaType("video", type);
    }

    /**
     * Parses the given string to a media type. The string is expected to be of
     * the form "type/subtype(; parameter=...)*" as defined in RFC 2045, though
     * we also handle "charset=xxx; type/subtype" for broken web servers.
     * <p>
     * Parameter values enclosed in double quotes are unquoted, so that
     * <code>charset="UTF-8"</code> and <code>charset=UTF-8</code> parse to
     * equal media types. Parameters are separated on every semicolon, so a
     * semicolon inside a quoted value is not supported. If a parameter name
     * occurs more than once, the last occurrence wins.
     *
     * @param string
     *            media type string to be parsed
     * @return parsed media type, or <code>null</code> if parsing fails
     */
    public static MediaType parse(String string) {
        if (string == null) {
            return null;
        }

        String type;
        String subtype;
        String params;

        Matcher m = CONTENT_TYPE_PATTERN.matcher(string);
        if (m.matches()) {
            type = m.group(1);
            subtype = m.group(2);
            params = m.group(3);
        } else {
            m = CONTENT_TYPE_CHARSET_FIRST_PATTERN.matcher(string);
            if (m.matches()) {
                params = m.group(1);
                type = m.group(2);
                subtype = m.group(3);
            } else {
                return null;
            }
        }

        Map<String, String> parameters = new HashMap<String, String>();
        for (String paramPiece : params.split(";")) {
            String[] keyValue = paramPiece.split("=", 2);
            String key = keyValue[0].trim();
            // Pieces without a name (e.g. the empty text before the first
            // semicolon) are skipped.
            if (key.length() > 0) {
                if (keyValue.length > 1) {
                    parameters.put(key, unquote(keyValue[1].trim()));
                } else {
                    parameters.put(key, "");
                }
            }
        }

        return new MediaType(type, subtype, parameters);
    }

    /**
     * Removes one pair of enclosing double quotes from the given parameter
     * value and undoes backslash escaping inside them. This is the inverse
     * of the quoting done by {@link #toString()}. Values that are not fully
     * enclosed in double quotes are returned unchanged.
     *
     * @param value trimmed parameter value
     * @return the unquoted value
     */
    private static String unquote(String value) {
        int length = value.length();
        if (length < 2
                || value.charAt(0) != '"'
                || value.charAt(length - 1) != '"') {
            return value;
        }

        int end = length - 1;
        StringBuilder builder = new StringBuilder(length - 2);
        for (int i = 1; i < end; i++) {
            char ch = value.charAt(i);
            // A backslash escapes the next character; a trailing lone
            // backslash is kept literally.
            if (ch == '\\' && i + 1 < end) {
                i++;
                ch = value.charAt(i);
            }
            builder.append(ch);
        }
        return builder.toString();
    }

    private final String type;

    private final String subtype;

    /**
     * Immutable map of media type parameters.
     */
    private final SortedMap<String, String> parameters;

    /**
     * Creates a media type with the given type, subtype and parameters.
     * The type, the subtype and the parameter names are trimmed and
     * converted to lower case. The parameter map is copied, so later
     * changes to the given map do not affect this instance.
     *
     * @param type top-level type, for example "text"
     * @param subtype subtype, for example "plain"
     * @param parameters parameter names and values, possibly empty
     */
    public MediaType(
            String type, String subtype, Map<String, String> parameters) {
        this.type = type.trim().toLowerCase(Locale.ENGLISH);
        this.subtype = subtype.trim().toLowerCase(Locale.ENGLISH);
        if (parameters.isEmpty()) {
            this.parameters = NO_PARAMETERS;
        } else {
            SortedMap<String, String> map = new TreeMap<String, String>();
            for (Map.Entry<String, String> entry : parameters.entrySet()) {
                map.put(entry.getKey().trim().toLowerCase(Locale.ENGLISH),
                        entry.getValue());
            }
            this.parameters = Collections.unmodifiableSortedMap(map);
        }
    }

    /**
     * Creates a media type without parameters.
     *
     * @param type top-level type, for example "text"
     * @param subtype subtype, for example "plain"
     */
    public MediaType(String type, String subtype) {
        this(type, subtype, NO_PARAMETERS);
    }

    /**
     * Returns the combination of the two given maps. When both maps contain
     * the same key, the value from <code>b</code> wins. If either map is
     * empty the other one is returned as is, without copying.
     */
    private static Map<String, String> union(
            Map<String, String> a, Map<String, String> b) {
        if (a.isEmpty()) {
            return b;
        } else if (b.isEmpty()) {
            return a;
        } else {
            Map<String, String> union = new HashMap<String, String>();
            union.putAll(a);
            union.putAll(b);
            return union;
        }
    }

    /**
     * Creates a media type by adding the given parameters to an existing
     * media type. A given parameter replaces a parameter of the same name
     * that is already present on the existing type.
     *
     * @param type media type to extend
     * @param parameters parameters to add or override
     */
    public MediaType(MediaType type, Map<String, String> parameters) {
        this(type.type, type.subtype, union(type.parameters, parameters));
    }

    /**
     * Returns this media type without its parameters.
     *
     * @return this instance if it has no parameters, otherwise a new
     *         parameterless media type with the same type and subtype
     */
    public MediaType getBaseType() {
        if (parameters.isEmpty()) {
            return this;
        } else {
            return new MediaType(type, subtype);
        }
    }

    /**
     * Returns the top-level type, trimmed and in lower case.
     *
     * @return type, for example "text"
     */
    public String getType() {
        return type;
    }

    /**
     * Returns the subtype, trimmed and in lower case.
     *
     * @return subtype, for example "plain"
     */
    public String getSubtype() {
        return subtype;
    }

    /**
     * Checks whether this media type contains parameters.
     *
     * @since Apache Tika 0.8
     * @return <code>true</code> if this type has one or more parameters,
     *         <code>false</code> otherwise
     */
    public boolean hasParameters() {
        return !parameters.isEmpty();
    }

    /**
     * Returns an immutable sorted map of the parameters of this media type.
     * The parameter names are guaranteed to be trimmed and in lower case.
     *
     * @return sorted map of parameters
     */
    public Map<String, String> getParameters() {
        return parameters;
    }

    /**
     * Returns the string form "type/subtype" followed by one
     * "; name=value" entry per parameter in parameter name order. A value
     * that contains whitespace or any of the special characters is enclosed
     * in double quotes, with each special character preceded by a backslash.
     *
     * @return string representation of this media type
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(type);
        builder.append('/');
        builder.append(subtype);
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            builder.append("; ");
            builder.append(entry.getKey());
            builder.append("=");
            String value = entry.getValue();
            if (SPECIAL_OR_WHITESPACE.matcher(value).find()) {
                builder.append('"');
                // Replacement is a literal backslash followed by the match.
                builder.append(SPECIAL.matcher(value).replaceAll("\\\\$0"));
                builder.append('"');
            } else {
                builder.append(value);
            }
        }
        return builder.toString();
    }

    /**
     * Two media types are equal when their types, subtypes and parameter
     * maps are all equal.
     */
    @Override
    public boolean equals(Object object) {
        if (object instanceof MediaType) {
            MediaType that = (MediaType) object;
            return type.equals(that.type)
                && subtype.equals(that.subtype)
                && parameters.equals(that.parameters);
        } else {
            return false;
        }
    }

    @Override
    public int hashCode() {
        int hash = 17;
        hash = hash * 31 + type.hashCode();
        hash = hash * 31 + subtype.hashCode();
        hash = hash * 31 + parameters.hashCode();
        return hash;
    }

    /**
     * Orders media types by their {@link #toString() string form}.
     */
    public int compareTo(MediaType that) {
        return toString().compareTo(that.toString());
    }

}