package com.psddev.cms.tool.file;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Set;
import com.google.common.base.Preconditions;
import com.psddev.dari.util.ObjectUtils;
import com.psddev.dari.util.Settings;
import com.psddev.dari.util.SparseSet;
import com.psddev.dari.util.StorageItem;
import com.psddev.dari.util.StorageItemBeforeSave;
import com.psddev.dari.util.StorageItemUploadPart;
/**
 * Validates the content type of a file. Throws an error
 * if the content type is not accepted per the Application's
 * {@link Settings}, and also throws an error if the file is
 * an HTML file disguised as another content type.
 *
 * <p>The accepted content types are read from the
 * {@code cms/tool/fileContentTypeGroups} setting; when that setting is
 * blank the pattern {@code +/} is used instead.</p>
 */
public class ContentTypeValidator implements StorageItemBeforeSave {

    /** Maximum number of leading bytes inspected when looking for HTML markers. */
    private static final int SNIFF_LENGTH = 1024;

    /**
     * Lower-case prefixes that mark the start of a file as HTML. Each one is
     * compared against the beginning of the file after leading whitespace
     * has been removed.
     */
    private static final String[] HTML_PREFIXES = {
        "<!",
        "<?",
        "<html",
        "<script",
        "<title",
        "<body",
        "<head",
        "<plaintext",
        "<table",
        "<img",
        "<pre",
        "text/html",
        "<a",
        "<frameset",
        "<iframe",
        "<link",
        "<base",
        "<style",
        "<div",
        "<p",
        "<font",
        "<applet",
        "<meta",
        "<center",
        "<form",
        "<isindex",
        "<h1",
        "<h2",
        "<h3",
        "<h4",
        "<h5",
        "<h6",
        "<b",
        "<br"
    };

    /**
     * Checks the uploaded part before it is saved.
     *
     * <p>Nothing is checked when {@code part} or its content type is
     * {@code null}. The HTML sniffing step only runs when
     * {@code text/html} is not itself an accepted content type, and is
     * skipped when the part has no backing file.</p>
     *
     * @param storageItem the item about to be saved; not used by this check.
     * @param part the uploaded part to validate; may be {@code null}.
     * @throws IllegalStateException if the part's content type does not
     *         match the configured content type groups.
     * @throws IOException if the file cannot be read, or if its leading
     *         bytes look like HTML.
     */
    @Override
    public void beforeSave(StorageItem storageItem, StorageItemUploadPart part) throws IOException {
        if (part == null) {
            return;
        }

        String fileContentType = part.getContentType();

        if (fileContentType == null) {
            return;
        }

        String groupsPattern = Settings.get(String.class, "cms/tool/fileContentTypeGroups");
        Set<String> contentTypeGroups = new SparseSet(ObjectUtils.isBlank(groupsPattern) ? "+/" : groupsPattern);

        Preconditions.checkState(contentTypeGroups.contains(fileContentType),
                "Invalid content type " + fileContentType + ". Must match the pattern " + contentTypeGroups + ".");

        // Disallow HTML disguising as other content types per:
        // http://www.adambarth.com/papers/2009/barth-caballero-song.pdf
        if (contentTypeGroups.contains("text/html") || part.getFile() == null) {
            return;
        }

        String head;

        try (InputStream input = new FileInputStream(part.getFile())) {
            head = readHead(input);
        }

        if (looksLikeHtml(head)) {
            throw new IOException("Can't upload [" + fileContentType + "] file disguising as HTML!");
        }
    }

    /**
     * Reads up to {@link #SNIFF_LENGTH} bytes from the start of the stream.
     *
     * <p>Reading is repeated until the buffer is full or the stream ends,
     * because a single {@code read} call may return fewer bytes than
     * requested and returns {@code -1} on an empty stream.</p>
     *
     * @param input the stream to read from; not closed by this method.
     * @return the bytes read, decoded one byte per character; empty when
     *         the stream has no data.
     * @throws IOException if reading fails.
     */
    private static String readHead(InputStream input) throws IOException {
        byte[] buffer = new byte[SNIFF_LENGTH];
        int length = 0;

        while (length < buffer.length) {
            int read = input.read(buffer, length, buffer.length - length);

            if (read < 0) {
                break;
            }

            length += read;
        }

        // ISO-8859-1 maps every byte to exactly one character, so decoding
        // never depends on the platform default charset and the ASCII
        // markers in HTML_PREFIXES are preserved as-is.
        return new String(buffer, 0, length, StandardCharsets.ISO_8859_1);
    }

    /**
     * Tells whether the given leading text of a file starts with one of
     * the {@link #HTML_PREFIXES}, ignoring case and leading whitespace.
     *
     * @param head the decoded start of the file; never {@code null}.
     * @return {@code true} if an HTML marker is found at the start.
     */
    private static boolean looksLikeHtml(String head) {
        // Leading whitespace is stripped for every marker so that a file
        // cannot slip past the check by starting with a space or newline.
        String normalized = head.toLowerCase(Locale.ENGLISH).trim();

        for (String prefix : HTML_PREFIXES) {
            if (normalized.startsWith(prefix)) {
                return true;
            }
        }

        return false;
    }
}