// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.util;

import com.google.common.base.Charsets;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.google.enterprise.connector.traversal.MimeTypeMap;
import com.google.enterprise.connector.traversal.ProductionTraversalContext;

import eu.medsea.util.EncodingGuesser;

import junit.framework.TestCase;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collection;

/** Tests for MimeTypeDetector. */
public class MimeTypeDetectorTest extends TestCase {
  /** Scratch file without an extension, written by {@link #testNoFileAccess}. */
  private static final File NO_EXTENSION =
      new File("testdata/tmp/mimeTypeDetectorTest");

  /**
   * Include two consecutive nulls to force MimeTypeDetector to think
   * it's binary rather than text/plain.
   */
  private static final byte[] PDF_PREFIX =
      "%PDF-1.3\n%\0\0\n".getBytes(Charsets.UTF_8);

  /** Sample content that the tests expect to be detected as text/plain. */
  private static final String PLAIN_TEXT = "I am a string of text";

  /** Sample content that the tests expect to be detected as text/xml. */
  private static final String XML_DOCUMENT =
      "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
      + "<dog>beagle</dog>\n";

  private MimeTypeDetector mimeTypeDetector;

  /** A factory that throws if the detector asks it for a stream. */
  private final InputStreamFactory notUsedInputStreamFactory =
      new NotUsedInputStreamFactory();

  /**
   * Installs a traversal context with known preferred, supported and
   * excluded mime types, then creates a fresh detector for each test.
   */
  @Override
  public void setUp() {
    MimeTypeMap mimeTypeMap = new MimeTypeMap();
    mimeTypeMap.setPreferredMimeTypes(
        Sets.newHashSet("text/plain", "text/html", "text/xml"));
    mimeTypeMap.setSupportedMimeTypes(Sets.newHashSet(
        "application/pdf", "application/msword", "application/xml"));
    mimeTypeMap.setExcludedMimeTypes(Sets.newHashSet("text/x-uuencode"));

    ProductionTraversalContext traversalContext =
        new ProductionTraversalContext();
    traversalContext.setMimeTypeMap(mimeTypeMap);

    MimeTypeDetector.setTraversalContext(traversalContext);
    mimeTypeDetector = new MimeTypeDetector();
  }

  /**
   * Checks the encodings registered with EncodingGuesser before and after
   * calling {@code MimeTypeDetector.setSupportedEncodings}.
   */
  public void testSetSupportedEncodings() throws Exception {
    @SuppressWarnings("unchecked")
    Collection<String> encodings = EncodingGuesser.getSupportedEncodings();
    assertTrue(encodings.contains("UTF-8"));
    assertTrue(encodings.contains("ISO-8859-1"));
    assertTrue(encodings.contains("windows-1252"));
    assertTrue(encodings.contains(Charset.defaultCharset().toString()));

    // Need to make a deep copy to restore properly.
    Collection<String> originalEncodings = Sets.newHashSet(encodings);

    try {
      MimeTypeDetector.setSupportedEncodings(
          Sets.newHashSet("UTF-16", "US-ASCII"));
      @SuppressWarnings("unchecked")
      Collection<String> newEncodings = EncodingGuesser.getSupportedEncodings();
      assertTrue(newEncodings.contains("UTF-16"));
      assertTrue(newEncodings.contains("US-ASCII"));
      assertTrue(newEncodings.contains(Charset.defaultCharset().toString()));
    } finally {
      // Restore original supported encodings, even when an assertion above
      // fails, so the static EncodingGuesser state does not leak into
      // other tests.
      EncodingGuesser.setSupportedEncodings(originalEncodings);
    }
  }

  /** Null name with null content or null factory must be rejected. */
  public void testIllegalArguments() throws Exception {
    try {
      mimeTypeDetector.getMimeType(null, (byte[]) null);
      fail("IllegalArgumentException expected");
    } catch (IllegalArgumentException expected) {
      // Expected.
    }

    try {
      mimeTypeDetector.getMimeType(null, (InputStreamFactory) null);
      fail("IllegalArgumentException expected");
    } catch (IllegalArgumentException expected) {
      // Expected.
    }
  }

  /** Detection from the file name alone, with no content supplied. */
  public void testFileExtensionOnly() throws Exception {
    assertEquals("text/html", mimeTypeDetector.getMimeType(
        "a/\\big.htm", (byte[]) null));
    assertEquals("text/html", mimeTypeDetector.getMimeType(
        "a/big.html", (InputStreamFactory) null));
  }

  /** Checks the results involving "application/octet-stream". */
  public void testUnknownMimeType() throws Exception {
    // "Unknown" mime type is actually "application/octet-stream".
    assertEquals("application/octet-stream",
        MimeTypeDetector.UNKNOWN_MIME_TYPE);

    // Truly unknown.
    assertEquals(MimeTypeDetector.UNKNOWN_MIME_TYPE,
        mimeTypeDetector.getMimeType("a/zork.xyzzy",
            (InputStreamFactory) null));

    // A file whose only mimetype is "application/octet-stream" should work.
    // Note: Has internal knowledge of MimeUtil mime-types.properties.
    assertEquals("application/octet-stream", mimeTypeDetector.getMimeType(
        "a/compiled.o", (InputStreamFactory) null));

    // A file whose mimetype includes "application/octet-stream" as well
    // as others, should return the other, even if it ranks less than
    // "application/octet-stream".
    // Note: Has internal knowledge of MimeUtil mime-types.properties.
    assertEquals("text/x-uuencode", mimeTypeDetector.getMimeType(
        "a/uuencoded.uu", (InputStreamFactory) null));
  }

  /**
   * Detection from the file name when a factory is supplied. The factory
   * used here throws if asked for a stream, so these lookups must succeed
   * without reading any content.
   */
  public void testFileExtension() throws Exception {
    assertEquals("text/html", mimeTypeDetector.getMimeType(
        "a/\\big.htm", notUsedInputStreamFactory));
    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "smb://a.b/a/\\big.xml", notUsedInputStreamFactory));
    assertEquals("application/pdf", mimeTypeDetector.getMimeType(
        "a/\\a.b.cig.pdf", notUsedInputStreamFactory));
    assertEquals("application/msword", mimeTypeDetector.getMimeType(
        "a/big.doc", notUsedInputStreamFactory));
  }

  /** Detection from a byte array when the name has no extension. */
  public void testFileContent() throws Exception {
    // Encode explicitly so the bytes do not depend on the platform charset.
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        "a/big", PLAIN_TEXT.getBytes(Charsets.UTF_8)));
    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "a/big", XML_DOCUMENT.getBytes(Charsets.UTF_8)));
  }

  /** Detection from a byte array when no name is given at all. */
  public void testFileContentOnly() throws Exception {
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        null, PLAIN_TEXT.getBytes(Charsets.UTF_8)));
  }

  /** Detection from a stream when the name has no extension. */
  public void testFileContentStream() throws Exception {
    InputStreamFactory inputStreamFactory =
        new StringInputStreamFactory(PLAIN_TEXT);
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        "a/big", inputStreamFactory));

    inputStreamFactory = new StringInputStreamFactory(XML_DOCUMENT);
    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "a/big", inputStreamFactory));
  }

  /** Detection from a stream when no name is given at all. */
  public void testFileContentStreamOnly() throws Exception {
    InputStreamFactory inputStreamFactory =
        new StringInputStreamFactory(PLAIN_TEXT);
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        null, inputStreamFactory));

    // Try an actual ms office doc larger than the detector input buffer.
    inputStreamFactory =
        new FileInputStreamFactory("testdata/mocktestdata/test.doc");
    assertEquals("application/msword", mimeTypeDetector.getMimeType(
        null, inputStreamFactory));
  }

  /** Tests that MimeTypeDetector does not try to read the file. */
  public void testNoFileAccess() throws IOException {
    // Testing a file with no extension that looks like PDF.
    Files.write(PDF_PREFIX, NO_EXTENSION);
    try {
      // With no content, we should get an unknown type.
      assertEquals(MimeTypeDetector.UNKNOWN_MIME_TYPE,
          mimeTypeDetector.getMimeType(NO_EXTENSION.getPath(), (byte[]) null));

      // With text content, we should get text/plain.
      assertEquals("text/plain", mimeTypeDetector.getMimeType(
          NO_EXTENSION.getPath(), PLAIN_TEXT.getBytes(Charsets.UTF_8)));
    } finally {
      NO_EXTENSION.delete();
    }
  }

  /** Factory that fails the test with an exception if a stream is requested. */
  private static class NotUsedInputStreamFactory
      implements InputStreamFactory {
    public InputStream getInputStream() {
      throw new UnsupportedOperationException();
    }
  }

  /** Factory that serves the UTF-8 encoded bytes of a fixed string. */
  private static class StringInputStreamFactory
      implements InputStreamFactory {
    private final String string;

    StringInputStreamFactory(String string) {
      this.string = string;
    }

    public InputStream getInputStream() {
      // Explicit charset keeps the stream contents platform independent.
      return new ByteArrayInputStream(string.getBytes(Charsets.UTF_8));
    }
  }
}