MimeTypeDetectorTest.java example

Explorer
manager.v3-master
- projects
// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.util;

import com.google.common.base.Charsets;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.google.enterprise.connector.traversal.MimeTypeMap;
import com.google.enterprise.connector.traversal.ProductionTraversalContext;

import eu.medsea.util.EncodingGuesser;

import junit.framework.TestCase;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collection;

/**
 * Tests for MimeTypeDetector.
 *
 * <p>Each test runs against a fresh {@code MimeTypeDetector} whose static
 * traversal context is configured in {@link #setUp()} with a small
 * {@code MimeTypeMap} of preferred, supported and excluded mime types.
 */
public class MimeTypeDetectorTest extends TestCase {
  /** Scratch file, deliberately without a file name extension. */
  private static final File NO_EXTENSION =
      new File("testdata/tmp/mimeTypeDetectorTest");

  /**
   * Include two consecutive nulls to force MimeTypeDetector to think
   * it's binary rather than text/plain.
   */
  private static final byte[] PDF_PREFIX =
      "%PDF-1.3\n%\0\0\n".getBytes(Charsets.UTF_8);

  /** Plain ASCII text used as sample content. */
  private static final String PLAIN_TEXT = "I am a string of text";

  /** Small XML document used as sample content. */
  private static final String XML_TEXT =
      "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
      + "<dog>beagle</dog>\n";

  private MimeTypeDetector mimeTypeDetector;
  private final InputStreamFactory notUsedInputStreamFactory =
      new NotUsedInputStreamFactory();

  /**
   * Encodes the sample content with an explicit charset, so the bytes
   * handed to the detector do not depend on the platform default charset.
   */
  private static byte[] toBytes(String content) {
    return content.getBytes(Charsets.UTF_8);
  }

  /**
   * Installs a traversal context with a known mime type map and creates
   * the detector under test.
   */
  @Override
  public void setUp() {
    MimeTypeMap mimeTypeMap = new MimeTypeMap();
    mimeTypeMap.setPreferredMimeTypes(
        Sets.newHashSet("text/plain", "text/html", "text/xml"));
    mimeTypeMap.setSupportedMimeTypes(Sets.newHashSet(
        "application/pdf", "application/msword", "application/xml"));
    mimeTypeMap.setExcludedMimeTypes(Sets.newHashSet("text/x-uuencode"));
    ProductionTraversalContext traversalContext =
        new ProductionTraversalContext();
    traversalContext.setMimeTypeMap(mimeTypeMap);
    MimeTypeDetector.setTraversalContext(traversalContext);
    mimeTypeDetector = new MimeTypeDetector();
  }

  /**
   * Checks the initially supported encodings, then replaces them through
   * {@code MimeTypeDetector.setSupportedEncodings} and checks that the new
   * ones (plus the default charset) are reported by EncodingGuesser.
   */
  public void testSetSupportedEncodings() throws Exception {
    @SuppressWarnings("unchecked") Collection<String> encodings =
        EncodingGuesser.getSupportedEncodings();
    assertTrue(encodings.contains("UTF-8"));
    assertTrue(encodings.contains("ISO-8859-1"));
    assertTrue(encodings.contains("windows-1252"));
    assertTrue(encodings.contains(Charset.defaultCharset().toString()));

    // Need to make a deep copy to restore properly.
    Collection<String> originalEncodings = Sets.newHashSet(encodings);

    try {
      MimeTypeDetector.setSupportedEncodings(
          Sets.newHashSet("UTF-16", "US-ASCII"));
      @SuppressWarnings("unchecked") Collection<String> newEncodings =
          EncodingGuesser.getSupportedEncodings();
      assertTrue(newEncodings.contains("UTF-16"));
      assertTrue(newEncodings.contains("US-ASCII"));
      assertTrue(newEncodings.contains(Charset.defaultCharset().toString()));
    } finally {
      // Restore original supported encodings even if an assertion above
      // fails; the encodings are static state shared with other tests.
      EncodingGuesser.setSupportedEncodings(originalEncodings);
    }
  }

  /**
   * With neither a file name nor content, both getMimeType overloads
   * must throw IllegalArgumentException.
   */
  public void testIllegalArguments() throws Exception {
    try {
      mimeTypeDetector.getMimeType(null, (byte[]) null);
      fail("IllegalArgumentException expected");
    } catch (IllegalArgumentException expected) {
      // Expected.
    }

    try {
      mimeTypeDetector.getMimeType(null, (InputStreamFactory) null);
      fail("IllegalArgumentException expected");
    } catch (IllegalArgumentException expected) {
      // Expected.
    }
  }

  /** With no content supplied, the file name extension decides. */
  public void testFileExtensionOnly() throws Exception {
    assertEquals("text/html", mimeTypeDetector.getMimeType(
                 "a/\\big.htm", (byte[]) null));

    assertEquals("text/html", mimeTypeDetector.getMimeType(
                 "a/big.html", (InputStreamFactory) null));
  }

  /** Exercises the handling of "application/octet-stream". */
  public void testUnknownMimeType() throws Exception {
    // "Unknown" mime type is actually "application/octet-stream".
    assertEquals("application/octet-stream",
                 MimeTypeDetector.UNKNOWN_MIME_TYPE);

    // Truly unknown.
    assertEquals(MimeTypeDetector.UNKNOWN_MIME_TYPE,
                 mimeTypeDetector.getMimeType("a/zork.xyzzy",
                                              (InputStreamFactory) null));

    // A file whose only mimetype is "application/octet-stream" should work.
    // Note: Has internal knowledge of MimeUtil mime-types.properties.
    assertEquals("application/octet-stream", mimeTypeDetector.getMimeType(
                 "a/compiled.o", (InputStreamFactory) null));

    // A file whose mimetype includes "application/octet-stream" as well
    // as others, should return the other, even if it ranks less than
    // "application/octet-stream".
    // Note: Has internal knowledge of MimeUtil mime-types.properties.
    assertEquals("text/x-uuencode", mimeTypeDetector.getMimeType(
                 "a/uuencoded.uu", (InputStreamFactory) null));
  }

  /**
   * A recognized extension must be enough to determine the type; the
   * supplied factory throws if the detector asks it for a stream.
   */
  public void testFileExtension() throws Exception {
    assertEquals("text/html", mimeTypeDetector.getMimeType(
        "a/\\big.htm", notUsedInputStreamFactory));

    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "smb://a.b/a/\\big.xml", notUsedInputStreamFactory));

    assertEquals("application/pdf", mimeTypeDetector.getMimeType(
        "a/\\a.b.cig.pdf", notUsedInputStreamFactory));

    assertEquals("application/msword", mimeTypeDetector.getMimeType(
        "a/big.doc", notUsedInputStreamFactory));
  }

  /** A name without an extension plus byte content: content decides. */
  public void testFileContent() throws Exception {
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        "a/big", toBytes(PLAIN_TEXT)));

    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "a/big", toBytes(XML_TEXT)));
  }

  /** Byte content with no file name at all. */
  public void testFileContentOnly() throws Exception {
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        null, toBytes(PLAIN_TEXT)));
  }

  /** A name without an extension plus streamed content: content decides. */
  public void testFileContentStream() throws Exception {
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        "a/big", new StringInputStreamFactory(PLAIN_TEXT)));

    assertEquals("text/xml", mimeTypeDetector.getMimeType(
        "a/big", new StringInputStreamFactory(XML_TEXT)));
  }

  /** Streamed content with no file name at all. */
  public void testFileContentStreamOnly() throws Exception {
    InputStreamFactory inputStreamFactory =
        new StringInputStreamFactory(PLAIN_TEXT);
    assertEquals("text/plain", mimeTypeDetector.getMimeType(
        null, inputStreamFactory));

    // Try an actual ms office doc larger than the detector input buffer.
    inputStreamFactory =
        new FileInputStreamFactory("testdata/mocktestdata/test.doc");
    assertEquals("application/msword", mimeTypeDetector.getMimeType(
        null, inputStreamFactory));
  }

  /** Tests that MimeTypeDetector does not try to read the file. */
  public void testNoFileAccess() throws IOException {
    // Make sure the scratch directory exists so the write below cannot
    // fail merely because of a clean checkout.
    File parent = NO_EXTENSION.getParentFile();
    if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
      throw new IOException("Unable to create directory " + parent);
    }

    try {
      // Testing a file with no extension that looks like PDF.  The write
      // is inside the try so a partially written file is cleaned up too.
      Files.write(PDF_PREFIX, NO_EXTENSION);

      // With no content, we should get an unknown type.
      assertEquals(MimeTypeDetector.UNKNOWN_MIME_TYPE,
          mimeTypeDetector.getMimeType(NO_EXTENSION.getPath(), (byte[]) null));

      // With text content, we should get text/plain.
      assertEquals("text/plain", mimeTypeDetector.getMimeType(
          NO_EXTENSION.getPath(), toBytes(PLAIN_TEXT)));
    } finally {
      // Best-effort cleanup; a leftover scratch file is harmless.
      NO_EXTENSION.delete();
    }
  }

  /** Factory that fails the test if a stream is ever requested. */
  private static class NotUsedInputStreamFactory implements InputStreamFactory {
    public InputStream getInputStream() {
      throw new UnsupportedOperationException();
    }
  }

  /** Factory that serves the UTF-8 bytes of a fixed string. */
  private static class StringInputStreamFactory implements InputStreamFactory {
    private final String string;

    StringInputStreamFactory(String string) {
      this.string = string;
    }

    public InputStream getInputStream() {
      return new ByteArrayInputStream(string.getBytes(Charsets.UTF_8));
    }
  }
}