/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tika.detect; import static org.junit.Assert.assertEquals; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeDetectionTest; import org.junit.Before; import org.junit.Test; public class MimeDetectionWithNNTest { private Detector detector; /** @inheritDoc */ @Before public void setUp() { detector = new NNExampleModelDetector(); } /** * The test case only works on the detector that only has grb model as * currently the grb model is used as an example; if more models are added * in the TrainedModelDetector, the following tests will need to modified to reflect * the corresponding type instead of test-equal with the "OCTET_STREAM"; * * @throws Exception */ @Test public void testDetection() throws Exception { String octetStream_str = MediaType.OCTET_STREAM.toString(); String grb_str = "application/x-grib"; testFile(grb_str, "gdas1.forecmwf.2014062612.grib2"); testFile(grb_str, "GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb"); testFile(octetStream_str, "circles.svg"); testFile(octetStream_str, "circles-with-prefix.svg"); testFile(octetStream_str, "datamatrix.png"); testFile(octetStream_str, "test.html"); testFile(octetStream_str, "test-iso-8859-1.xml"); testFile(octetStream_str, "test-utf8.xml"); testFile(octetStream_str, "test-utf8-bom.xml"); testFile(octetStream_str, "test-utf16le.xml"); testFile(octetStream_str, "test-utf16be.xml"); testFile(octetStream_str, "test-long-comment.xml"); testFile(octetStream_str, "stylesheet.xsl"); testUrl(octetStream_str, "http://www.ai.sri.com/daml/services/owl-s/1.2/Process.owl", "test-difficult-rdf1.xml"); testUrl(octetStream_str, "http://www.w3.org/2002/07/owl#", "test-difficult-rdf2.xml"); // add evil test from TIKA-327 testFile(octetStream_str, "test-tika-327.html"); // add another evil html test from TIKA-357 testFile(octetStream_str, "testlargerbuffer.html"); // test fragment of HTML with <div> (TIKA-1102) testFile(octetStream_str, "htmlfragment"); // test binary CGM detection (TIKA-1170) testFile(octetStream_str, "plotutils-bin-cgm-v3.cgm"); // test HTML detection of malformed file, previously identified as // image/cgm (TIKA-1170) testFile(octetStream_str, "test-malformed-header.html.bin"); // test GCMD Directory Interchange Format (.dif) TIKA-1561 testFile(octetStream_str, "brwNIMS_2014.dif"); } private void testUrl(String expected, String url, String file) throws IOException { InputStream in = MimeDetectionTest.class.getResourceAsStream(file); testStream(expected, url, in); } private void testFile(String expected, String filename) throws IOException { InputStream in = MimeDetectionTest.class.getResourceAsStream(filename); testStream(expected, filename, in); } private void testStream(String expected, String urlOrFileName, InputStream in) throws IOException { assertNotNull("Test stream: [" + urlOrFileName + "] is null!", in); if (!in.markSupported()) { in = new java.io.BufferedInputStream(in); } try { Metadata metadata = new Metadata(); String mime = this.detector.detect(in, metadata).toString(); assertEquals( urlOrFileName + " is not properly detected: detected.", expected, mime); // Add resource name and test again // metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName); mime = this.detector.detect(in, metadata).toString(); assertEquals(urlOrFileName + " is not properly detected after adding resource name.", expected, mime); } finally { in.close(); } } private void assertNotNull(String string, InputStream in) { // TODO Auto-generated method stub } /** * Test for type detection of empty documents. */ @Test public void testEmptyDocument() throws IOException { assertEquals(MediaType.OCTET_STREAM, detector.detect( new ByteArrayInputStream(new byte[0]), new Metadata())); } }