/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config;
import java.net.URI;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.tika.ResourceLoggingClassLoader;
import org.apache.tika.config.DummyExecutor;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.config.TikaConfigTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ErrorParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Tests for the Tika Config, which don't require real parsers /
* detectors / etc.
* There's also {@link TikaParserConfigTest} and {@link TikaDetectorConfigTest}
* over in the Tika Parsers project, which do further Tika Config
* testing using real parsers and detectors.
*/
public class TikaConfigTest extends AbstractTikaConfigTest {
    /**
     * Make sure that a configuration file can't reference the
     * {@link AutoDetectParser} class in a {@code <parser>} configuration element.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-866">TIKA-866</a>
     */
    @Test
    public void withInvalidParser() throws Exception {
        try {
            getConfig("TIKA-866-invalid.xml");
            fail("AutoDetectParser allowed in a <parser> element");
        } catch (TikaException expected) {
            // Expected: the invalid configuration must be rejected
        }
    }

    /**
     * Make sure that with a service loader given, we can
     * get different configurable behaviour on parser classes
     * which can't be found.
     */
    @Test
    public void testUnknownParser() throws Exception {
        ServiceLoader ignoreLoader = new ServiceLoader(
                getClass().getClassLoader(), LoadErrorHandler.IGNORE);
        ServiceLoader warnLoader = new ServiceLoader(
                getClass().getClassLoader(), LoadErrorHandler.WARN);
        ServiceLoader throwLoader = new ServiceLoader(
                getClass().getClassLoader(), LoadErrorHandler.THROW);
        Path configPath = Paths.get(new URI(getConfigPath("TIKA-1700-unknown-parser.xml")));

        // With IGNORE, the config still loads and exposes a single component parser
        TikaConfig ignore = new TikaConfig(configPath, ignoreLoader);
        assertNotNull(ignore.getParser());
        assertEquals(1, ((CompositeParser) ignore.getParser()).getAllComponentParsers().size());

        // With WARN, the config still loads and exposes a single component parser
        TikaConfig warn = new TikaConfig(configPath, warnLoader);
        assertNotNull(warn.getParser());
        assertEquals(1, ((CompositeParser) warn.getParser()).getAllComponentParsers().size());

        // With THROW, building the config must fail
        try {
            new TikaConfig(configPath, throwLoader);
            fail("Shouldn't get here, invalid parser class");
        } catch (TikaException expected) {
            // Expected: the unknown parser class is reported as an error
        }
    }

    /**
     * Make sure that a configuration file can reference also a composite
     * parser class like {@link DefaultParser} in a {@code <parser>}
     * configuration element.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-866">TIKA-866</a>
     */
    @Test
    public void asCompositeParser() throws Exception {
        // Any exception propagates and fails the test with its full stack trace
        getConfig("TIKA-866-composite.xml");
    }

    /**
     * Make sure that a valid configuration file without mimetypes or
     * detector entries can be loaded without problems.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-866">TIKA-866</a>
     */
    @Test
    public void onlyValidParser() throws Exception {
        // Any exception propagates and fails the test with its full stack trace
        getConfig("TIKA-866-valid.xml");
    }

    /**
     * TIKA-1145 If the TikaConfig has a ClassLoader set on it,
     * that should be used when loading the mimetypes and when
     * discovering services
     */
    @Test
    public void ensureClassLoaderUsedEverywhere() throws Exception {
        ResourceLoggingClassLoader customLoader =
                new ResourceLoggingClassLoader(getClass().getClassLoader());
        TikaConfig config;

        // Without a classloader set, normal one will be used
        config = new TikaConfig();
        config.getMediaTypeRegistry();
        config.getParser();
        assertEquals(0, customLoader.getLoadedResources().size());

        // With a classloader set, resources will come through it
        config = new TikaConfig(customLoader);
        config.getMediaTypeRegistry();
        config.getParser();

        Map<String, List<URL>> resources = customLoader.getLoadedResources();
        int resourcesCount = resources.size();
        assertTrue(
                "Not enough things used the classloader, found only " + resourcesCount,
                resourcesCount > 3
        );

        // Ensure everything that should do, did use it
        // - Parsers
        assertNotNull(resources.get("META-INF/services/org.apache.tika.parser.Parser"));
        // - Detectors
        assertNotNull(resources.get("META-INF/services/org.apache.tika.detect.Detector"));
        // - Built-In Mimetypes
        assertNotNull(resources.get("org/apache/tika/mime/tika-mimetypes.xml"));
        // - Custom Mimetypes
        assertNotNull(resources.get("org/apache/tika/mime/custom-mimetypes.xml"));
    }

    /**
     * TIKA-1445 It should be possible to exclude DefaultParser from
     * certain types, so another parser explicitly listed will take them
     */
    @Test
    public void defaultParserWithExcludes() throws Exception {
        TikaConfig config = getConfig("TIKA-1445-default-except.xml");
        CompositeParser cp = (CompositeParser) config.getParser();
        List<Parser> parsers = cp.getAllComponentParsers();
        Parser p;

        // Will be the three parsers defined in the xml
        assertEquals(3, parsers.size());

        // Should have a wrapped DefaultParser, not the main DefaultParser,
        // as it is excluded from handling certain classes
        p = parsers.get(0);
        assertTrue(p.toString(), p instanceof ParserDecorator);
        assertEquals(DefaultParser.class, ((ParserDecorator) p).getWrappedParser().getClass());

        // Should have two others which claim things, which they wouldn't
        // otherwise handle
        p = parsers.get(1);
        assertTrue(p.toString(), p instanceof ParserDecorator);
        assertEquals(EmptyParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
        assertEquals("hello/world", p.getSupportedTypes(null).iterator().next().toString());

        p = parsers.get(2);
        assertTrue(p.toString(), p instanceof ParserDecorator);
        assertEquals(ErrorParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
        assertEquals("fail/world", p.getSupportedTypes(null).iterator().next().toString());
    }

    /**
     * TIKA-1653 If one parser has child parsers, those child parsers shouldn't
     * show up at the top level as well
     */
    @Test
    public void parserWithChildParsers() throws Exception {
        TikaConfig config = getConfig("TIKA-1653-norepeat.xml");
        CompositeParser cp = (CompositeParser) config.getParser();
        List<Parser> parsers = cp.getAllComponentParsers();
        Parser p;

        // Just 2 top level parsers
        assertEquals(2, parsers.size());

        // Should have a CompositeParser with 2 child ones,
        // and a wrapped empty parser
        p = parsers.get(0);
        assertTrue(p.toString(), p instanceof CompositeParser);
        assertEquals(2, ((CompositeParser) p).getAllComponentParsers().size());

        p = parsers.get(1);
        assertTrue(p.toString(), p instanceof ParserDecorator);
        assertEquals(EmptyParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
        assertEquals("hello/world", p.getSupportedTypes(null).iterator().next().toString());
    }

    /**
     * Loads {@code TIKA-1700-dynamic.xml} and checks that the service loader
     * held by the configured {@code DummyParser} reports itself as dynamic.
     */
    @Test
    public void testDynamicServiceLoaderFromConfig() throws Exception {
        URL url = TikaConfigTest.class.getResource("TIKA-1700-dynamic.xml");
        // Fail with a clear message if the fixture is missing from the classpath
        assertNotNull("Test resource TIKA-1700-dynamic.xml not found", url);

        TikaConfig config = new TikaConfig(url);
        DummyParser parser = (DummyParser) config.getParser();
        ServiceLoader loader = parser.getLoader();
        boolean dynamicValue = loader.isDynamic();
        assertTrue("Dynamic Service Loading Should be true", dynamicValue);
    }

    /**
     * Loads {@code TIKA-1762-executors.xml} and checks that the configured
     * executor service is a {@code DummyExecutor} with a core pool size of 3
     * and a maximum pool size of 10.
     */
    @Test
    public void testTikaExecutorServiceFromConfig() throws Exception {
        URL url = TikaConfigTest.class.getResource("TIKA-1762-executors.xml");
        // Fail with a clear message if the fixture is missing from the classpath
        assertNotNull("Test resource TIKA-1762-executors.xml not found", url);

        TikaConfig config = new TikaConfig(url);

        // Check the type before casting, so a wrong executor class is reported
        // through the assertion message rather than a ClassCastException
        Object configuredExecutor = config.getExecutorService();
        assertTrue("Should use Dummy Executor", configuredExecutor instanceof DummyExecutor);

        ThreadPoolExecutor executorService = (ThreadPoolExecutor) configuredExecutor;
        assertEquals("Should have configured Core Threads", 3, executorService.getCorePoolSize());
        assertEquals("Should have configured Max Threads", 10, executorService.getMaximumPoolSize());
    }
}