package org.apache.tika.batch.fs; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import org.apache.tika.batch.BatchProcessDriverCLI; import org.junit.Test; public class BatchDriverTest extends FSBatchTestBase { //for debugging, turn logging off/on via resources/log4j.properties for the driver //and log4j_process.properties for the process. @Test(timeout = 15000) public void oneHeavyHangTest() throws Exception { //batch runner hits one heavy hang file, keep going Path outputDir = getNewOutputDir("daemon-"); assertTrue(Files.isDirectory(outputDir)); //make sure output directory is empty! assertEquals(0, countChildren(outputDir)); String[] args = getDefaultCommandLineArgsArr("one_heavy_hang", outputDir, null); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args); driver.execute(); assertEquals(0, driver.getNumRestarts()); assertFalse(driver.getUserInterrupted()); assertEquals(5, countChildren(outputDir)); assertContains("first test file", readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8)); } @Test(timeout = 30000) public void restartOnFullHangTest() throws Exception { //batch runner hits more heavy hangs than threads; needs to restart Path outputDir = getNewOutputDir("daemon-"); //make sure output directory is empty! assertEquals(0, countChildren(outputDir)); String[] args = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, null); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args); driver.execute(); //could be one or two depending on timing assertTrue(driver.getNumRestarts() > 0); assertFalse(driver.getUserInterrupted()); assertContains("first test file", readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8)); } @Test(timeout = 15000) public void noRestartTest() throws Exception { Path outputDir = getNewOutputDir("daemon-"); //make sure output directory is empty! assertEquals(0, countChildren(outputDir)); String[] args = getDefaultCommandLineArgsArr("no_restart", outputDir, null); String[] mod = Arrays.copyOf(args, args.length + 2); mod[args.length] = "-numConsumers"; mod[args.length+1] = "1"; BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", mod); driver.execute(); assertEquals(0, driver.getNumRestarts()); assertFalse(driver.getUserInterrupted()); assertEquals(2, countChildren(outputDir)); Path test2 = outputDir.resolve("test2_norestart.xml.xml"); assertTrue("test2_norestart.xml", Files.exists(test2)); Path test3 = outputDir.resolve("test3_ok.xml.xml"); assertFalse("test3_ok.xml", Files.exists(test3)); } @Test(timeout = 15000) public void restartOnOOMTest() throws Exception { //batch runner hits more heavy hangs than threads; needs to restart Path outputDir = getNewOutputDir("daemon-"); //make sure output directory is empty! assertEquals(0, countChildren(outputDir)); String[] args = getDefaultCommandLineArgsArr("oom", outputDir, null); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args); driver.execute(); assertEquals(1, driver.getNumRestarts()); assertFalse(driver.getUserInterrupted()); assertContains("first test file", readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8)); } @Test(timeout = 30000) public void allHeavyHangsTestWithStarvedCrawler() throws Exception { //this tests that if all consumers are hung and the crawler is //waiting to add to the queue, there isn't deadlock. The BatchProcess should //just shutdown, and the driver should restart Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-"); Map<String, String> args = new HashMap<>(); args.put("-numConsumers", "2"); args.put("-maxQueueSize", "2"); String[] commandLine = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, args); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine); driver.execute(); assertEquals(3, driver.getNumRestarts()); assertFalse(driver.getUserInterrupted()); assertContains("first test file", readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8)); } @Test(timeout = 30000) public void maxRestarts() throws Exception { //tests that maxRestarts works //if -maxRestarts is not correctly removed from the commandline, //FSBatchProcessCLI's cli parser will throw an Unrecognized option exception Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-"); Map<String, String> args = new HashMap<>(); args.put("-numConsumers", "1"); args.put("-maxQueueSize", "10"); args.put("-maxRestarts", "2"); String[] commandLine = getDefaultCommandLineArgsArr("max_restarts", outputDir, args); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine); driver.execute(); assertEquals(2, driver.getNumRestarts()); assertFalse(driver.getUserInterrupted()); assertEquals(3, countChildren(outputDir)); } @Test(timeout = 30000) public void maxRestartsBadParameter() throws Exception { //tests that maxRestarts must be followed by an Integer Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-"); Map<String, String> args = new HashMap<>(); args.put("-numConsumers", "1"); args.put("-maxQueueSize", "10"); args.put("-maxRestarts", "zebra"); String[] commandLine = getDefaultCommandLineArgsArr("max_restarts", outputDir, args); boolean ex = false; try { BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine); driver.execute(); } catch (IllegalArgumentException e) { ex = true; } assertTrue("IllegalArgumentException should have been thrown", ex); } @Test(timeout = 30000) public void testNoRestartIfProcessFails() throws Exception { //tests that if something goes horribly wrong with FSBatchProcessCLI //the driver will not restart it again and again //this calls a bad xml file which should trigger a no restart exit. Path outputDir = getNewOutputDir("nostart-norestart-"); Map<String, String> args = new HashMap<>(); args.put("-numConsumers", "1"); args.put("-maxQueueSize", "10"); String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-broken.xml", commandLine); driver.execute(); assertEquals(0, countChildren(outputDir)); assertEquals(0, driver.getNumRestarts()); } @Test(timeout = 30000) public void testNoRestartIfProcessFailsTake2() throws Exception { Path outputDir = getNewOutputDir("nostart-norestart-"); Map<String, String> args = new HashMap<>(); args.put("-numConsumers", "1"); args.put("-maxQueueSize", "10"); args.put("-somethingOrOther", "I don't Know"); String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args); BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine); driver.execute(); assertEquals(0, countChildren(outputDir)); assertEquals(0, driver.getNumRestarts()); } }