/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.fork;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.Semaphore;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.mock.MockParser;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Tests for {@link ForkParser}: single, serial and parallel parsing, blocking
 * behaviour once the pool size is reached, and failure handling when the
 * forked parser hangs.
 *
 * <p>Every test closes the {@link ForkParser} it creates in a {@code finally}
 * block so that the parser is released even when an assertion fails.
 */
public class ForkParserTest {

    /**
     * Parses an empty stream once and checks the text and content type that
     * the test expects {@link ForkTestParser} to report.
     */
    @Test
    public void testHelloWorld() throws Exception {
        ForkParser parser = new ForkParser(
                ForkParserTest.class.getClassLoader(),
                new ForkTestParser());
        try {
            Metadata metadata = new Metadata();
            ContentHandler output = new BodyContentHandler();
            InputStream stream = new ByteArrayInputStream(new byte[0]);
            ParseContext context = new ParseContext();
            parser.parse(stream, output, metadata, context);
            assertEquals("Hello, World!", output.toString().trim());
            assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        } finally {
            parser.close();
        }
    }

    /**
     * Runs ten parses one after another on the same {@link ForkParser} and
     * checks that each one yields the expected text.
     */
    @Test
    public void testSerialParsing() throws Exception {
        ForkParser parser = new ForkParser(
                ForkParserTest.class.getClassLoader(),
                new ForkTestParser());
        try {
            ParseContext context = new ParseContext();
            for (int i = 0; i < 10; i++) {
                ContentHandler output = new BodyContentHandler();
                InputStream stream = new ByteArrayInputStream(new byte[0]);
                parser.parse(stream, output, new Metadata(), context);
                assertEquals("Hello, World!", output.toString().trim());
            }
        } finally {
            parser.close();
        }
    }

    /**
     * Runs ten parses on separate threads that share one {@link ForkParser}
     * and checks each thread's output after joining it.
     */
    @Test
    public void testParallelParsing() throws Exception {
        final ForkParser parser = new ForkParser(
                ForkParserTest.class.getClassLoader(),
                new ForkTestParser());
        try {
            final ParseContext context = new ParseContext();

            Thread[] threads = new Thread[10];
            ContentHandler[] output = new ContentHandler[threads.length];
            for (int i = 0; i < threads.length; i++) {
                final ContentHandler o = new BodyContentHandler();
                output[i] = o;
                threads[i] = new Thread() {
                    @Override
                    public void run() {
                        try {
                            InputStream stream =
                                    new ByteArrayInputStream(new byte[0]);
                            parser.parse(stream, o, new Metadata(), context);
                        } catch (Exception e) {
                            // A failed parse leaves the handler empty, which
                            // the assertion after join() reports.
                            e.printStackTrace();
                        }
                    }
                };
                threads[i].start();
            }

            for (int i = 0; i < threads.length; i++) {
                threads[i].join();
                assertEquals("Hello, World!", output[i].toString().trim());
            }
        } finally {
            parser.close();
        }
    }

    /**
     * Occupies the pool with {@code getPoolSize()} parses that are held open
     * on piped streams, then checks that one more parse produces no output
     * until the pipes are closed and the earlier parses finish.
     */
    @Test
    public void testPoolSizeReached() throws Exception {
        final ForkParser parser = new ForkParser(
                ForkParserTest.class.getClassLoader(),
                new ForkTestParser());
        try {
            final Semaphore barrier = new Semaphore(0);

            Thread[] threads = new Thread[parser.getPoolSize()];
            PipedOutputStream[] pipes = new PipedOutputStream[threads.length];
            final ParseContext context = new ParseContext();
            for (int i = 0; i < threads.length; i++) {
                // Each call to read() releases one permit before delegating,
                // signalling that this pipe's input is being consumed.
                final PipedInputStream input = new PipedInputStream() {
                    @Override
                    public synchronized int read() throws IOException {
                        barrier.release();
                        return super.read();
                    }
                };
                pipes[i] = new PipedOutputStream(input);
                threads[i] = new Thread() {
                    @Override
                    public void run() {
                        try {
                            ContentHandler o = new DefaultHandler();
                            parser.parse(input, o, new Metadata(), context);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                };
                threads[i].start();
            }

            // Wait until all the background parsers have been started
            barrier.acquire(parser.getPoolSize());

            final ContentHandler o = new BodyContentHandler();
            Thread blocked = new Thread() {
                @Override
                public void run() {
                    try {
                        barrier.release();
                        InputStream stream =
                                new ByteArrayInputStream(new byte[0]);
                        parser.parse(stream, o, new Metadata(), context);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            };
            blocked.start();

            // Wait until the last thread is started, and then some to
            // make sure that it would have had a chance to start processing
            // data had it not been blocked.
            barrier.acquire();
            Thread.sleep(1000);

            assertEquals("", o.toString());

            // Closing each pipe ends its stream so the background parse can
            // finish and its thread can be joined.
            for (int i = 0; i < threads.length; i++) {
                pipes[i].close();
                threads[i].join();
            }

            blocked.join();
            assertEquals("Hello, World!", o.toString().trim());
        } finally {
            parser.close();
        }
    }

    /**
     * Checks that a parse fails with an exception when the forked
     * {@link MockParser} hangs: first with the default pulse setting and an
     * 11000 ms hang, then with a 10 ms pulse and a 1000 ms hang.
     *
     * <p>Each {@link ForkParser} is closed in a {@code finally} block, as in
     * the other tests of this class, so the parser is not left open after the
     * expected failure.
     */
    @Test
    public void testPulse() throws Exception {
        //test default 5000 ms
        ForkParser forkParser = new ForkParser(
                ForkParserTest.class.getClassLoader(), new MockParser());

        String sleepCommand = "<mock>\n"
                + " <write element=\"p\">Hello, World!</write>\n"
                + " <hang millis=\"11000\" heavy=\"false\" interruptible=\"false\" />\n"
                + "</mock>";
        ContentHandler o = new BodyContentHandler(-1);
        Metadata m = new Metadata();
        ParseContext c = new ParseContext();
        try {
            forkParser.parse(
                    new ByteArrayInputStream(
                            sleepCommand.getBytes(StandardCharsets.UTF_8)),
                    o, m, c);
            fail("should have thrown IOException");
        } catch (TikaException e) {
            assertTrue("failed to communicate with forked parser process", true);
        } finally {
            forkParser.close();
        }

        //test setting very short pulse (10 ms) and a parser that takes at least 1000 ms
        forkParser = new ForkParser(
                ForkParserTest.class.getClassLoader(), new MockParser());
        try {
            forkParser.setServerPulseMillis(10);
            sleepCommand = "<mock>\n"
                    + " <write element=\"p\">Hello, World!</write>\n"
                    + " <hang millis=\"1000\" heavy=\"false\" interruptible=\"false\" />\n"
                    + "</mock>";
            o = new BodyContentHandler(-1);
            m = new Metadata();
            c = new ParseContext();
            try {
                forkParser.parse(
                        new ByteArrayInputStream(
                                sleepCommand.getBytes(StandardCharsets.UTF_8)),
                        o, m, c);
                fail("Should have thrown exception");
            } catch (IOException | TikaException e) {
                assertTrue("should have thrown IOException lost connection", true);
            }
        } finally {
            forkParser.close();
        }
    }
}