/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.fork;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.NotSerializableException;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.xml.sax.ContentHandler;
/**
 * Client side of the fork mechanism. An instance writes a temporary
 * bootstrap jar whose <code>Main-Class</code> is {@link ForkServer},
 * launches it as a separate process and then talks to that process over
 * its standard input and output streams using single-byte message types
 * defined in {@link ForkServer}.
 * <p>
 * The {@link #ping()}, {@link #call(String, Object...)} and
 * {@link #close()} methods are synchronized on this instance.
 */
class ForkClient {

    /**
     * Resources registered while the constructor sends the class loader and
     * the target object. Every call starts from a copy of this list.
     */
    private final List<ForkResource> resources = new ArrayList<>();

    /** Class loader used when deserializing exceptions sent back by the server. */
    private final ClassLoader loader;

    /** Temporary bootstrap jar; deleted in {@link #close()}. */
    private final File jar;

    /** The forked server process. */
    private final Process process;

    /** Stream connected to the standard input of the forked process. */
    private final DataOutputStream output;

    /** Stream connected to the standard output of the forked process. */
    private final DataInputStream input;

    /** Stream connected to the standard error of the forked process. */
    private final InputStream error;

    /**
     * Starts the forked server process and sends it the class loader and
     * the given object. If anything fails, everything that was set up so
     * far is released through {@link #close()} before the exception
     * propagates.
     *
     * @param loader class loader that is proxied to the server and used
     *               locally to deserialize returned exceptions
     * @param object object sent to the server after the class loader
     * @param java leading part of the command line; <code>-jar</code>, the
     *             bootstrap jar path and the pulse value are appended to it
     * @param serverPulseMillis value passed to the server as its only
     *             program argument (presumably a watchdog interval in
     *             milliseconds; confirm against ForkServer)
     * @throws IOException if the jar could not be created, the process
     *             could not be started or the connection was lost
     * @throws TikaException if one of the objects could not be serialized
     */
    public ForkClient(ClassLoader loader, Object object, List<String> java, long serverPulseMillis)
            throws IOException, TikaException {
        boolean ok = false;
        try {
            this.loader = loader;
            this.jar = createBootstrapJar();

            List<String> command = new ArrayList<>(java);
            command.add("-jar");
            command.add(jar.getPath());
            command.add(Long.toString(serverPulseMillis));

            this.process = new ProcessBuilder(command).start();
            this.output = new DataOutputStream(process.getOutputStream());
            this.input = new DataInputStream(process.getInputStream());
            this.error = process.getErrorStream();

            waitForStartBeacon();

            sendObject(loader, resources);
            sendObject(object, resources);
            ok = true;
        } finally {
            if (!ok) {
                // Fields that were never assigned are still null here;
                // close() checks each of them before use.
                close();
            }
        }
    }

    /**
     * Blocks until the server sends the {@link ForkServer#READY} byte.
     * Any other byte is skipped.
     *
     * @throws IOException if the stream ends before the ready byte arrives
     *             or if one of the streams could not be read
     */
    private void waitForStartBeacon() throws IOException {
        while (true) {
            consumeErrorStream();
            int type = input.read();
            if (type == -1) {
                // The process is gone; without this check the loop would
                // keep reading end-of-stream forever.
                consumeErrorStream();
                throw new IOException(
                        "Lost connection to a forked server process"
                        + " while waiting for its start beacon");
            }
            if ((byte) type == ForkServer.READY) {
                consumeErrorStream();
                return;
            }
        }
    }

    /**
     * Sends a ping to the server and checks the single byte it answers with.
     *
     * @return <code>true</code> if the server answered with
     *         {@link ForkServer#PING}, <code>false</code> if it answered
     *         with anything else or if an I/O error occurred
     */
    public synchronized boolean ping() {
        try {
            output.writeByte(ForkServer.PING);
            output.flush();
            consumeErrorStream();
            if (input.read() != ForkServer.PING) {
                return false;
            }
            consumeErrorStream();
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Asks the server to invoke the named method. The call marker and the
     * method name are written first, then each argument is sent with
     * {@link #sendObject(Object, List)}, and finally the response to the
     * call itself is awaited.
     *
     * @param method name of the method to invoke
     * @param args arguments of the invocation
     * @return the throwable sent back by the server, or <code>null</code>
     *         if the server did not report an error
     * @throws IOException if the connection to the server was lost
     * @throws TikaException if an argument could not be serialized
     */
    public synchronized Throwable call(String method, Object... args)
            throws IOException, TikaException {
        // Per-call copy so that proxies added for this call do not
        // accumulate in the shared resource list.
        List<ForkResource> r = new ArrayList<>(resources);
        output.writeByte(ForkServer.CALL);
        output.writeUTF(method);
        for (Object arg : args) {
            sendObject(arg, r);
        }
        return waitForResponse(r);
    }

    /**
     * Sends the object to the server and waits for the server's response.
     * Input streams, content handlers and class loaders are not sent
     * themselves: a matching resource is appended to the given list and a
     * proxy carrying the index of that resource is sent instead.
     *
     * @param object object to be serialized
     * @param resources list of fork resources, used when adding proxies
     * @throws IOException if the object could not be written or the
     *             connection to the server was lost
     * @throws TikaException if the object is not serializable
     */
    private void sendObject(Object object, List<ForkResource> resources)
            throws IOException, TikaException {
        int n = resources.size();
        if (object instanceof InputStream) {
            resources.add(new InputStreamResource((InputStream) object));
            object = new InputStreamProxy(n);
        } else if (object instanceof ContentHandler) {
            resources.add(new ContentHandlerResource((ContentHandler) object));
            object = new ContentHandlerProxy(n);
        } else if (object instanceof ClassLoader) {
            resources.add(new ClassLoaderResource((ClassLoader) object));
            object = new ClassLoaderProxy(n);
        }

        try {
            ForkObjectInputStream.sendObject(object, output);
        } catch (NotSerializableException nse) {
            // Build a more friendly error message for this
            throw new TikaException(
                    "Unable to serialize " + object.getClass().getSimpleName() +
                    " to pass to the Forked Parser", nse);
        }

        // The returned throwable, if any, is not inspected here.
        waitForResponse(resources);
    }

    /**
     * Releases everything held by this client: closes the three process
     * streams, destroys the process and waits for it to exit, and deletes
     * the bootstrap jar. Each step is attempted even if an earlier one
     * failed, and members that were never initialized are skipped.
     */
    public synchronized void close() {
        closeQuietly(output);
        closeQuietly(input);
        closeQuietly(error);

        if (process != null) {
            process.destroy();
            try {
                //TIKA-1933
                process.waitFor();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller.
                Thread.currentThread().interrupt();
            }
        }

        if (jar != null) {
            jar.delete();
        }
    }

    /**
     * Closes the given stream, ignoring a <code>null</code> argument and
     * any I/O error, so that one failing stream does not prevent the
     * remaining cleanup.
     *
     * @param closeable stream to close, possibly <code>null</code>
     */
    private static void closeQuietly(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best-effort cleanup; the process is destroyed afterwards anyway.
        }
    }

    /**
     * Flushes pending output and then serves the server until it sends a
     * terminating message. Resource requests are dispatched to the
     * resource at the index sent by the server; an error message carries a
     * serialized throwable; any other message byte ends the wait.
     *
     * @param resources resources the server may refer to by index
     * @return the throwable sent by the server, or <code>null</code> if
     *         the wait ended without an error message
     * @throws IOException if the connection was lost or the throwable
     *             could not be deserialized
     */
    private Throwable waitForResponse(List<ForkResource> resources)
            throws IOException {
        output.flush();
        while (true) {
            consumeErrorStream();
            int type = input.read();
            if (type == -1) {
                consumeErrorStream();
                throw new IOException(
                        "Lost connection to a forked server process");
            } else if (type == ForkServer.RESOURCE) {
                ForkResource resource =
                        resources.get(input.readUnsignedByte());
                resource.process(input, output);
            } else if ((byte) type == ForkServer.ERROR) {
                try {
                    return (Throwable) ForkObjectInputStream.readObject(
                            input, loader);
                } catch (ClassNotFoundException e) {
                    throw new IOException(
                            "Unable to deserialize an exception", e);
                }
            } else {
                return null;
            }
        }
    }

    /**
     * Consumes all pending bytes from the standard error stream of the
     * forked server process, and prints them out to the standard error
     * stream of this process. This method should be called always before
     * expecting some output from the server, to prevent the server from
     * blocking due to a filled up pipe buffer of the error stream.
     *
     * @throws IOException if the error stream could not be read
     */
    private void consumeErrorStream() throws IOException {
        int n;
        while ((n = error.available()) > 0) {
            byte[] b = new byte[n];
            n = error.read(b);
            if (n > 0) {
                System.err.write(b, 0, n);
            }
        }
    }

    /**
     * Creates a temporary jar file that can be used to bootstrap the forked
     * server process. Remember to remove the file when no longer used.
     * If filling the jar fails, the temporary file is deleted again.
     *
     * @return the created jar file
     * @throws IOException if the bootstrap archive could not be created
     */
    private static File createBootstrapJar() throws IOException {
        File file = File.createTempFile("apache-tika-fork-", ".jar");
        boolean ok = false;
        try {
            fillBootstrapJar(file);
            ok = true;
        } finally {
            if (!ok) {
                file.delete();
            }
        }
        return file;
    }

    /**
     * Fills in the jar file used to bootstrap the forked server process.
     * All the required <code>.class</code> files and a manifest with a
     * <code>Main-Class</code> entry are written into the archive.
     *
     * @param file file to hold the bootstrap archive
     * @throws IOException if the bootstrap archive could not be created,
     *             including when one of the class files cannot be found
     */
    private static void fillBootstrapJar(File file) throws IOException {
        try (JarOutputStream jar = new JarOutputStream(new FileOutputStream(file))) {
            String manifest = "Main-Class: " + ForkServer.class.getName() + "\n";
            jar.putNextEntry(new ZipEntry("META-INF/MANIFEST.MF"));
            jar.write(manifest.getBytes(UTF_8));

            Class<?>[] bootstrap = {
                    ForkServer.class, ForkObjectInputStream.class,
                    ForkProxy.class, ClassLoaderProxy.class,
                    MemoryURLConnection.class,
                    MemoryURLStreamHandler.class,
                    MemoryURLStreamHandlerFactory.class,
                    MemoryURLStreamRecord.class
            };
            ClassLoader loader = ForkServer.class.getClassLoader();
            for (Class<?> klass : bootstrap) {
                String path = klass.getName().replace('.', '/') + ".class";
                try (InputStream input = loader.getResourceAsStream(path)) {
                    if (input == null) {
                        // getResourceAsStream signals a missing resource
                        // with null rather than an exception.
                        throw new IOException(
                                "Unable to find bootstrap class resource: " + path);
                    }
                    jar.putNextEntry(new JarEntry(path));
                    IOUtils.copy(input, jar);
                }
            }
        }
    }
}