// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.index;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
/**
 * Utility class that opens Lucene {@link Directory directories} from a URI. It supports several URI schemes:
 * <ul>
 * <li>jar</li>
 * <li>file</li>
 * <li>bundleresource</li>
 * </ul>
 * Directories opened through the <code>jar</code> scheme are obtained from a pluggable
 * {@link JARDirectoryProvider} (a {@link SingletonProvider} by default).
 */
public class ClassPathDirectory {

    private static final Logger LOGGER = Logger.getLogger(ClassPathDirectory.class);

    /** Provider used for <code>jar</code> URIs; guarded by the class lock. */
    private static JARDirectoryProvider provider = new SingletonProvider();

    private ClassPathDirectory() {
    }

    /**
     * Set the location of index extraction for directories opened by jar URI.
     * <p/>
     * By default, if this method is not called, the index will be extracted to a sub-folder of java.io.tmpdir.
     *
     * @param folder the folder forwarded to {@link CategoryRegistryManager#setLocalRegistryPath}.
     * @deprecated use {@link CategoryRegistryManager#setLocalRegistryPath} instead
     */
    @Deprecated
    public static void setLocalIndexFolder(String folder) {
        CategoryRegistryManager.setLocalRegistryPath(folder);
    }

    /**
     * Allow external code to change behavior about extracted Lucene indexes (always extract a fresh copy or reuse
     * previous extract).
     *
     * @param provider An implementation of {@link JARDirectoryProvider}. See {@link BasicProvider} or
     * {@link SingletonProvider} for examples.
     * @throws IllegalArgumentException if <code>provider</code> is <code>null</code>.
     * @see BasicProvider
     * @see SingletonProvider
     */
    public static synchronized void setProvider(JARDirectoryProvider provider) {
        if (provider == null) {
            throw new IllegalArgumentException("Provider can not be null.");
        }
        ClassPathDirectory.provider = provider;
    }

    /**
     * <p>
     * Creates a new {@link Directory directory} that picks up the right implementation depending on URI's scheme.
     * </p>
     *
     * @param uri A valid URI to a Lucene index
     * @return A {@link Directory} to the Lucene content in <code>uri</code>.
     * @throws IllegalArgumentException if <code>uri</code> is <code>null</code> or the location can not be opened.
     * @throws UnsupportedOperationException if the URI scheme is not one of the supported schemes.
     */
    public static synchronized Directory open(URI uri) {
        if (uri == null) {
            // Same contract as setProvider(...): reject null explicitly instead of failing with a bare NPE.
            throw new IllegalArgumentException("URI can not be null.");
        }
        final String scheme = uri.getScheme();
        if ("jar".equals(scheme)) {
            LOGGER.info("Opening '" + uri + "' ...");
            try {
                return provider.get(uri);
            } catch (Exception e) {
                // Broad catch kept on purpose: custom providers may fail with unchecked exceptions too.
                throw new IllegalArgumentException("Unable to open JAR '" + uri + "'.", e);
            }
        } else if ("file".equals(scheme)) {
            LOGGER.debug("Opening '" + uri + "' ...");
            try {
                return FSDirectory.open(new File(uri));
            } catch (IOException e) {
                throw new IllegalArgumentException("Unable to open path '" + uri + "'.", e);
            }
        } else if ("bundleresource".equals(scheme)) { // for OSGI environment
            LOGGER.info("Opening '" + uri + "' ...");
            try {
                final String path = PlatformPathUtil.getFilePathByPlatformURL(uri.toURL());
                return FSDirectory.open(new File(path));
            } catch (IOException e) {
                throw new IllegalArgumentException("Unable to open bundleresource '" + uri + "'.", e);
            }
        } else {
            LOGGER.info("Opening '" + uri + "' ...");
            throw new UnsupportedOperationException("Unsupported scheme '" + scheme + "'.");
        }
    }

    /**
     * Releases the resources held by the current {@link JARDirectoryProvider provider}.
     * <p/>
     * Synchronized like {@link #setProvider(JARDirectoryProvider)} and {@link #open(URI)} so the provider can not be
     * swapped or used while it is being destroyed.
     */
    public static synchronized void destroy() {
        provider.destroy();
    }

    /**
     * An interface to provide Lucene indexes based on provided location (as URI).
     */
    public interface JARDirectoryProvider {

        /**
         * Returns a {@link Directory lucene directory} for provided location (as URI).
         *
         * @param uri An URI to a JAR file.
         * @return A {@link Directory lucene directory} ready to be used in Lucene code.
         * @throws IOException if the location can not be read.
         */
        Directory get(URI uri) throws IOException;

        /**
         * Destroys all cached resources by this provider.
         */
        void destroy();
    }

    /**
     * An implementation that extract only once content on disk for a given URI.
     */
    public static class SingletonProvider implements JARDirectoryProvider {

        private static final BasicProvider provider = new BasicProvider();

        /** Cache of already opened directories; shared by all instances, hence guarded by its own monitor. */
        private static final Map<URI, Directory> instances = new HashMap<>();

        /**
         * Returns the cached directory for <code>uri</code>, opening it through a {@link BasicProvider} on first
         * access.
         */
        @Override
        public Directory get(URI uri) throws IOException {
            // The cache is static, so lock on the cache itself rather than on this instance.
            synchronized (instances) {
                Directory directory = instances.get(uri);
                if (directory == null) {
                    directory = provider.get(uri);
                    instances.put(uri, directory);
                }
                return directory;
            }
        }

        /**
         * Forgets every cached directory and destroys the resources of the underlying {@link BasicProvider}.
         */
        @Override
        public void destroy() {
            synchronized (instances) {
                // Drop cached entries first: the delegate closes them, so they must not be handed out again.
                instances.clear();
                provider.destroy();
            }
        }
    }

    /**
     * An implementation that does not perform any reuse of previously extracted content.
     */
    public static class BasicProvider implements JARDirectoryProvider {

        /** File systems opened for top-level JAR URIs; guarded by its own monitor. */
        private static final Map<URI, FileSystem> openedJars = new HashMap<>();

        /**
         * Holds all opened class path directory instances for clean up
         * TODO This is temporary until a more global resource management system is found/proposed
         *
         * @see #destroy()
         */
        private static final Set<JARDirectory> classPathDirectories = new HashSet<>();

        /**
         * Returns the file system registered for <code>uri</code>, creating and registering it on first access.
         */
        private static FileSystem openOrGet(String uri) throws IOException {
            final URI jarURI = URI.create(uri);
            synchronized (openedJars) {
                FileSystem fs = openedJars.get(jarURI);
                if (fs == null) {
                    fs = FileSystems.newFileSystem(jarURI, Collections.<String, String> emptyMap());
                    openedJars.put(jarURI, fs);
                }
                return fs;
            }
        }

        /**
         * Copies the archive located at <code>entry</code> inside <code>parent</code> to
         * <code>extractionRoot</code> (unless a file with that name already exists there) and opens it as a new file
         * system.
         */
        private static FileSystem openNestedJar(FileSystem parent, String entry, String extractionRoot)
                throws IOException {
            final Path source = parent.getPath(entry);
            final Path destFile = Paths.get(extractionRoot + File.separator + source.getFileName());
            final File destinationFile = destFile.toFile();
            if (!destinationFile.exists()) {
                // Create only the parent folder: creating a directory at the destination itself would leave a
                // directory behind if the copy fails, and the extraction would then be skipped forever.
                final File parentFolder = destinationFile.getParentFile();
                if (parentFolder != null) {
                    parentFolder.mkdirs();
                }
                Files.copy(source, destFile, StandardCopyOption.REPLACE_EXISTING);
            }
            // Nested archives are not registered in openedJars.
            // NOTE(review): presumably JARDirectory.close() releases this file system - confirm.
            return FileSystems.newFileSystem(destFile, Thread.currentThread().getContextClassLoader());
        }

        /**
         * Opens the Lucene index designated by <code>uri</code>. The URI is split on <code>'!'</code>: the first
         * segment is the outer JAR, intermediate segments are nested JARs that get extracted to the local registry
         * path, and the last segment is the index directory inside the innermost archive.
         */
        @Override
        public Directory get(URI uri) throws IOException {
            final String location = uri.toString();
            final String jarFile = StringUtils.substringBefore(location, "!"); //$NON-NLS-1$
            final String extractionRoot = CategoryRegistryManager.getLocalRegistryPath();
            final JARDirectory.JARDescriptor openedJar = new JARDirectory.JARDescriptor();
            // Extract all nested JARs
            final StringTokenizer tokenizer = new StringTokenizer(location, "!"); //$NON-NLS-1$
            FileSystem fs = null;
            while (tokenizer.hasMoreTokens()) {
                final String current = tokenizer.nextToken();
                if (!tokenizer.hasMoreTokens()) {
                    // Last segment is the index directory, not an archive.
                    break;
                } else if (fs == null) {
                    fs = openOrGet(current);
                } else { // fs != null
                    fs = openNestedJar(fs, current, extractionRoot);
                }
            }
            openedJar.fileSystem = fs;
            openedJar.jarFileName = jarFile;
            final String directory = StringUtils.substringAfterLast(location, "!"); //$NON-NLS-1$
            LOGGER.debug("Opening '" + jarFile + "' at directory '" + directory + "' ...");
            final JARDirectory jarDirectory = new JARDirectory(extractionRoot, openedJar, directory);
            synchronized (classPathDirectories) {
                classPathDirectories.add(jarDirectory);
            }
            return jarDirectory;
        }

        /**
         * Destroy all resources that instances may have created on disk.
         * <p/>
         * Every tracked directory and every registered JAR file system is closed and then forgotten, so the provider
         * can be used again afterwards.
         */
        @Override
        public void destroy() {
            synchronized (classPathDirectories) {
                final Iterator<JARDirectory> iterator = classPathDirectories.iterator();
                while (iterator.hasNext()) {
                    final JARDirectory jarDirectory = iterator.next();
                    try {
                        jarDirectory.close();
                    } catch (IOException e) {
                        LOGGER.error("Unable to close directory at " + jarDirectory.indexDirectory + " (location : "
                                + jarDirectory.extractPath + ").", e);
                    } finally {
                        iterator.remove();
                    }
                }
            }
            synchronized (openedJars) {
                final Iterator<FileSystem> fileSystems = openedJars.values().iterator();
                while (fileSystems.hasNext()) {
                    final FileSystem fileSystem = fileSystems.next();
                    try {
                        fileSystem.close();
                    } catch (IOException e) {
                        LOGGER.error("Unable to close " + fileSystem + ".", e);
                    } finally {
                        // Forget the entry: a closed file system must never be returned by openOrGet(...).
                        fileSystems.remove();
                    }
                }
            }
        }
    }
}