/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.studio.io.data.internal.file;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.tika.Tika;
import com.rapidminer.core.io.data.source.DataSourceFactory;
import com.rapidminer.core.io.data.source.DataSourceFactoryRegistry;
import com.rapidminer.core.io.data.source.FileDataSource;
import com.rapidminer.core.io.data.source.FileDataSourceFactory;
import com.rapidminer.core.io.gui.ImportWizard;
import com.rapidminer.core.io.gui.WizardStep;
import com.rapidminer.tools.container.Pair;
/**
 * The factory for the {@link LocalFileDataSource}. Besides creating data source instances it
 * provides the location wizard step for local files and a static look-up that finds the
 * {@link FileDataSourceFactory} responsible for a given file.
 *
 * @author Nils Woehler
 * @since 7.0.0
 */
public final class LocalFileDataSourceFactory implements DataSourceFactory<LocalFileDataSource> {

	/**
	 * Creates a new, empty {@link LocalFileDataSource} without location or file data source.
	 *
	 * @return the new {@link LocalFileDataSource} instance
	 */
	@Override
	public LocalFileDataSource createNew() {
		return new LocalFileDataSource();
	}

	/**
	 * Creates a new instance of the {@link LocalFileDataSource} and sets the provided path as the
	 * file location. If a factory is provided, a file data source is created from it for the
	 * path, both are stored in the new instance and the custom steps of the factory are added to
	 * the wizard.
	 *
	 * @param wizard
	 *            the import wizard
	 * @param path
	 *            the file location for the new {@link LocalFileDataSource} instance
	 * @param factory
	 *            the {@link FileDataSourceFactory} to read the file, or {@code null} if none has
	 *            been chosen yet
	 * @return the new {@link LocalFileDataSource} instance
	 */
	public <D extends FileDataSource> LocalFileDataSource createNew(ImportWizard wizard, Path path,
			FileDataSourceFactory<D> factory) {
		LocalFileDataSource localFileDataSource = new LocalFileDataSource();
		localFileDataSource.setLocation(path);
		if (factory != null) {
			// create a file data source if factory is provided
			D fileDataSource = factory.createNew(path);

			// update the local file data source
			localFileDataSource.setFileDataSourceFactory(factory);
			localFileDataSource.setFileDataSource(fileDataSource);

			// and add file data source steps to the wizard
			for (WizardStep step : factory.createCustomSteps(wizard, fileDataSource)) {
				wizard.addStep(step);
			}
		}
		return localFileDataSource;
	}

	/**
	 * @return the I18N key of this factory, which is {@code "local_file"}
	 */
	@Override
	public String getI18NKey() {
		return "local_file";
	}

	/**
	 * @return the class of the data source created by this factory
	 */
	@Override
	public Class<LocalFileDataSource> getDataSourceClass() {
		return LocalFileDataSource.class;
	}

	/**
	 * The local file data source does not contribute custom steps itself; the steps of the
	 * selected {@link FileDataSourceFactory} are added in
	 * {@link #createNew(ImportWizard, Path, FileDataSourceFactory)} instead.
	 *
	 * @return an empty list
	 */
	@Override
	public List<WizardStep> createCustomSteps(ImportWizard wizard, LocalFileDataSource dataSource) {
		return Collections.emptyList();
	}

	/**
	 * Creates the location step for local files. The step is handed one pair of I18N key and file
	 * extensions for each registered {@link FileDataSourceFactory}.
	 *
	 * @param wizard
	 *            the import wizard the step is created for
	 * @return the new {@link LocalFileLocationWizardStep}
	 */
	@Override
	public WizardStep createLocationStep(ImportWizard wizard) {
		List<FileDataSourceFactory<?>> factories = DataSourceFactoryRegistry.INSTANCE.getFileFactories();
		List<Pair<String, Set<String>>> allFileEndings = new LinkedList<>();
		for (FileDataSourceFactory<?> factory : factories) {
			allFileEndings.add(new Pair<>(factory.getI18NKey(), factory.getFileExtensions()));
		}
		return new LocalFileLocationWizardStep(allFileEndings, wizard);
	}

	/**
	 * As described in the {@link FileDataSourceFactory#getMimeTypes()} and
	 * {@link FileDataSourceFactory#getFileExtensions()} methods this method looks up the
	 * responsible {@link FileDataSourceFactory} for the provided file. It first uses {@link Tika}
	 * to look-up the MIME type of the file and checks whether a {@link FileDataSourceFactory} for
	 * the detected MIME type is registered. If there is none, or the MIME type could not be
	 * detected because of an I/O error, it checks whether a {@link FileDataSourceFactory} is
	 * responsible for the file extension. The extension comparison ignores case. If still no
	 * match could be found {@code null} is returned.
	 *
	 * @param filePath
	 *            the path to the file which should be imported
	 * @return the responsible {@link FileDataSourceFactory} or {@code null} if none could be found
	 */
	public static FileDataSourceFactory<?> lookupFactory(Path filePath) {
		List<FileDataSourceFactory<?>> fileDataSourceFactories = DataSourceFactoryRegistry.INSTANCE.getFileFactories();

		// the MIME type takes precedence over the file extension
		FileDataSourceFactory<?> mimeTypeMatch = lookupByMimeType(filePath, fileDataSourceFactories);
		if (mimeTypeMatch != null) {
			return mimeTypeMatch;
		}

		// no factory for the MIME type, fall back to the file extension
		return lookupByFileExtension(filePath, fileDataSourceFactories);
	}

	/**
	 * Returns the first factory that lists the MIME type detected by {@link Tika} for the file,
	 * or {@code null} if there is none or the detection failed with an {@link IOException}.
	 */
	private static FileDataSourceFactory<?> lookupByMimeType(Path filePath,
			List<FileDataSourceFactory<?>> factories) {
		String mimeType;
		try {
			mimeType = new Tika().detect(filePath);
		} catch (IOException ignored) {
			// MIME type detection is best effort only, the caller falls back to the extension
			return null;
		}
		for (FileDataSourceFactory<? extends FileDataSource> factory : factories) {
			if (factory.getMimeTypes().contains(mimeType)) {
				return factory;
			}
		}
		return null;
	}

	/**
	 * Returns the first factory that lists an extension the file name ends with, or {@code null}
	 * if there is none.
	 */
	private static FileDataSourceFactory<?> lookupByFileExtension(Path filePath,
			List<FileDataSourceFactory<?>> factories) {
		Path fileNameElement = filePath.getFileName();
		if (fileNameElement == null) {
			// a path without name elements (e.g. a file system root) cannot have an extension
			return null;
		}
		String fileName = fileNameElement.toString();
		for (FileDataSourceFactory<? extends FileDataSource> factory : factories) {
			for (String fileExtension : factory.getFileExtensions()) {
				// Plain suffix comparison instead of a glob PathMatcher: whether a glob matches
				// case sensitively is implementation-dependent, and glob meta characters in an
				// extension would be interpreted as pattern syntax.
				String suffix = "." + fileExtension;
				int suffixStart = fileName.length() - suffix.length();
				if (suffixStart >= 0 && fileName.regionMatches(true, suffixStart, suffix, 0, suffix.length())) {
					return factory;
				}
			}
		}
		return null;
	}
}