/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.util;
import java.io.File;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.apache.metamodel.csv.CsvConfiguration;
import org.apache.metamodel.util.FileResource;
import org.apache.metamodel.util.Resource;
import org.datacleaner.connection.AccessDatastore;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalog;
import org.datacleaner.connection.DbaseDatastore;
import org.datacleaner.connection.ExcelDatastore;
import org.datacleaner.connection.JsonDatastore;
import org.datacleaner.connection.OdbDatastore;
import org.datacleaner.connection.SasDatastore;
import org.datacleaner.connection.XmlDatastore;
public class DatastoreCreationUtil {
public enum FileDatastoreEnum {
CSV("csv", "tsv", "txt", "dat"), EXCEL("xls", "xlsx"), ACCESS("mdb"),
// TODO: Add .sas7bdat file support
SAS(), DBASE("dbf"), XML("xml"), JSON("json"), OPENOFFICE("odb");
private final List<String> _extensions;
FileDatastoreEnum(final String... extensions) {
_extensions = Arrays.asList(extensions);
}
protected static FileDatastoreEnum getDatastoreTypeFromResource(final Resource resource) {
final String extension = FilenameUtils.getExtension(resource.getName());
for (final FileDatastoreEnum datastoreType : EnumSet.allOf(FileDatastoreEnum.class)) {
if (datastoreType._extensions.contains(extension.toLowerCase())) {
return datastoreType;
}
}
return null;
}
}
public static FileDatastoreEnum inferDatastoreTypeFromResource(final Resource resource) {
if (resource instanceof FileResource) {
final FileResource fileResource = (FileResource) resource;
final File file = fileResource.getFile();
if (file.isDirectory()) {
return FileDatastoreEnum.SAS;
}
}
return FileDatastoreEnum.getDatastoreTypeFromResource(resource);
}
public static Datastore createUniqueDatastoreFromResource(final DatastoreCatalog catalog,
final Resource resource) {
String name = resource.getName();
if (catalog.containsDatastore(name)) {
final String originalName = name;
int prefix = 1;
do {
name = originalName + "_" + prefix++;
} while (catalog.containsDatastore(name));
}
return createDatastoreFromResource(resource, name);
}
public static Datastore createDatastoreFromResource(final Resource resource, final String datastoreName) {
return createDatastoreFromEnum(inferDatastoreTypeFromResource(resource), resource, datastoreName);
}
public static Datastore createDatastoreFromEnum(final FileDatastoreEnum fileDatastore, final Resource resource,
final String datastoreName) {
if (fileDatastore == null) {
throw new IllegalArgumentException("Unrecognized file type for: " + resource.getQualifiedPath());
}
switch (fileDatastore) {
case CSV:
final CsvConfigurationDetection detection = new CsvConfigurationDetection(resource);
final CsvConfiguration csvConfiguration = detection.suggestCsvConfiguration();
return new CsvDatastore(datastoreName, resource, csvConfiguration);
case EXCEL:
return new ExcelDatastore(datastoreName, resource, resource.getQualifiedPath());
case ACCESS:
return new AccessDatastore(datastoreName, resource.getQualifiedPath());
case SAS:
final FileResource fileResource = (FileResource) resource;
return new SasDatastore(datastoreName, fileResource.getFile());
case DBASE:
return new DbaseDatastore(datastoreName, resource.getQualifiedPath());
case JSON:
return new JsonDatastore(datastoreName, resource);
case OPENOFFICE:
return new OdbDatastore(datastoreName, resource.getQualifiedPath());
case XML:
return new XmlDatastore(datastoreName, resource.getQualifiedPath());
default:
throw new IllegalArgumentException("No such datastore type");
}
}
}