/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.extension.output;
import java.io.File;
import javax.inject.Inject;
import javax.inject.Named;
import org.datacleaner.api.Alias;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Distributed;
import org.datacleaner.api.HasLabelAdvice;
import org.datacleaner.api.Validate;
import org.datacleaner.beans.writers.WriteDataResult;
import org.datacleaner.beans.writers.WriteDataResultImpl;
import org.datacleaner.components.categories.WriteSuperCategory;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalog;
import org.datacleaner.connection.JdbcDatastore;
import org.datacleaner.descriptors.FilterDescriptor;
import org.datacleaner.descriptors.TransformerDescriptor;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.output.OutputWriter;
import org.datacleaner.output.datastore.DatastoreCreationDelegate;
import org.datacleaner.output.datastore.DatastoreCreationDelegateImpl;
import org.datacleaner.output.datastore.DatastoreOutputWriterFactory;
import org.datacleaner.user.UserPreferences;
@Named("Create staging table")
@Alias("Write to Datastore")
@Description("Write data to DataCleaner's embedded staging database (based on H2), which provides a convenient "
+ "location for staging data or simply storing data temporarily for further analysis.")
@Categorized(superCategory = WriteSuperCategory.class)
@Distributed(false)
public class CreateStagingTableAnalyzer extends AbstractOutputWriterAnalyzer implements HasLabelAdvice {
/**
* Write mode for the datastore output analyzer. Determines if the datastore
* will be truncated before writing data or if a new/separate table should
* be created for this output.
*/
public enum WriteMode {
TRUNCATE, NEW_TABLE
}
static final String H2_DATABASE_CONNECTION_PROTOCOL = "jdbc:h2:";
static final String H2_DRIVER_CLASS_NAME = "org.h2.Driver";
@Configured(order = 1)
String datastoreName = "DataCleaner-staging";
@Configured(order = 2)
String tableName;
@Configured(order = 3)
@Description("Determines the behaviour in case of there's an existing datastore and table with the given names.")
WriteMode writeMode = WriteMode.TRUNCATE;
@Inject
UserPreferences userPreferences;
@Inject
DatastoreCatalog datastoreCatalog;
@Override
public void configureForFilterOutcome(final AnalysisJobBuilder ajb, final FilterDescriptor<?, ?> descriptor,
final String categoryName) {
final String dsName = ajb.getDatastoreConnection().getDatastore().getName();
tableName = "output-" + dsName + "-" + descriptor.getDisplayName() + "-" + categoryName;
}
@Override
public void configureForTransformedData(final AnalysisJobBuilder ajb, final TransformerDescriptor<?> descriptor) {
final String dsName = ajb.getDatastoreConnection().getDatastore().getName();
tableName = "output-" + dsName + "-" + descriptor.getDisplayName();
}
@Override
public String getSuggestedLabel() {
if (datastoreName == null || tableName == null) {
return null;
}
return datastoreName + " - " + tableName;
}
@Override
public OutputWriter createOutputWriter() {
final String[] headers = new String[columns.length];
for (int i = 0; i < headers.length; i++) {
headers[i] = columns[i].getName();
}
final boolean truncate = (writeMode == WriteMode.TRUNCATE);
final DatastoreCreationDelegate creationDelegate = new DatastoreCreationDelegateImpl(datastoreCatalog);
final File saveDatastoreDirectory =
(userPreferences == null ? new File("datastores") : userPreferences.getSaveDatastoreDirectory());
final OutputWriter outputWriter = DatastoreOutputWriterFactory
.getWriter(saveDatastoreDirectory, creationDelegate, datastoreName, tableName, truncate, columns);
// update the tablename property with the actual name (whitespace
// escaped etc.)
tableName = DatastoreOutputWriterFactory.getActualTableName(outputWriter);
return outputWriter;
}
@Override
protected WriteDataResult getResultInternal(final int rowCount) {
return new WriteDataResultImpl(rowCount, datastoreName, null, tableName);
}
public String getDatastoreName() {
return datastoreName;
}
public void setDatastoreName(final String datastoreName) {
this.datastoreName = datastoreName;
}
@Validate
public void validate() {
// The first time this method is invoked, the datastoreCatalog and
// userPreferences fields haven't been populated yet, therefore we just
// skip the check, because it is called a little bit later again, and
// then these fields are populated.
if (datastoreCatalog != null) {
// Validate that the datastoreName doesn't conflict with one of the
// datastores in the datastoreCatalog.
final Datastore datastore = datastoreCatalog.getDatastore(datastoreName);
if (datastore != null) {
if (datastore instanceof JdbcDatastore && ((JdbcDatastore) datastore).getDriverClass()
.equals(H2_DRIVER_CLASS_NAME)) {
if (!((JdbcDatastore) datastore).getJdbcUrl().startsWith(
H2_DATABASE_CONNECTION_PROTOCOL + userPreferences.getSaveDatastoreDirectory().getPath())) {
throw new IllegalStateException("Datastore \"" + datastoreName
+ "\" is not located in \"Written datastores\" directory \"" + userPreferences
.getSaveDatastoreDirectory().getPath() + "\".");
}
} else {
throw new IllegalStateException("Datastore \"" + datastoreName
+ "\" is not an H2 database, so it can't be used as a staging database.");
}
}
}
}
}