/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.writers;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import javax.inject.Inject;
import org.apache.commons.lang.ArrayUtils;
import org.apache.metamodel.BatchUpdateScript;
import org.apache.metamodel.UpdateCallback;
import org.apache.metamodel.UpdateScript;
import org.apache.metamodel.UpdateableDataContext;
import org.apache.metamodel.create.TableCreationBuilder;
import org.apache.metamodel.csv.CsvDataContext;
import org.apache.metamodel.delete.RowDeletionBuilder;
import org.apache.metamodel.insert.RowInsertionBuilder;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.Action;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.eobjects.analyzer.beans.api.Analyzer;
import org.eobjects.analyzer.beans.api.AnalyzerBean;
import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.ColumnProperty;
import org.eobjects.analyzer.beans.api.ComponentContext;
import org.eobjects.analyzer.beans.api.Concurrent;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.ExecutionLogMessage;
import org.eobjects.analyzer.beans.api.FileProperty;
import org.eobjects.analyzer.beans.api.FileProperty.FileAccessMode;
import org.eobjects.analyzer.beans.api.Initialize;
import org.eobjects.analyzer.beans.api.MappedProperty;
import org.eobjects.analyzer.beans.api.Provided;
import org.eobjects.analyzer.beans.api.SchemaProperty;
import org.eobjects.analyzer.beans.api.TableProperty;
import org.eobjects.analyzer.beans.api.Validate;
import org.eobjects.analyzer.beans.convert.ConvertToBooleanTransformer;
import org.eobjects.analyzer.beans.convert.ConvertToNumberTransformer;
import org.eobjects.analyzer.connection.CsvDatastore;
import org.eobjects.analyzer.connection.FileDatastore;
import org.eobjects.analyzer.connection.UpdateableDatastore;
import org.eobjects.analyzer.connection.UpdateableDatastoreConnection;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.util.HasLabelAdvice;
import org.eobjects.analyzer.util.SchemaNavigator;
import org.eobjects.analyzer.util.WriteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@AnalyzerBean("Insert into table")
@Description("Insert records into a table in a registered datastore. This component allows you to map the values available in the flow with the columns of the target table, in order to insert these values into the table.")
@Categorized(WriteDataCategory.class)
@Concurrent(true)
public class InsertIntoTableAnalyzer implements Analyzer<WriteDataResult>, Action<Iterable<Object[]>>, HasLabelAdvice {
private static final String PROPERTY_NAME_VALUES = "Values";
private static final File TEMP_DIR = FileHelper.getTempDir();
private static final String ERROR_MESSAGE_COLUMN_NAME = "insert_into_table_error_message";
private static final Logger logger = LoggerFactory.getLogger(InsertIntoTableAnalyzer.class);
@Inject
@Configured(PROPERTY_NAME_VALUES)
@Description("Values to write to the table")
InputColumn<?>[] values;
@Inject
@Configured
@Description("Names of columns in the target table.")
@ColumnProperty
@MappedProperty(PROPERTY_NAME_VALUES)
String[] columnNames;
@Inject
@Configured
@Description("Datastore to write to")
UpdateableDatastore datastore;
@Inject
@Configured(required = false)
@Description("Schema name of target table")
@SchemaProperty
String schemaName;
@Inject
@Configured(required = false)
@Description("Table to target (insert into)")
@TableProperty
String tableName;
@Inject
@Configured
@Description("Truncate table before inserting?")
boolean truncateTable = false;
@Inject
@Configured("Buffer size")
@Description("How much data to buffer before committing batches of data. Large batches often perform better, but require more memory.")
WriteBufferSizeOption bufferSizeOption = WriteBufferSizeOption.MEDIUM;
@Inject
@Configured(value = "How to handle insertion errors?")
ErrorHandlingOption errorHandlingOption = ErrorHandlingOption.STOP_JOB;
@Inject
@Configured(value = "Error log file location", required = false)
@Description("Directory or file path for saving erroneous records")
@FileProperty(accessMode = FileAccessMode.SAVE, extension = ".csv")
File errorLogFile = TEMP_DIR;
@Inject
@Configured(required = false)
@Description("Additional values to write to error log")
InputColumn<?>[] additionalErrorLogValues;
@Inject
@Provided
ComponentContext _componentContext;
private Column[] _targetColumns;
private WriteBuffer _writeBuffer;
private AtomicInteger _writtenRowCount;
private AtomicInteger _errorRowCount;
private CsvDataContext _errorDataContext;
@Validate
public void validate() {
if (values.length != columnNames.length) {
throw new IllegalStateException("Length of 'Values' (" + values.length + ") and 'Column names' ("
+ columnNames.length + ") must be equal");
}
}
/**
* Truncates the database table if necesary. This is NOT a distributable
* initializer, since it can only happen once.
*/
@Initialize(distributed = false)
public void truncateIfNecesary() {
if (truncateTable) {
final UpdateableDatastoreConnection con = datastore.openConnection();
try {
final SchemaNavigator schemaNavigator = con.getSchemaNavigator();
final Table table = schemaNavigator.convertToTable(schemaName, tableName);
final UpdateableDataContext dc = con.getUpdateableDataContext();
dc.executeUpdate(new UpdateScript() {
@Override
public void run(UpdateCallback callback) {
final RowDeletionBuilder delete = callback.deleteFrom(table);
if (logger.isInfoEnabled()) {
logger.info("Executing truncating DELETE operation: {}", delete.toSql());
}
delete.execute();
}
});
} finally {
con.close();
}
}
}
@Initialize
public void init() throws IllegalArgumentException {
if (logger.isDebugEnabled()) {
logger.debug("At init() time, InputColumns are: {}", Arrays.toString(values));
}
_errorRowCount = new AtomicInteger();
_writtenRowCount = new AtomicInteger();
if (errorHandlingOption == ErrorHandlingOption.SAVE_TO_FILE) {
_errorDataContext = createErrorDataContext();
}
int bufferSize = bufferSizeOption.calculateBufferSize(values.length);
logger.info("Row buffer size set to {}", bufferSize);
_writeBuffer = new WriteBuffer(bufferSize, this);
final UpdateableDatastoreConnection con = datastore.openConnection();
try {
final SchemaNavigator schemaNavigator = con.getSchemaNavigator();
_targetColumns = schemaNavigator.convertToColumns(schemaName, tableName, columnNames);
final List<String> columnsNotFound = new ArrayList<String>();
for (int i = 0; i < _targetColumns.length; i++) {
if (_targetColumns[i] == null) {
columnsNotFound.add(columnNames[i]);
}
}
if (!columnsNotFound.isEmpty()) {
throw new IllegalArgumentException("Could not find column(s): " + columnsNotFound);
}
} finally {
con.close();
}
if (_targetColumns.length != values.length) {
throw new IllegalArgumentException("Configuration yielded unexpected target column count (got "
+ _targetColumns.length + ", expected " + values.length + ")");
}
}
@Override
public String getSuggestedLabel() {
if (datastore == null || tableName == null) {
return null;
}
return datastore.getName() + " - " + tableName;
}
private void validateCsvHeaders(CsvDataContext dc) {
Schema schema = dc.getDefaultSchema();
if (schema.getTableCount() == 0) {
// nothing to worry about, we will create the table ourselves
return;
}
Table table = schema.getTables()[0];
// verify that table names correspond to what we need!
for (String columnName : columnNames) {
Column column = table.getColumnByName(columnName);
if (column == null) {
throw new IllegalStateException("Error log file does not have required column header: " + columnName);
}
}
if (additionalErrorLogValues != null) {
for (InputColumn<?> inputColumn : additionalErrorLogValues) {
String columnName = translateAdditionalErrorLogColumnName(inputColumn.getName());
Column column = table.getColumnByName(columnName);
if (column == null) {
throw new IllegalStateException("Error log file does not have required column header: "
+ columnName);
}
}
}
Column column = table.getColumnByName(ERROR_MESSAGE_COLUMN_NAME);
if (column == null) {
throw new IllegalStateException("Error log file does not have required column: "
+ ERROR_MESSAGE_COLUMN_NAME);
}
}
private String translateAdditionalErrorLogColumnName(String columnName) {
if (ArrayUtils.contains(columnNames, columnName)) {
return translateAdditionalErrorLogColumnName(columnName + "_add");
}
return columnName;
}
private CsvDataContext createErrorDataContext() {
final File file;
if (errorLogFile == null || TEMP_DIR.equals(errorLogFile)) {
try {
file = File.createTempFile("insertion_error", ".csv");
} catch (IOException e) {
throw new IllegalStateException("Could not create new temp file", e);
}
} else if (errorLogFile.isDirectory()) {
file = new File(errorLogFile, "insertion_error_log.csv");
} else {
file = errorLogFile;
}
final CsvDataContext dc = new CsvDataContext(file);
final Schema schema = dc.getDefaultSchema();
if (file.exists() && file.length() > 0) {
validateCsvHeaders(dc);
} else {
// create table if no table exists.
dc.executeUpdate(new UpdateScript() {
@Override
public void run(UpdateCallback cb) {
TableCreationBuilder tableBuilder = cb.createTable(schema, "error_table");
for (String columnName : columnNames) {
tableBuilder = tableBuilder.withColumn(columnName);
}
if (additionalErrorLogValues != null) {
for (InputColumn<?> inputColumn : additionalErrorLogValues) {
String columnName = translateAdditionalErrorLogColumnName(inputColumn.getName());
tableBuilder = tableBuilder.withColumn(columnName);
}
}
tableBuilder = tableBuilder.withColumn(ERROR_MESSAGE_COLUMN_NAME);
tableBuilder.execute();
}
});
}
return dc;
}
@Override
public void run(InputRow row, int distinctCount) {
if (logger.isDebugEnabled()) {
logger.debug("At run() time, InputColumns are: {}", Arrays.toString(values));
}
final Object[] rowData;
if (additionalErrorLogValues == null) {
rowData = new Object[values.length];
} else {
rowData = new Object[values.length + additionalErrorLogValues.length];
}
for (int i = 0; i < values.length; i++) {
rowData[i] = row.getValue(values[i]);
}
if (additionalErrorLogValues != null) {
for (int i = 0; i < additionalErrorLogValues.length; i++) {
Object value = row.getValue(additionalErrorLogValues[i]);
rowData[values.length + i] = value;
}
}
try {
// perform conversion in a separate loop, since it might crash and
// the
// error data will be more complete if first loop finished.
for (int i = 0; i < values.length; i++) {
rowData[i] = convertType(rowData[i], _targetColumns[i]);
if (logger.isDebugEnabled()) {
logger.debug("Value for {} set to: {}", columnNames[i], rowData[i]);
}
}
} catch (RuntimeException e) {
for (int i = 0; i < distinctCount; i++) {
errorOccurred(rowData, e);
}
return;
}
if (logger.isDebugEnabled()) {
logger.debug("Adding row data to buffer: {}", Arrays.toString(rowData));
}
for (int i = 0; i < distinctCount; i++) {
_writeBuffer.addToBuffer(rowData);
}
}
private Object convertType(final Object value, Column targetColumn) throws IllegalArgumentException {
if (value == null) {
return null;
}
Object result = value;
ColumnType type = targetColumn.getType();
if (type.isLiteral()) {
// for strings, only convert some simple cases, since JDBC drivers
// typically also do a decent job here (with eg. Clob types, char[]
// types etc.)
if (value instanceof Number || value instanceof Date) {
result = value.toString();
}
} else if (type.isNumber()) {
Number numberValue = ConvertToNumberTransformer.transformValue(value);
if (numberValue == null && !"".equals(value)) {
throw new IllegalArgumentException("Could not convert " + value + " to number");
}
result = numberValue;
} else if (type == ColumnType.BOOLEAN) {
Boolean booleanValue = ConvertToBooleanTransformer.transformValue(value);
if (booleanValue == null && !"".equals(value)) {
throw new IllegalArgumentException("Could not convert " + value + " to boolean");
}
result = booleanValue;
}
return result;
}
@Override
public WriteDataResult getResult() {
_writeBuffer.flushBuffer();
final int writtenRowCount = _writtenRowCount.get();
final FileDatastore errorDatastore;
if (_errorDataContext != null) {
Resource resource = _errorDataContext.getResource();
errorDatastore = new CsvDatastore(resource.getName(), resource);
} else {
errorDatastore = null;
}
return new WriteDataResultImpl(writtenRowCount, datastore, schemaName, tableName, _errorRowCount.get(),
errorDatastore);
}
/**
* Method invoked when flushing the buffer
*/
@Override
public void run(final Iterable<Object[]> buffer) throws Exception {
final UpdateableDatastoreConnection con = datastore.openConnection();
try {
final Column[] columns = con.getSchemaNavigator().convertToColumns(schemaName, tableName, columnNames);
if (logger.isDebugEnabled()) {
logger.debug("Inserting into columns: {}", Arrays.toString(columns));
}
final UpdateableDataContext dc = con.getUpdateableDataContext();
dc.executeUpdate(new BatchUpdateScript() {
@Override
public void run(UpdateCallback callback) {
int insertCount = 0;
for (Object[] rowData : buffer) {
RowInsertionBuilder insertBuilder = callback.insertInto(columns[0].getTable());
for (int i = 0; i < columns.length; i++) {
insertBuilder = insertBuilder.value(columns[i], rowData[i]);
}
if (logger.isDebugEnabled()) {
logger.debug("Inserting: {}", Arrays.toString(rowData));
}
try {
insertBuilder.execute();
insertCount++;
_writtenRowCount.incrementAndGet();
} catch (final RuntimeException e) {
errorOccurred(rowData, e);
}
}
if (insertCount > 0) {
_componentContext.publishMessage(new ExecutionLogMessage(insertCount
+ " inserts executed"));
}
}
});
} finally {
con.close();
}
}
protected void errorOccurred(final Object[] rowData, final RuntimeException e) {
_errorRowCount.incrementAndGet();
if (errorHandlingOption == ErrorHandlingOption.STOP_JOB) {
throw e;
} else {
logger.warn("Error occurred while inserting record. Writing to error stream", e);
_errorDataContext.executeUpdate(new UpdateScript() {
@Override
public void run(UpdateCallback cb) {
RowInsertionBuilder insertBuilder = cb
.insertInto(_errorDataContext.getDefaultSchema().getTables()[0]);
for (int i = 0; i < columnNames.length; i++) {
insertBuilder = insertBuilder.value(columnNames[i], rowData[i]);
}
if (additionalErrorLogValues != null) {
for (int i = 0; i < additionalErrorLogValues.length; i++) {
String columnName = translateAdditionalErrorLogColumnName(additionalErrorLogValues[i]
.getName());
Object value = rowData[columnNames.length + i];
insertBuilder = insertBuilder.value(columnName, value);
}
}
insertBuilder = insertBuilder.value(ERROR_MESSAGE_COLUMN_NAME, e.getMessage());
insertBuilder.execute();
}
});
}
}
}