/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.standard;

import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.FragmentAttributes;
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
import org.apache.nifi.processor.FlowFileFilter;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessSessionFactory;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processor.util.pattern.ErrorTypes;
import org.apache.nifi.processor.util.pattern.ExceptionHandler;
import org.apache.nifi.processor.util.pattern.PartialFunctions;
import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles;
import org.apache.nifi.processor.util.pattern.PartialFunctions.FlowFileGroup;
import org.apache.nifi.processor.util.pattern.PutGroup;
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
import org.apache.nifi.processor.util.pattern.RoutingResult;
import org.apache.nifi.stream.io.StreamUtils;

import javax.xml.bind.DatatypeConverter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.sql.BatchUpdateException;
import java.sql.Connection;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLDataException;
import java.sql.SQLException;
import java.sql.SQLNonTransientException;
import java.sql.Statement;
import java.sql.Time;
import java.sql.Timestamp;
import java.sql.Types;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.apache.nifi.processor.util.pattern.ExceptionHandler.createOnError;

@SupportsBatching
@SeeAlso(ConvertJSONToSQL.class)
@InputRequirement(Requirement.INPUT_REQUIRED)
@Tags({"sql", "put", "rdbms", "database", "update", "insert", "relational"})
@CapabilityDescription("Executes a SQL UPDATE or INSERT command. The content of an incoming FlowFile is expected to be the SQL command "
        + "to execute. The SQL command may use the ? character as a parameter placeholder. In this case, the parameters to use must exist as FlowFile attributes "
        + "with the naming convention sql.args.N.type and sql.args.N.value, where N is a positive integer. The sql.args.N.type is expected to be "
        + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
@ReadsAttributes({
        @ReadsAttribute(attribute = "fragment.identifier", description = "If the <Support Fragmented Transactions> property is true, this attribute is used to determine whether or "
                + "not two FlowFiles belong to the same transaction."),
        @ReadsAttribute(attribute = "fragment.count", description = "If the <Support Fragmented Transactions> property is true, this attribute is used to determine how many FlowFiles "
                + "are needed to complete the transaction."),
        @ReadsAttribute(attribute = "fragment.index", description = "If the <Support Fragmented Transactions> property is true, this attribute is used to determine the order that the FlowFiles "
                + "in a transaction should be evaluated."),
        @ReadsAttribute(attribute = "sql.args.N.type", description = "Incoming FlowFiles are expected to be parameterized SQL statements. The type of each parameter is specified as an integer "
                + "that represents the JDBC Type of the parameter."),
        @ReadsAttribute(attribute = "sql.args.N.value", description = "Incoming FlowFiles are expected to be parameterized SQL statements. The values of the parameters are specified as "
                + "sql.args.1.value, sql.args.2.value, sql.args.3.value, and so on. The type of the sql.args.1.value parameter is specified by the sql.args.1.type attribute."),
        @ReadsAttribute(attribute = "sql.args.N.format", description = "This attribute is always optional, but default options may not always work for your data. "
                + "Incoming FlowFiles are expected to be parameterized SQL statements. In some cases "
                + "a format option needs to be specified; currently this is only applicable for binary data types and timestamps. For binary data types "
                + "the available options are 'ascii', 'base64' and 'hex'. In 'ascii' format each string character in your attribute value represents a single byte; this is the default format "
                + "and the format provided by Avro Processors. In 'base64' format your string is a Base64 encoded string. In 'hex' format the string is hex encoded with all "
                + "letters in upper case and no '0x' at the beginning. For timestamps, the format can be specified according to java.time.format.DateTimeFormatter. "
                + "Custom and named patterns are accepted, e.g. 'yyyy-MM-dd' or 'ISO_OFFSET_DATE_TIME'.")
})
@WritesAttributes({
        @WritesAttribute(attribute = "sql.generated.key", description = "If the database generated a key for an INSERT statement and the Obtain Generated Keys property is set to true, "
                + "this attribute will be added to indicate the generated key, if possible. This feature is not supported by all database vendors.")
})
public class PutSQL extends AbstractSessionFactoryProcessor {

    static final PropertyDescriptor CONNECTION_POOL = new PropertyDescriptor.Builder()
            .name("JDBC Connection Pool")
            .description("Specifies the JDBC Connection Pool to use in order to obtain a connection to the database "
                    + "against which the incoming SQL statements will be executed.")
            .identifiesControllerService(DBCPService.class)
            .required(true)
            .build();

    static final PropertyDescriptor SUPPORT_TRANSACTIONS = new PropertyDescriptor.Builder()
            .name("Support Fragmented Transactions")
            .description("If true, when a FlowFile is consumed by this Processor, the Processor will first check the fragment.identifier and fragment.count attributes of that FlowFile. "
                    + "If the fragment.count value is greater than 1, the Processor will not process any FlowFile with that fragment.identifier until all are available; "
                    + "at that point, it will process all FlowFiles with that fragment.identifier as a single transaction, in the order specified by the FlowFiles' fragment.index attributes. "
                    + "This provides atomicity of those SQL statements. If this value is false, these attributes will be ignored and the updates will occur independently of one another.")
            .allowableValues("true", "false")
            .defaultValue("true")
            .build();

    static final PropertyDescriptor TRANSACTION_TIMEOUT = new PropertyDescriptor.Builder()
            .name("Transaction Timeout")
            .description("If the <Support Fragmented Transactions> property is set to true, specifies how long to wait for all FlowFiles for a particular fragment.identifier attribute "
                    + "to arrive before just transferring all of the FlowFiles with that identifier to the 'failure' relationship")
            .required(false)
            .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
            .build();

    static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
            .name("Batch Size")
            .description("The preferred number of FlowFiles to put to the database in a single transaction")
            .required(true)
            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .defaultValue("100")
            .build();

    static final PropertyDescriptor OBTAIN_GENERATED_KEYS = new PropertyDescriptor.Builder()
            .name("Obtain Generated Keys")
            .description("If true, any key that is automatically generated by the database will be added to the FlowFile that generated it using the sql.generated.key attribute. "
                    + "This may result in slightly slower performance and is not supported by all databases.")
            .allowableValues("true", "false")
            .defaultValue("false")
            .build();

    static final Relationship REL_SUCCESS = new Relationship.Builder()
            .name("success")
            .description("A FlowFile is routed to this relationship after the database is successfully updated")
            .build();
    static final Relationship REL_RETRY = new Relationship.Builder()
            .name("retry")
            .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
            .build();
    static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
                    + "such as an invalid query or an integrity constraint violation")
            .build();

    private static final Pattern SQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("sql\\.args\\.(\\d+)\\.type");
    private static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");

    private static final String FRAGMENT_ID_ATTR = FragmentAttributes.FRAGMENT_ID.key();
    private static final String FRAGMENT_INDEX_ATTR = FragmentAttributes.FRAGMENT_INDEX.key();
    private static final String FRAGMENT_COUNT_ATTR = FragmentAttributes.FRAGMENT_COUNT.key();

    private static final Pattern LONG_PATTERN = Pattern.compile("^\\d{1,19}$");

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> properties = new ArrayList<>();
        properties.add(CONNECTION_POOL);
        properties.add(SUPPORT_TRANSACTIONS);
        properties.add(TRANSACTION_TIMEOUT);
        properties.add(BATCH_SIZE);
        properties.add(OBTAIN_GENERATED_KEYS);
        properties.add(RollbackOnFailure.ROLLBACK_ON_FAILURE);
        return properties;
    }

    @Override
    public Set<Relationship> getRelationships() {
        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_SUCCESS);
        rels.add(REL_RETRY);
        rels.add(REL_FAILURE);
        return rels;
    }

    private static class FunctionContext extends RollbackOnFailure {
        private boolean obtainKeys = false;
        private boolean fragmentedTransaction = false;
        private boolean originalAutoCommit = false;
        private final long startNanos = System.nanoTime();

        private FunctionContext(boolean rollbackOnFailure) {
            super(rollbackOnFailure, true);
        }

        private boolean isSupportBatching() {
            return !obtainKeys && !fragmentedTransaction;
        }
    }

    private PutGroup<FunctionContext, Connection, StatementFlowFileEnclosure> process;
    private BiFunction<FunctionContext, ErrorTypes, ErrorTypes.Result> adjustError;
    private ExceptionHandler<FunctionContext> exceptionHandler;

    private final FetchFlowFiles<FunctionContext> fetchFlowFiles = (c, s, fc, r) -> {
        final FlowFilePoll poll = pollFlowFiles(c, s, fc, r);
        if (poll == null) {
            return null;
        }
        fc.fragmentedTransaction = poll.isFragmentedTransaction();
        return poll.getFlowFiles();
    };

    private final PartialFunctions.InitConnection<FunctionContext, Connection> initConnection = (c, s, fc) -> {
        final Connection connection = c.getProperty(CONNECTION_POOL).asControllerService(DBCPService.class).getConnection();
        try {
            fc.originalAutoCommit = connection.getAutoCommit();
            connection.setAutoCommit(false);
        } catch (SQLException e) {
            throw new ProcessException("Failed to disable auto commit due to " + e, e);
        }
        return connection;
    };

    @FunctionalInterface
    private interface GroupingFunction {
        void apply(final ProcessContext context, final ProcessSession session, final FunctionContext fc, final Connection conn,
                   final List<FlowFile> flowFiles, final List<StatementFlowFileEnclosure> groups,
                   final Map<String, StatementFlowFileEnclosure> sqlToEnclosure, final RoutingResult result);
    }

    private final GroupingFunction groupFragmentedTransaction = (context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result) -> {
        final FragmentedEnclosure fragmentedEnclosure = new FragmentedEnclosure();
        groups.add(fragmentedEnclosure);

        for (final FlowFile flowFile : flowFiles) {
            final String sql = getSQL(session, flowFile);
            final StatementFlowFileEnclosure enclosure = sqlToEnclosure
                    .computeIfAbsent(sql, k -> new StatementFlowFileEnclosure(sql));

            fragmentedEnclosure.addFlowFile(flowFile, enclosure);
        }
    };

    private final GroupingFunction groupFlowFilesBySQLBatch = (context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result) -> {
        for (final FlowFile flowFile : flowFiles) {
            final String sql = getSQL(session, flowFile);

            // Get or create the appropriate PreparedStatement to use.
            final StatementFlowFileEnclosure enclosure = sqlToEnclosure
                    .computeIfAbsent(sql, k -> {
                        final StatementFlowFileEnclosure newEnclosure = new StatementFlowFileEnclosure(sql);
                        groups.add(newEnclosure);
                        return newEnclosure;
                    });

            if (!exceptionHandler.execute(fc, flowFile, input -> {
                final PreparedStatement stmt = enclosure.getCachedStatement(conn);
                setParameters(stmt, flowFile.getAttributes());
                stmt.addBatch();
            }, onFlowFileError(context, session, result))) {
                continue;
            }

            enclosure.addFlowFile(flowFile);
        }
    };

    private final GroupingFunction groupFlowFilesBySQL = (context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result) -> {
        for (final FlowFile flowFile : flowFiles) {
            final String sql = getSQL(session, flowFile);

            // Get or create the appropriate PreparedStatement to use.
            final StatementFlowFileEnclosure enclosure = sqlToEnclosure
                    .computeIfAbsent(sql, k -> {
                        final StatementFlowFileEnclosure newEnclosure = new StatementFlowFileEnclosure(sql);
                        groups.add(newEnclosure);
                        return newEnclosure;
                    });

            enclosure.addFlowFile(flowFile);
        }
    };

    final PutGroup.GroupFlowFiles<FunctionContext, Connection, StatementFlowFileEnclosure> groupFlowFiles = (context, session, fc, conn, flowFiles, result) -> {
        final Map<String, StatementFlowFileEnclosure> sqlToEnclosure = new HashMap<>();
        final List<StatementFlowFileEnclosure> groups = new ArrayList<>();

        // There are three patterns:
        // 1. Support batching: an enclosure has multiple FlowFiles, which are executed in a single batch operation
        // 2. Obtain keys: an enclosure has multiple FlowFiles, and each FlowFile is executed separately
        // 3. Fragmented transaction: a single FragmentedEnclosure holds all FlowFiles, each mapped to the
        //    per-SQL enclosure whose statement it should be executed with
        if (fc.obtainKeys) {
            groupFlowFilesBySQL.apply(context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result);
        } else if (fc.fragmentedTransaction) {
            groupFragmentedTransaction.apply(context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result);
        } else {
            groupFlowFilesBySQLBatch.apply(context, session, fc, conn, flowFiles, groups, sqlToEnclosure, result);
        }

        return groups;
    };

    final PutGroup.PutFlowFiles<FunctionContext, Connection, StatementFlowFileEnclosure> putFlowFiles = (context, session, fc, conn, enclosure, result) -> {

        if (fc.isSupportBatching()) {
            // We have PreparedStatements with batches added to them.
            // We need to execute each batch and close the PreparedStatement.
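            //
            // For orientation, the plain-JDBC pattern this branch relies on, as a simplified sketch
            // (not the literal code path; the table and values below are made-up examples). One
            // PreparedStatement is reused for every FlowFile that produced the same SQL text, and
            // the parameters contributed by each FlowFile were accumulated with addBatch() during
            // grouping, so a single executeBatch() round trip covers the whole group:
            //
            //     PreparedStatement stmt = conn.prepareStatement("INSERT INTO t (a) VALUES (?)");
            //     for (String v : values) {           // one iteration per FlowFile with this SQL
            //         stmt.setString(1, v);
            //         stmt.addBatch();
            //     }
            //     int[] counts = stmt.executeBatch(); // one round trip for the entire batch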
            exceptionHandler.execute(fc, enclosure, input -> {
                try (final PreparedStatement stmt = enclosure.getCachedStatement(conn)) {
                    stmt.executeBatch();
                    result.routeTo(enclosure.getFlowFiles(), REL_SUCCESS);
                }
            }, onBatchUpdateError(context, session, result));

        } else {
            for (final FlowFile flowFile : enclosure.getFlowFiles()) {

                final StatementFlowFileEnclosure targetEnclosure = enclosure instanceof FragmentedEnclosure
                        ? ((FragmentedEnclosure) enclosure).getTargetEnclosure(flowFile)
                        : enclosure;

                // Execute the updates one by one.
                exceptionHandler.execute(fc, flowFile, input -> {
                    try (final PreparedStatement stmt = targetEnclosure.getNewStatement(conn, fc.obtainKeys)) {

                        // Set the appropriate parameters on the statement.
                        setParameters(stmt, flowFile.getAttributes());

                        stmt.executeUpdate();

                        // Attempt to determine the key that was generated, if any. This is not supported by all
                        // database vendors, so if we cannot determine the generated key (or if the statement is not an INSERT),
                        // we will just move on without setting the attribute.
                        FlowFile sentFlowFile = flowFile;
                        final String generatedKey = determineGeneratedKey(stmt);
                        if (generatedKey != null) {
                            sentFlowFile = session.putAttribute(sentFlowFile, "sql.generated.key", generatedKey);
                        }

                        result.routeTo(sentFlowFile, REL_SUCCESS);
                    }
                }, onFlowFileError(context, session, result));
            }
        }

        if (result.contains(REL_SUCCESS)) {
            // Determine the database URL.
            String url = "jdbc://unknown-host";
            try {
                url = conn.getMetaData().getURL();
            } catch (final SQLException sqle) {
                // Best effort; fall back to the placeholder URL.
            }

            // Emit a Provenance SEND event.
            final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos);
            for (final FlowFile flowFile : result.getRoutedFlowFiles().get(REL_SUCCESS)) {
                session.getProvenanceReporter().send(flowFile, url, transmissionMillis, true);
            }
        }
    };

    private ExceptionHandler.OnError<FunctionContext, FlowFile> onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) {
        ExceptionHandler.OnError<FunctionContext, FlowFile> onFlowFileError = createOnError(context, session, result, REL_FAILURE, REL_RETRY);
        onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> {
            switch (r.destination()) {
                case Failure:
                    getLogger().error("Failed to update database for {} due to {}; routing to failure", new Object[] {i, e}, e);
                    break;
                case Retry:
                    getLogger().error("Failed to update database for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
                            new Object[] {i, e}, e);
                    break;
            }
        });
        return RollbackOnFailure.createOnError(onFlowFileError);
    }

    private ExceptionHandler.OnError<FunctionContext, StatementFlowFileEnclosure> onBatchUpdateError(
            final ProcessContext context, final ProcessSession session, final RoutingResult result) {
        return RollbackOnFailure.createOnError((c, enclosure, r, e) -> {

            // If rollbackOnFailure is enabled, the error will be thrown as a ProcessException instead.
            if (e instanceof BatchUpdateException && !c.isRollbackOnFailure()) {

                // If we get a BatchUpdateException, then we want to determine which FlowFile caused the failure,
                // and route that FlowFile to failure while routing those that finished processing to success and those
                // that have not yet been executed to retry.
                // Currently a fragmented transaction does not use batch update.
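                //
                // Concrete example of the bookkeeping below (illustrative values only): for a batch of
                // five FlowFiles, a driver that continues past failures might report
                //     updateCounts = {1, 1, Statement.EXECUTE_FAILED, 1, 1}
                // in which case index 2 routes to failure and the rest to success. A driver that stops
                // at the first failure instead reports
                //     updateCounts = {1, 1}
                // in which case index 2 (the first unreported statement) routes to failure and
                // indices 3 and 4 route to retry.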
                final int[] updateCounts = ((BatchUpdateException) e).getUpdateCounts();
                final List<FlowFile> batchFlowFiles = enclosure.getFlowFiles();

                // In the presence of a BatchUpdateException, the driver has the option of either stopping when an error
                // occurs, or continuing. If it continues, then it must account for all statements in the batch and for
                // those that fail return a Statement.EXECUTE_FAILED for the number of rows updated.
                // So we will iterate over all of the update counts returned. If any is equal to Statement.EXECUTE_FAILED,
                // we will route the corresponding FlowFile to failure. Otherwise, the FlowFile will go to success
                // unless it has not yet been processed (its index in the List > updateCounts.length).
                int failureCount = 0;
                int successCount = 0;
                int retryCount = 0;
                for (int i = 0; i < updateCounts.length; i++) {
                    final int updateCount = updateCounts[i];
                    final FlowFile flowFile = batchFlowFiles.get(i);
                    if (updateCount == Statement.EXECUTE_FAILED) {
                        result.routeTo(flowFile, REL_FAILURE);
                        failureCount++;
                    } else {
                        result.routeTo(flowFile, REL_SUCCESS);
                        successCount++;
                    }
                }

                if (failureCount == 0) {
                    // If no failures were found, the driver decided not to execute the statements after the
                    // failure, so route the first unreported one to failure.
                    final FlowFile failedFlowFile = batchFlowFiles.get(updateCounts.length);
                    result.routeTo(failedFlowFile, REL_FAILURE);
                    failureCount++;
                }

                if (updateCounts.length < batchFlowFiles.size()) {
                    final List<FlowFile> unexecuted = batchFlowFiles.subList(updateCounts.length + 1, batchFlowFiles.size());
                    for (final FlowFile flowFile : unexecuted) {
                        result.routeTo(flowFile, REL_RETRY);
                        retryCount++;
                    }
                }

                getLogger().error("Failed to update database due to a failed batch update. There were a total of {} FlowFiles that failed, {} that succeeded, "
                        + "and {} that were not executed and will be routed to retry", new Object[] {failureCount, successCount, retryCount});

                return;
            }

            // Apply default error handling and logging for other Exceptions.
            ExceptionHandler.OnError<RollbackOnFailure, FlowFileGroup> onGroupError
                    = ExceptionHandler.createOnGroupError(context, session, result, REL_FAILURE, REL_RETRY);
            onGroupError = onGroupError.andThen((cl, il, rl, el) -> {
                switch (rl.destination()) {
                    case Failure:
                        getLogger().error("Failed to update database for {} due to {}; routing to failure", new Object[] {il.getFlowFiles(), el}, el);
                        break;
                    case Retry:
                        getLogger().error("Failed to update database for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
                                new Object[] {il.getFlowFiles(), el}, el);
                        break;
                }
            });
            onGroupError.apply(c, enclosure, r, e);
        });
    }

    @OnScheduled
    public void constructProcess() {
        process = new PutGroup<>();

        process.setLogger(getLogger());
        process.fetchFlowFiles(fetchFlowFiles);
        process.initConnection(initConnection);
        process.groupFetchedFlowFiles(groupFlowFiles);
        process.putFlowFiles(putFlowFiles);
        process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY));

        process.onCompleted((c, s, fc, conn) -> {
            try {
                conn.commit();
            } catch (SQLException e) {
                // Throw a ProcessException to roll back the process session.
                throw new ProcessException("Failed to commit database connection due to " + e, e);
            }
        });

        process.onFailed((c, s, fc, conn, e) -> {
            try {
                conn.rollback();
            } catch (SQLException re) {
                // Just log the fact that the rollback failed.
                // The ProcessSession will be rolled back by the thrown Exception, so nothing else needs to happen here.
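                //
                // For orientation, the commit/rollback lifecycle that the surrounding handlers implement,
                // written as a plain-JDBC sketch (simplified; not the literal code path):
                //
                //     final boolean original = conn.getAutoCommit();
                //     conn.setAutoCommit(false);                      // initConnection
                //     try {
                //         /* execute the statements */
                //         conn.commit();                              // onCompleted
                //     } catch (final Exception ex) {
                //         conn.rollback();                            // onFailed (this handler)
                //         throw ex;
                //     } finally {
                //         if (original) conn.setAutoCommit(true);     // cleanup
                //     }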
                getLogger().warn("Failed to rollback database connection due to {}", new Object[] {re}, re);
            }
        });

        process.cleanup((c, s, fc, conn) -> {
            // Make sure that we try to set the auto commit back to whatever it was.
            if (fc.originalAutoCommit) {
                try {
                    conn.setAutoCommit(true);
                } catch (final SQLException se) {
                    getLogger().warn("Failed to reset autocommit due to {}", new Object[] {se});
                }
            }
        });

        exceptionHandler = new ExceptionHandler<>();
        exceptionHandler.mapException(e -> {
            if (e instanceof SQLNonTransientException) {
                return ErrorTypes.InvalidInput;
            } else if (e instanceof SQLException) {
                return ErrorTypes.TemporalFailure;
            } else {
                return ErrorTypes.UnknownFailure;
            }
        });
        adjustError = RollbackOnFailure.createAdjustError(getLogger());
        exceptionHandler.adjustError(adjustError);
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
        final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean();
        final FunctionContext functionContext = new FunctionContext(rollbackOnFailure);
        functionContext.obtainKeys = context.getProperty(OBTAIN_GENERATED_KEYS).asBoolean();
        RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext));
    }

    /**
     * Pulls a batch of FlowFiles from the incoming queues. If no FlowFiles are available, returns <code>null</code>.
     * Otherwise, a List of FlowFiles will be returned.
     *
     * If the FlowFiles pulled are not yet eligible to be processed, the FlowFiles will be penalized, routed back
     * to the input queue, and <code>null</code> will be returned.
     *
     * Otherwise, if the Support Fragmented Transactions property is true, all FlowFiles that belong to the same
     * transaction will be sorted in the order that they should be evaluated.
     *
     * @param context the process context for determining properties
     * @param session the process session for pulling FlowFiles
     * @param functionContext the function context whose fragmented-transaction flag is updated based on what is polled
     * @param result the routing result used to penalize and return FlowFiles that are not yet ready
     * @return a FlowFilePoll containing a List of FlowFiles to process, or <code>null</code> if there are no FlowFiles to process
     */
    private FlowFilePoll pollFlowFiles(final ProcessContext context, final ProcessSession session,
                                       final FunctionContext functionContext, final RoutingResult result) {
        // Determine which FlowFile Filter to use in order to obtain FlowFiles.
        final boolean useTransactions = context.getProperty(SUPPORT_TRANSACTIONS).asBoolean();
        boolean fragmentedTransaction = false;

        final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
        List<FlowFile> flowFiles;
        if (useTransactions) {
            final TransactionalFlowFileFilter filter = new TransactionalFlowFileFilter();
            flowFiles = session.get(filter);
            fragmentedTransaction = filter.isFragmentedTransaction();
        } else {
            flowFiles = session.get(batchSize);
        }

        if (flowFiles.isEmpty()) {
            return null;
        }

        // If we are supporting fragmented transactions, verify that all FlowFiles are correct.
        if (fragmentedTransaction) {
            try {
                if (!isFragmentedTransactionReady(flowFiles, context.getProperty(TRANSACTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS))) {
                    // Not ready; penalize the FlowFiles and put them back to self.
                    flowFiles.forEach(f -> result.routeTo(session.penalize(f), Relationship.SELF));
                    return null;
                }

            } catch (IllegalArgumentException e) {
                // Map the relationship based on the context, then let the default handler handle it.
                final ErrorTypes.Result adjustedRoute = adjustError.apply(functionContext, ErrorTypes.InvalidInput);
                ExceptionHandler.createOnGroupError(context, session, result, REL_FAILURE, REL_RETRY)
                        .apply(functionContext, () -> flowFiles, adjustedRoute, e);
                return null;
            }

            // Sort by fragment index.
            flowFiles.sort(Comparator.comparing(o -> Integer.parseInt(o.getAttribute(FRAGMENT_INDEX_ATTR))));
        }

        return new FlowFilePoll(flowFiles, fragmentedTransaction);
    }

    /**
     * Returns the key that was generated from the given statement, or <code>null</code> if no key
     * was generated or it could not be determined.
     *
     * @param stmt the statement that generated a key
     * @return the key that was generated from the given statement, or <code>null</code> if no key
     *         was generated or it could not be determined.
     */
    private String determineGeneratedKey(final PreparedStatement stmt) {
        try {
            final ResultSet generatedKeys = stmt.getGeneratedKeys();
            if (generatedKeys != null && generatedKeys.next()) {
                return generatedKeys.getString(1);
            }
        } catch (final SQLException sqle) {
            // This is not supported by all vendors. This is a best-effort approach.
        }

        return null;
    }

    /**
     * Determines the SQL statement that should be executed for the given FlowFile
     *
     * @param session the session that can be used to access the given FlowFile
     * @param flowFile the FlowFile whose SQL statement should be executed
     *
     * @return the SQL that is associated with the given FlowFile
     */
    private String getSQL(final ProcessSession session, final FlowFile flowFile) {
        // Read the SQL from the FlowFile's content.
        final byte[] buffer = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buffer);
            }
        });

        // Interpret the content as a UTF-8 encoded SQL statement.
        final String sql = new String(buffer, StandardCharsets.UTF_8);
        return sql;
    }

    /**
     * Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
     *
     * @param stmt the statement to set the parameters on
     * @param attributes the attributes from which to derive parameter indices, values, and types
     * @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
     */
    private void setParameters(final PreparedStatement stmt, final Map<String, String> attributes) throws SQLException {
        for (final Map.Entry<String, String> entry : attributes.entrySet()) {
            final String key = entry.getKey();
            final Matcher matcher = SQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
            if (matcher.matches()) {
                final int parameterIndex = Integer.parseInt(matcher.group(1));

                final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
                if (!isNumeric) {
                    throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC type code");
                }

                final int jdbcType = Integer.parseInt(entry.getValue());
                final String valueAttrName = "sql.args." + parameterIndex + ".value";
                final String parameterValue = attributes.get(valueAttrName);
                final String formatAttrName = "sql.args." + parameterIndex + ".format";
                final String parameterFormat = attributes.getOrDefault(formatAttrName, "");

                try {
                    setParameter(stmt, valueAttrName, parameterIndex, parameterValue, jdbcType, parameterFormat);
                } catch (final NumberFormatException nfe) {
                    throw new SQLDataException("The value of the " + valueAttrName + " is '" + parameterValue + "', which cannot be converted into the necessary data type", nfe);
                } catch (ParseException pe) {
                    throw new SQLDataException("The value of the " + valueAttrName + " is '" + parameterValue + "', which cannot be converted to a timestamp", pe);
                } catch (UnsupportedEncodingException uee) {
                    throw new SQLDataException("The value of the " + valueAttrName + " is '" + parameterValue + "', which cannot be converted to UTF-8", uee);
                }
            }
        }
    }

    /**
     * Determines whether or not all FlowFiles that make up a fragmented transaction have arrived, based on
     * each FlowFile's fragment.count attribute, and validates that the fragment attributes are present and
     * consistent across the given FlowFiles.
     *
     * @param flowFiles the FlowFiles that share the same fragment.identifier
     * @param transactionTimeoutMillis the maximum amount of time (in milliseconds) that we should wait
     *            for all FlowFiles in a transaction to be present before routing to failure
     * @return <code>true</code> if all fragments are present and the FlowFiles should be processed,
     *         <code>false</code> if the FlowFiles should be returned to the queue to wait for more fragments
     * @throws IllegalArgumentException if the fragment attributes are missing or inconsistent, or if the
     *             transaction timeout has expired; the caller routes the FlowFiles to failure in that case
     */
    boolean isFragmentedTransactionReady(final List<FlowFile> flowFiles, final Long transactionTimeoutMillis) throws IllegalArgumentException {
        int selectedNumFragments = 0;
        final BitSet bitSet = new BitSet();

        BiFunction<String, Object[], IllegalArgumentException> illegal = (s, objects) -> new IllegalArgumentException(String.format(s, objects));

        for (final FlowFile flowFile : flowFiles) {
            final String fragmentCount = flowFile.getAttribute(FRAGMENT_COUNT_ATTR);
            if (fragmentCount == null && flowFiles.size() == 1) {
                return true;
            } else if (fragmentCount == null) {
                throw illegal.apply("Cannot process %s because there are %d FlowFiles with the same fragment.identifier "
                        + "attribute but not all FlowFiles have a fragment.count attribute", new Object[] {flowFile, flowFiles.size()});
            }

            final int numFragments;
            try {
                numFragments = Integer.parseInt(fragmentCount);
            } catch (final NumberFormatException nfe) {
                throw illegal.apply("Cannot process %s because the fragment.count attribute has a value of '%s', which is not an integer",
                        new Object[] {flowFile, fragmentCount});
            }

            if (numFragments < 1) {
                throw illegal.apply("Cannot process %s because the fragment.count attribute has a value of '%s', which is not a positive integer",
                        new Object[] {flowFile, fragmentCount});
            }

            if (selectedNumFragments == 0) {
                selectedNumFragments = numFragments;
            } else if (numFragments != selectedNumFragments) {
                throw illegal.apply("Cannot process %s because the fragment.count attribute has different values for different FlowFiles with the same fragment.identifier",
                        new Object[] {flowFile});
            }

            final String fragmentIndex = flowFile.getAttribute(FRAGMENT_INDEX_ATTR);
            if (fragmentIndex == null) {
                throw illegal.apply("Cannot process %s because the fragment.index attribute is missing", new Object[] {flowFile});
            }

            final int idx;
            try {
                idx = Integer.parseInt(fragmentIndex);
            } catch (final NumberFormatException nfe) {
                throw illegal.apply("Cannot process %s because the fragment.index attribute has a value of '%s', which is not an integer",
                        new Object[] {flowFile, fragmentIndex});
            }

            if (idx < 0) {
                throw illegal.apply("Cannot process %s because the fragment.index attribute has a value of '%s', which is not a non-negative integer",
                        new Object[] {flowFile, fragmentIndex});
            }

            if (bitSet.get(idx)) {
                throw illegal.apply("Cannot process %s because it has the same value for the fragment.index attribute as another FlowFile with the same fragment.identifier",
                        new Object[] {flowFile});
            }

            bitSet.set(idx);
        }

        if (selectedNumFragments == flowFiles.size()) {
            return true; // All fragments are present; process the FlowFiles.
        }

        long latestQueueTime = 0L;
        for (final FlowFile flowFile : flowFiles) {
            if (flowFile.getLastQueueDate() != null && flowFile.getLastQueueDate() > latestQueueTime) {
                latestQueueTime = flowFile.getLastQueueDate();
            }
        }

        if (transactionTimeoutMillis != null && latestQueueTime > 0L && System.currentTimeMillis() - latestQueueTime > transactionTimeoutMillis) {
            throw illegal.apply("The transaction timeout has expired for the following FlowFiles; they will be routed to failure: %s", new Object[] {flowFiles});
        }

        getLogger().debug("Not enough FlowFiles for transaction. Returning all FlowFiles to queue");
        return false; // Not enough FlowFiles for this transaction; return them all to the queue.
    }

    /**
     * Determines how to map the given value to the appropriate JDBC data type and sets the parameter on the
     * provided PreparedStatement
     *
     * @param stmt the PreparedStatement to set the parameter on
     * @param attrName the name of the attribute that the parameter is coming from - for logging purposes
     * @param parameterIndex the index of the SQL parameter to set
     * @param parameterValue the value of the SQL parameter to set
     * @param jdbcType the JDBC Type of the SQL parameter to set
     * @param valueFormat the format to use when parsing the value; currently applicable only to timestamps and binary types
     * @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
     */
    private void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType,
                              final String valueFormat) throws SQLException, ParseException, UnsupportedEncodingException {
        if (parameterValue == null) {
            stmt.setNull(parameterIndex, jdbcType);
        } else {
            switch (jdbcType) {
                case Types.BIT:
                    stmt.setBoolean(parameterIndex, "1".equals(parameterValue) || "t".equalsIgnoreCase(parameterValue) || Boolean.parseBoolean(parameterValue));
                    break;
                case Types.BOOLEAN:
                    stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
                    break;
                case Types.TINYINT:
                    stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
                    break;
                case Types.SMALLINT:
                    stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
                    break;
                case Types.INTEGER:
                    stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
                    break;
                case Types.BIGINT:
                    stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
                    break;
                case Types.REAL:
                    stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
                    break;
                case Types.FLOAT:
                case Types.DOUBLE:
                    stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
                    break;
                case Types.DECIMAL:
                case Types.NUMERIC:
                    stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
                    break;
                case Types.DATE:
                    stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
                    break;
                case Types.TIME:
                    stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
                    break;
                case Types.TIMESTAMP:
                    long lTimestamp = 0L;

                    // Backwards compatibility note: a format attribute was not originally supported for timestamp
                    // fields, so an empty format falls back to the legacy parsing behavior below.
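                    //
                    // Illustrative examples of values handled below (assumed sample data, all denoting
                    // 2021-01-01T00:00:00Z):
                    //   no sql.args.N.format attribute:  "1609459200000"            (epoch milliseconds)
                    //                                    "2021-01-01 00:00:00.000"  (the default pattern)
                    //   sql.args.N.format=ISO_OFFSET_DATE_TIME: "2021-01-01T00:00:00+00:00"
                    // Note that a named or custom DateTimeFormatter pattern must carry an offset or zone,
                    // since Instant.from(...) below cannot resolve an instant without one.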
                    if (valueFormat.isEmpty()) {
                        if (LONG_PATTERN.matcher(parameterValue).matches()) {
                            lTimestamp = Long.parseLong(parameterValue);
                        } else {
                            final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
                            final java.util.Date parsedDate = dateFormat.parse(parameterValue);
                            lTimestamp = parsedDate.getTime();
                        }
                    } else {
                        final DateTimeFormatter dtFormatter = getDateTimeFormatter(valueFormat);
                        final TemporalAccessor accessor = dtFormatter.parse(parameterValue);
                        lTimestamp = java.util.Date.from(Instant.from(accessor)).getTime();
                    }

                    stmt.setTimestamp(parameterIndex, new Timestamp(lTimestamp));
                    break;
                case Types.BINARY:
                case Types.VARBINARY:
                case Types.LONGVARBINARY:
                    byte[] bValue;
                    switch (valueFormat) {
                        case "":
                        case "ascii":
                            bValue = parameterValue.getBytes("ASCII");
                            break;
                        case "hex":
                            bValue = DatatypeConverter.parseHexBinary(parameterValue);
                            break;
                        case "base64":
                            bValue = DatatypeConverter.parseBase64Binary(parameterValue);
                            break;
                        default:
                            throw new ParseException("Unable to parse binary data using the formatter `" + valueFormat + "`.", 0);
                    }
                    stmt.setBinaryStream(parameterIndex, new ByteArrayInputStream(bValue), bValue.length);
                    break;
                case Types.CHAR:
                case Types.VARCHAR:
                case Types.LONGNVARCHAR:
                case Types.LONGVARCHAR:
                    stmt.setString(parameterIndex, parameterValue);
                    break;
                default:
                    stmt.setObject(parameterIndex, parameterValue, jdbcType);
                    break;
            }
        }
    }

    private DateTimeFormatter getDateTimeFormatter(String pattern) {
        switch (pattern) {
            case "BASIC_ISO_DATE": return DateTimeFormatter.BASIC_ISO_DATE;
            case "ISO_LOCAL_DATE": return DateTimeFormatter.ISO_LOCAL_DATE;
            case "ISO_OFFSET_DATE": return DateTimeFormatter.ISO_OFFSET_DATE;
            case "ISO_DATE": return DateTimeFormatter.ISO_DATE;
            case "ISO_LOCAL_TIME": return DateTimeFormatter.ISO_LOCAL_TIME;
            case "ISO_OFFSET_TIME": return DateTimeFormatter.ISO_OFFSET_TIME;
            case "ISO_TIME": return DateTimeFormatter.ISO_TIME;
            case "ISO_LOCAL_DATE_TIME": return DateTimeFormatter.ISO_LOCAL_DATE_TIME;
            case "ISO_OFFSET_DATE_TIME": return DateTimeFormatter.ISO_OFFSET_DATE_TIME;
            case "ISO_ZONED_DATE_TIME": return DateTimeFormatter.ISO_ZONED_DATE_TIME;
            case "ISO_DATE_TIME": return DateTimeFormatter.ISO_DATE_TIME;
            case "ISO_ORDINAL_DATE": return DateTimeFormatter.ISO_ORDINAL_DATE;
            case "ISO_WEEK_DATE": return DateTimeFormatter.ISO_WEEK_DATE;
            case "ISO_INSTANT": return DateTimeFormatter.ISO_INSTANT;
            case "RFC_1123_DATE_TIME": return DateTimeFormatter.RFC_1123_DATE_TIME;
            default: return DateTimeFormatter.ofPattern(pattern);
        }
    }

    /**
     * A FlowFileFilter that is responsible for ensuring that the FlowFiles returned either belong
     * to the same "fragmented transaction" (i.e., 1 transaction whose information is fragmented
     * across multiple FlowFiles) or that none of the FlowFiles belongs to a fragmented transaction
     */
    static class TransactionalFlowFileFilter implements FlowFileFilter {
        private String selectedId = null;
        private int numSelected = 0;
        private boolean ignoreFragmentIdentifiers = false;

        public boolean isFragmentedTransaction() {
            return !ignoreFragmentIdentifiers;
        }

        @Override
        public FlowFileFilterResult filter(final FlowFile flowFile) {
            final String fragmentId = flowFile.getAttribute(FRAGMENT_ID_ATTR);
            final String fragCount = flowFile.getAttribute(FRAGMENT_COUNT_ATTR);

            // If the first FlowFile selected is not part of a fragmented transaction, then
            // we accept any FlowFile that is also not part of a fragmented transaction.
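            //
            // Worked example of the decisions below (assumed attribute values): with
            // fragment.identifier=abc and fragment.count=3, arrivals are filtered as follows:
            //   abc, index 0 -> ACCEPT_AND_CONTINUE  (select id "abc", numSelected becomes 1)
            //   xyz, index 0 -> REJECT_AND_CONTINUE  (different fragment.identifier)
            //   abc, index 1 -> ACCEPT_AND_CONTINUE  (numSelected becomes 2)
            //   abc, index 2 -> ACCEPT_AND_TERMINATE (numSelected >= fragment.count - 1; batch complete)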
            if (ignoreFragmentIdentifiers) {
                if (fragmentId == null || "1".equals(fragCount)) {
                    return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
                } else {
                    return FlowFileFilterResult.REJECT_AND_CONTINUE;
                }
            }

            if (fragmentId == null || "1".equals(fragCount)) {
                if (selectedId == null) {
                    // The first FlowFile selected is not part of a fragmented transaction, so from
                    // here on ignore fragment identifiers.
                    ignoreFragmentIdentifiers = true;
                    return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
                } else {
                    // We've already selected 1 FlowFile, and this one doesn't match.
                    return FlowFileFilterResult.REJECT_AND_CONTINUE;
                }
            }

            if (selectedId == null) {
                // Select this fragment id as the chosen one.
                selectedId = fragmentId;
                numSelected++;
                return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
            }

            if (selectedId.equals(fragmentId)) {
                // The fragment ids match. Find out whether or not we have all of the necessary fragments.
                final int numFragments;
                if (fragCount != null && NUMBER_PATTERN.matcher(fragCount).matches()) {
                    numFragments = Integer.parseInt(fragCount);
                } else {
                    numFragments = Integer.MAX_VALUE;
                }

                if (numSelected >= numFragments - 1) {
                    // We have all of the fragments we need for this transaction.
                    return FlowFileFilterResult.ACCEPT_AND_TERMINATE;
                } else {
                    // We still need more fragments for this transaction, so accept this one and continue.
                    numSelected++;
                    return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
                }
            } else {
                return FlowFileFilterResult.REJECT_AND_CONTINUE;
            }
        }
    }

    /**
     * A simple, immutable data structure to hold a List of FlowFiles and an indicator as to whether
     * or not those FlowFiles represent a "fragmented transaction" - that is, a collection of FlowFiles
     * that all must be executed as a single transaction (we refer to it as a fragmented transaction
     * because the information for that transaction, including SQL and the parameters, is fragmented
     * across multiple FlowFiles).
     */
    private static class FlowFilePoll {
        private final List<FlowFile> flowFiles;
        private final boolean fragmentedTransaction;

        public FlowFilePoll(final List<FlowFile> flowFiles, final boolean fragmentedTransaction) {
            this.flowFiles = flowFiles;
            this.fragmentedTransaction = fragmentedTransaction;
        }

        public List<FlowFile> getFlowFiles() {
            return flowFiles;
        }

        public boolean isFragmentedTransaction() {
            return fragmentedTransaction;
        }
    }

    private static class FragmentedEnclosure extends StatementFlowFileEnclosure {

        private final Map<FlowFile, StatementFlowFileEnclosure> flowFileToEnclosure = new HashMap<>();

        public FragmentedEnclosure() {
            super(null);
        }

        public void addFlowFile(final FlowFile flowFile, final StatementFlowFileEnclosure enclosure) {
            addFlowFile(flowFile);
            flowFileToEnclosure.put(flowFile, enclosure);
        }

        public StatementFlowFileEnclosure getTargetEnclosure(final FlowFile flowFile) {
            return flowFileToEnclosure.get(flowFile);
        }
    }

    /**
     * A simple data structure to hold a PreparedStatement and a List of FlowFiles
     * for which that statement should be evaluated.
     */
    private static class StatementFlowFileEnclosure implements FlowFileGroup {
        private final String sql;
        private PreparedStatement statement;
        private final List<FlowFile> flowFiles = new ArrayList<>();

        public StatementFlowFileEnclosure(String sql) {
            this.sql = sql;
        }

        public PreparedStatement getNewStatement(final Connection conn, final boolean obtainKeys) throws SQLException {
            if (obtainKeys) {
                // Create a new Prepared Statement, requesting that it return the generated keys.
                PreparedStatement stmt = conn.prepareStatement(sql, Statement.RETURN_GENERATED_KEYS);

                if (stmt == null) {
                    // Since we are passing Statement.RETURN_GENERATED_KEYS, calls to conn.prepareStatement will
                    // in some cases (at least for DerbyDB) return null.
                    // We will attempt to recompile the statement without the generated keys being returned.
                    stmt = conn.prepareStatement(sql);
                }

                // If we need to obtain keys, then we cannot do a Batch Update. In this case,
                // we don't need to store the PreparedStatement in the Map because we aren't
                // doing an addBatch/executeBatch. Instead, we will use the statement once
                // and close it.
                return stmt;
            }

            return conn.prepareStatement(sql);
        }

        public PreparedStatement getCachedStatement(final Connection conn) throws SQLException {
            if (statement != null) {
                return statement;
            }

            statement = conn.prepareStatement(sql);
            return statement;
        }

        @Override
        public List<FlowFile> getFlowFiles() {
            return flowFiles;
        }

        public void addFlowFile(final FlowFile flowFile) {
            this.flowFiles.add(flowFile);
        }

        @Override
        public int hashCode() {
            return sql.hashCode();
        }

        @Override
        public boolean equals(final Object obj) {
            if (obj == null) {
                return false;
            }
            if (obj == this) {
                // An object is always equal to itself.
                return true;
            }
            if (!(obj instanceof StatementFlowFileEnclosure)) {
                return false;
            }

            final StatementFlowFileEnclosure other = (StatementFlowFileEnclosure) obj;
            return sql.equals(other.sql);
        }
    }
}
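/*
 * Minimal usage sketch with NiFi's mock framework, for orientation only. It assumes a
 * DBCPService test double called "myDbcpService" backed by a database with a PERSONS
 * table; the service id "dbcp", the SQL, and the attribute values are illustrative:
 *
 *     TestRunner runner = TestRunners.newTestRunner(PutSQL.class);
 *     runner.addControllerService("dbcp", myDbcpService);     // hypothetical test double
 *     runner.enableControllerService(myDbcpService);
 *     runner.setProperty(PutSQL.CONNECTION_POOL, "dbcp");
 *
 *     // The sql.args.N.type / sql.args.N.value convention described in the capability
 *     // description above; java.sql.Types.VARCHAR is the integer 12.
 *     Map<String, String> attrs = new HashMap<>();
 *     attrs.put("sql.args.1.type", String.valueOf(java.sql.Types.VARCHAR));
 *     attrs.put("sql.args.1.value", "Mark");
 *     runner.enqueue("INSERT INTO PERSONS (NAME) VALUES (?)".getBytes(StandardCharsets.UTF_8), attrs);
 *
 *     runner.run();
 *     runner.assertAllFlowFilesTransferred(PutSQL.REL_SUCCESS, 1);
 */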