/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.standard;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.Stateful;
import org.apache.nifi.annotation.behavior.TriggerSerially;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateManager;
import org.apache.nifi.components.state.StateMap;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* A processor to retrieve a list of tables (and their metadata) from a database connection
*/
@TriggerSerially
@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
@Tags({"sql", "list", "jdbc", "table", "database"})
@CapabilityDescription("Generates a set of flow files, each containing attributes corresponding to metadata about a table from a database connection. Once "
+ "metadata about a table has been fetched, it will not be fetched again until the Refresh Interval (if set) has elapsed, or until state has been "
+ "manually cleared.")
@WritesAttributes({
@WritesAttribute(attribute = "db.table.name", description = "Contains the name of a database table from the connection"),
@WritesAttribute(attribute = "db.table.catalog", description = "Contains the name of the catalog to which the table belongs (may be null)"),
@WritesAttribute(attribute = "db.table.schema", description = "Contains the name of the schema to which the table belongs (may be null)"),
@WritesAttribute(attribute = "db.table.fullname", description = "Contains the fully-qualifed table name (possibly including catalog, schema, etc.)"),
@WritesAttribute(attribute = "db.table.type",
description = "Contains the type of the database table from the connection. Typical types are \"TABLE\", \"VIEW\", \"SYSTEM TABLE\", "
+ "\"GLOBAL TEMPORARY\", \"LOCAL TEMPORARY\", \"ALIAS\", \"SYNONYM\""),
@WritesAttribute(attribute = "db.table.remarks", description = "Contains the name of a database table from the connection"),
@WritesAttribute(attribute = "db.table.count", description = "Contains the number of rows in the table")
})
@Stateful(scopes = {Scope.CLUSTER}, description = "After performing a listing of tables, the timestamp of the query is stored. "
+ "This allows the Processor to not re-list tables the next time that the Processor is run. Specifying the refresh interval in the processor properties will "
+ "indicate that when the processor detects the interval has elapsed, the state will be reset and tables will be re-listed as a result. "
+ "This processor is meant to be run on the primary node only.")
public class ListDatabaseTables extends AbstractProcessor {
// Attribute names
public static final String DB_TABLE_NAME = "db.table.name";
public static final String DB_TABLE_CATALOG = "db.table.catalog";
public static final String DB_TABLE_SCHEMA = "db.table.schema";
public static final String DB_TABLE_FULLNAME = "db.table.fullname";
public static final String DB_TABLE_TYPE = "db.table.type";
public static final String DB_TABLE_REMARKS = "db.table.remarks";
public static final String DB_TABLE_COUNT = "db.table.count";
// Relationships
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All FlowFiles that are received are routed to success")
.build();
// Property descriptors
public static final PropertyDescriptor DBCP_SERVICE = new PropertyDescriptor.Builder()
.name("list-db-tables-db-connection")
.displayName("Database Connection Pooling Service")
.description("The Controller Service that is used to obtain connection to database")
.required(true)
.identifiesControllerService(DBCPService.class)
.build();
public static final PropertyDescriptor CATALOG = new PropertyDescriptor.Builder()
.name("list-db-tables-catalog")
.displayName("Catalog")
.description("The name of a catalog from which to list database tables. The name must match the catalog name as it is stored in the database. "
+ "If the property is not set, the catalog name will not be used to narrow the search for tables. If the property is set to an empty string, "
+ "tables without a catalog will be listed.")
.required(false)
.addValidator(Validator.VALID)
.build();
public static final PropertyDescriptor SCHEMA_PATTERN = new PropertyDescriptor.Builder()
.name("list-db-tables-schema-pattern")
.displayName("Schema Pattern")
.description("A pattern for matching schemas in the database. Within a pattern, \"%\" means match any substring of 0 or more characters, "
+ "and \"_\" means match any one character. The pattern must match the schema name as it is stored in the database. "
+ "If the property is not set, the schema name will not be used to narrow the search for tables. If the property is set to an empty string, "
+ "tables without a schema will be listed.")
.required(false)
.addValidator(Validator.VALID)
.build();
public static final PropertyDescriptor TABLE_NAME_PATTERN = new PropertyDescriptor.Builder()
.name("list-db-tables-name-pattern")
.displayName("Table Name Pattern")
.description("A pattern for matching tables in the database. Within a pattern, \"%\" means match any substring of 0 or more characters, "
+ "and \"_\" means match any one character. The pattern must match the table name as it is stored in the database. "
+ "If the property is not set, all tables will be retrieved.")
.required(false)
.addValidator(Validator.VALID)
.build();
public static final PropertyDescriptor TABLE_TYPES = new PropertyDescriptor.Builder()
.name("list-db-tables-types")
.displayName("Table Types")
.description("A comma-separated list of table types to include. For example, some databases support TABLE and VIEW types. If the property is not set, "
+ "tables of all types will be returned.")
.required(false)
.defaultValue("TABLE")
.addValidator(Validator.VALID)
.build();
public static final PropertyDescriptor INCLUDE_COUNT = new PropertyDescriptor.Builder()
.name("list-db-include-count")
.displayName("Include Count")
.description("Whether to include the table's row count as a flow file attribute. This affects performance as a database query will be generated "
+ "for each table in the retrieved list.")
.required(true)
.allowableValues("true", "false")
.defaultValue("false")
.build();
public static final PropertyDescriptor REFRESH_INTERVAL = new PropertyDescriptor.Builder()
.name("list-db-refresh-interval")
.displayName("Refresh Interval")
.description("The amount of time to elapse before resetting the processor state, thereby causing all current tables to be listed. "
+ "During this interval, the processor may continue to run, but tables that have already been listed will not be re-listed. However new/added "
+ "tables will be listed as the processor runs. A value of zero means the state will never be automatically reset, the user must "
+ "Clear State manually.")
.required(true)
.defaultValue("0 sec")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.build();
private static final List<PropertyDescriptor> propertyDescriptors;
private static final Set<Relationship> relationships;
/*
* Will ensure that the list of property descriptors is build only once.
* Will also create a Set of relationships
*/
static {
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
_propertyDescriptors.add(DBCP_SERVICE);
_propertyDescriptors.add(CATALOG);
_propertyDescriptors.add(SCHEMA_PATTERN);
_propertyDescriptors.add(TABLE_NAME_PATTERN);
_propertyDescriptors.add(TABLE_TYPES);
_propertyDescriptors.add(INCLUDE_COUNT);
_propertyDescriptors.add(REFRESH_INTERVAL);
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
Set<Relationship> _relationships = new HashSet<>();
_relationships.add(REL_SUCCESS);
relationships = Collections.unmodifiableSet(_relationships);
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return propertyDescriptors;
}
@Override
public Set<Relationship> getRelationships() {
return relationships;
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final ComponentLog logger = getLogger();
final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
final String catalog = context.getProperty(CATALOG).getValue();
final String schemaPattern = context.getProperty(SCHEMA_PATTERN).getValue();
final String tableNamePattern = context.getProperty(TABLE_NAME_PATTERN).getValue();
final String[] tableTypes = context.getProperty(TABLE_TYPES).isSet()
? context.getProperty(TABLE_TYPES).getValue().split("\\s*,\\s*")
: null;
final boolean includeCount = context.getProperty(INCLUDE_COUNT).asBoolean();
final long refreshInterval = context.getProperty(REFRESH_INTERVAL).asTimePeriod(TimeUnit.MILLISECONDS);
final StateManager stateManager = context.getStateManager();
final StateMap stateMap;
final Map<String, String> stateMapProperties;
try {
stateMap = stateManager.getState(Scope.CLUSTER);
stateMapProperties = new HashMap<>(stateMap.toMap());
} catch (IOException ioe) {
throw new ProcessException(ioe);
}
try (final Connection con = dbcpService.getConnection()) {
DatabaseMetaData dbMetaData = con.getMetaData();
ResultSet rs = dbMetaData.getTables(catalog, schemaPattern, tableNamePattern, tableTypes);
while (rs.next()) {
final String tableCatalog = rs.getString(1);
final String tableSchema = rs.getString(2);
final String tableName = rs.getString(3);
final String tableType = rs.getString(4);
final String tableRemarks = rs.getString(5);
// Build fully-qualified name
String fqn = Stream.of(tableCatalog, tableSchema, tableName)
.filter(segment -> !StringUtils.isEmpty(segment))
.collect(Collectors.joining("."));
String lastTimestampForTable = stateMapProperties.get(fqn);
boolean refreshTable = true;
try {
// Refresh state if the interval has elapsed
long lastRefreshed = -1;
final long currentTime = System.currentTimeMillis();
if (!StringUtils.isEmpty(lastTimestampForTable)) {
lastRefreshed = Long.parseLong(lastTimestampForTable);
}
if (lastRefreshed == -1 || (refreshInterval > 0 && currentTime >= (lastRefreshed + refreshInterval))) {
stateMapProperties.remove(lastTimestampForTable);
} else {
refreshTable = false;
}
} catch (final NumberFormatException nfe) {
getLogger().error("Failed to retrieve observed last table fetches from the State Manager. Will not perform "
+ "query until this is accomplished.", nfe);
context.yield();
return;
}
if (refreshTable) {
FlowFile flowFile = session.create();
logger.info("Found {}: {}", new Object[]{tableType, fqn});
if (includeCount) {
try (Statement st = con.createStatement()) {
final String countQuery = "SELECT COUNT(1) FROM " + fqn;
logger.debug("Executing query: {}", new Object[]{countQuery});
ResultSet countResult = st.executeQuery(countQuery);
if (countResult.next()) {
flowFile = session.putAttribute(flowFile, DB_TABLE_COUNT, Long.toString(countResult.getLong(1)));
}
} catch (SQLException se) {
logger.error("Couldn't get row count for {}", new Object[]{fqn});
session.remove(flowFile);
continue;
}
}
if (tableCatalog != null) {
flowFile = session.putAttribute(flowFile, DB_TABLE_CATALOG, tableCatalog);
}
if (tableSchema != null) {
flowFile = session.putAttribute(flowFile, DB_TABLE_SCHEMA, tableSchema);
}
flowFile = session.putAttribute(flowFile, DB_TABLE_NAME, tableName);
flowFile = session.putAttribute(flowFile, DB_TABLE_FULLNAME, fqn);
flowFile = session.putAttribute(flowFile, DB_TABLE_TYPE, tableType);
if (tableRemarks != null) {
flowFile = session.putAttribute(flowFile, DB_TABLE_REMARKS, tableRemarks);
}
String transitUri;
try {
transitUri = dbMetaData.getURL();
} catch (SQLException sqle) {
transitUri = "<unknown>";
}
session.getProvenanceReporter().receive(flowFile, transitUri);
session.transfer(flowFile, REL_SUCCESS);
stateMapProperties.put(fqn, Long.toString(System.currentTimeMillis()));
}
}
// Update the timestamps for listed tables
if (stateMap.getVersion() == -1) {
stateManager.setState(stateMapProperties, Scope.CLUSTER);
} else {
stateManager.replace(stateMap, stateMapProperties, Scope.CLUSTER);
}
} catch (final SQLException | IOException e) {
throw new ProcessException(e);
}
}
}