/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.audit.values.auditor; import java.io.Closeable; import java.io.IOException; import lombok.AllArgsConstructor; import lombok.Getter; import org.apache.avro.generic.GenericRecord; import com.typesafe.config.Config; import gobblin.audit.values.policy.column.ColumnProjectionPolicy; import gobblin.audit.values.policy.column.DefaultColumnProjectionPolicyFactory; import gobblin.audit.values.policy.row.DefaultRowSelectionPolicyFactory; import gobblin.audit.values.policy.row.RowSelectionPolicy; import gobblin.audit.values.sink.AuditSink; import gobblin.audit.values.sink.DefaultAuditSinkFactory; /** * The class that implements value based auditing. The class captures the values of certain * columns from the rows in the dataset using the {@link ColumnProjectionPolicy}. * This is done for every row or for a sample of the rows as defined by the {@link RowSelectionPolicy}. * The selected rows are then written to the {@link AuditSink} * * {@link ValueAuditGenerator#audit(GenericRecord)} is the method that audits an inputRecord. */ @AllArgsConstructor @Getter public class ValueAuditGenerator implements Closeable { public static final String COLUMN_PROJECTION_CONFIG_SCOPE = "columnProjection"; public static final String ROW_SELECTION_CONFIG_SCOPE = "rowSelection"; public static final String AUDIT_SINK_CONFIG_SCOPE = "auditSink"; private final ColumnProjectionPolicy columnProjectionPolicy; private final RowSelectionPolicy rowSelectionPolicy; private final AuditSink auditSink; /** * Factory method to create a new {@link ValueAuditGenerator} * @param config job configs * @param runtimeAuditMetadata is used to pass the table specific runtime information like tablename, databaseName, snapshotName etc. * See {@link ValueAuditRuntimeMetadata} * @return a new {@link ValueAuditGenerator} */ public static ValueAuditGenerator create(Config config, ValueAuditRuntimeMetadata runtimeAuditMetadata) { ColumnProjectionPolicy columnProjectionPolicy = DefaultColumnProjectionPolicyFactory.getInstance().create( config.getConfig(COLUMN_PROJECTION_CONFIG_SCOPE),runtimeAuditMetadata.getTableMetadata()); RowSelectionPolicy rowSelectionPolicy = DefaultRowSelectionPolicyFactory.getInstance().create( config.getConfig(ROW_SELECTION_CONFIG_SCOPE), runtimeAuditMetadata.getTableMetadata(), columnProjectionPolicy); AuditSink auditSink = DefaultAuditSinkFactory.getInstance().create( config.getConfig(AUDIT_SINK_CONFIG_SCOPE), runtimeAuditMetadata); return new ValueAuditGenerator(columnProjectionPolicy, rowSelectionPolicy, auditSink); } /** * Write an audit record for the <code>inputRecord</code> to the {@link AuditSink}. * An audit record is generated for every <code>inputRecord</code> that satisfies the {@link RowSelectionPolicy}. * An audit record is created by projecting <code>inputRecord</code> using the {@link ColumnProjectionPolicy} * * @param inputRecord to be audited * @throws IOException if auditing failed for this record */ public void audit(GenericRecord inputRecord) throws IOException { if (this.rowSelectionPolicy.shouldSelectRow(inputRecord)) { auditSink.write(columnProjectionPolicy.project(inputRecord)); } } @Override public void close() throws IOException { this.auditSink.close(); } }