/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.compliance.purger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.Optional;
import com.google.common.base.Throwables;

import lombok.extern.slf4j.Slf4j;

import gobblin.compliance.ComplianceConfigurationKeys;
import gobblin.compliance.ComplianceEvents;
import gobblin.compliance.HivePartitionDataset;
import gobblin.compliance.HivePartitionFinder;
import gobblin.compliance.utils.DatasetUtils;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.dataset.DatasetsFinder;
import gobblin.instrumented.Instrumented;
import gobblin.metrics.MetricContext;
import gobblin.metrics.event.EventSubmitter;
import gobblin.publisher.DataPublisher;
import gobblin.source.workunit.WorkUnit;
import gobblin.util.reflection.GobblinConstructorUtils;

/**
 * A {@link DataPublisher} for purge work units. It moves SUCCESSFUL {@link WorkUnitState}s to
 * COMMITTED and all others to FAILED, submitting a purger event for each transition.
 *
 * @author adsharma
 */
@Slf4j
public class HivePurgerPublisher extends DataPublisher {
  protected List<HivePartitionDataset> datasets = new ArrayList<>();
  protected MetricContext metricContext;
  protected EventSubmitter eventSubmitter;
  protected DatasetsFinder datasetFinder;

  public HivePurgerPublisher(State state) {
    super(state);
    this.metricContext = Instrumented.getMetricContext(state, this.getClass());
    this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, ComplianceEvents.NAMESPACE).build();
    String datasetFinderClass = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS,
        HivePartitionFinder.class.getName());
    this.datasetFinder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, datasetFinderClass, state);
    try {
      this.datasets = this.datasetFinder.findDatasets();
    } catch (IOException e) {
      // Rethrow as unchecked: a publisher that cannot locate its datasets is unusable.
      throw Throwables.propagate(e);
    }
  }

  @Override
  public void initialize() {
  }
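
  /**
   * For each {@link WorkUnitState}: a SUCCESSFUL working state is promoted to COMMITTED and a
   * {@link ComplianceEvents.Purger#WORKUNIT_COMMITTED} event is submitted; any other working state
   * is marked FAILED and a {@link ComplianceEvents.Purger#WORKUNIT_FAILED} event is submitted.
   */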
  @Override
  public void publishData(Collection<? extends WorkUnitState> states) {
    for (WorkUnitState state : states) {
      if (state.getWorkingState() == WorkUnitState.WorkingState.SUCCESSFUL) {
        state.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        submitEvent(state, ComplianceEvents.Purger.WORKUNIT_COMMITTED);
      } else {
        state.setWorkingState(WorkUnitState.WorkingState.FAILED);
        submitEvent(state, ComplianceEvents.Purger.WORKUNIT_FAILED);
      }
    }
  }

  /**
   * Submits an event with read and write counters for the given work unit. Records/bytes read are
   * taken from the work unit; records/bytes written are taken from the matching
   * {@link HivePartitionDataset}. If no matching dataset is found, no event is submitted.
   */
  private void submitEvent(WorkUnitState state, String name) {
    WorkUnit workUnit = state.getWorkunit();
    Map<String, String> metadata = new HashMap<>();
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_RECORDSREAD,
        state.getProp(ComplianceConfigurationKeys.NUM_ROWS));
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_BYTESREAD,
        getDataSize(workUnit.getProp(ComplianceConfigurationKeys.RAW_DATA_SIZE),
            workUnit.getProp(ComplianceConfigurationKeys.TOTAL_SIZE)));
    Optional<HivePartitionDataset> dataset =
        DatasetUtils.findDataset(workUnit.getProp(ComplianceConfigurationKeys.PARTITION_NAME), this.datasets);
    if (!dataset.isPresent()) {
      // No dataset corresponds to this partition; skip the event rather than emit partial metadata.
      return;
    }
    HivePartitionDataset hivePartitionDataset = dataset.get();
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_RECORDSWRITTEN,
        DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.NUM_ROWS,
            ComplianceConfigurationKeys.DEFAULT_NUM_ROWS));
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_BYTESWRITTEN,
        getDataSize(DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.RAW_DATA_SIZE,
            ComplianceConfigurationKeys.DEFAULT_RAW_DATA_SIZE),
            DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.TOTAL_SIZE,
                ComplianceConfigurationKeys.DEFAULT_TOTAL_SIZE)));
    metadata.put(ComplianceConfigurationKeys.PARTITION_NAME, hivePartitionDataset.datasetURN());
    this.eventSubmitter.submit(name, metadata);
  }

  /**
   * Returns totalDataSize if it is positive, otherwise rawDataSize. Sizes are parsed as longs,
   * since byte counts for Hive partitions can exceed Integer.MAX_VALUE.
   */
  private String getDataSize(String rawDataSize, String totalDataSize) {
    long rawDataSizeVal = Long.parseLong(rawDataSize);
    long totalDataSizeVal = Long.parseLong(totalDataSize);
    long dataSize = totalDataSizeVal;
    if (totalDataSizeVal <= 0) {
      dataSize = rawDataSizeVal;
    }
    return Long.toString(dataSize);
  }

  @Override
  public void publishMetadata(Collection<? extends WorkUnitState> states) {
  }

  @Override
  public void close() {
  }
}
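
// Usage note (a sketch, not part of this class's contract): a purge job would typically
// select this publisher via Gobblin's standard publisher class key in the job
// configuration, e.g.
//
//   data.publisher.type=gobblin.compliance.purger.HivePurgerPublisher
//
// The dataset finder defaults to HivePartitionFinder and can be swapped by setting the
// property named by ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS,
// as handled in the constructor above.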