package com.thinkbiganalytics.spark.dataprofiler.columns;
/*-
* #%L
* thinkbig-spark-job-profiler-app
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.spark.dataprofiler.ProfilerConfiguration;
import com.thinkbiganalytics.spark.dataprofiler.model.MetricType;
import com.thinkbiganalytics.spark.dataprofiler.output.OutputRow;
import org.apache.spark.sql.types.StructField;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnull;
/**
 * Class to hold profile statistics for columns of boolean data type <br>
 * [Hive data type: BOOLEAN]
 *
 * <p>On top of the common statistics maintained by {@link StandardColumnStatistics}, this class
 * tracks how many times the column held a {@code true} value and how many times it held a
 * {@code false} value.
 */
@SuppressWarnings("serial")
public class BooleanColumnStatistics extends StandardColumnStatistics {

    /* Boolean specific metrics */

    /** Running total of occurrences interpreted as {@code true}. */
    private long trueCount = 0L;

    /** Running total of occurrences interpreted as {@code false}. */
    private long falseCount = 0L;

    /* Other variables */

    /**
     * Boolean interpretation of the value most recently passed to
     * {@link #accomodate(Object, Long)}; starts out as {@code true}.
     * NOTE(review): only used as scratch state inside accomodate(); kept as an instance field so
     * the class keeps the same set of fields.
     */
    private boolean columnBooleanValue = true;

    /**
     * Creates empty boolean statistics (both counters at zero) for the given column.
     *
     * @param columnField           field schema
     * @param profilerConfiguration profiler configuration, handed to the superclass constructor
     */
    public BooleanColumnStatistics(StructField columnField, @Nonnull final ProfilerConfiguration profilerConfiguration) {
        super(columnField, profilerConfiguration);
    }

    /**
     * Calculate boolean-specific statistics by accommodating the value and frequency/count.
     *
     * <p>The common statistics are always updated first. A {@code null} value does not touch the
     * boolean counters. Any other value is converted to its string form and parsed as a boolean:
     * the string "true" (ignoring case) counts towards the TRUE total, everything else counts
     * towards the FALSE total.
     *
     * @param columnValue value observed in the column, may be {@code null}
     * @param columnCount number of occurrences of {@code columnValue}
     */
    @Override
    public void accomodate(Object columnValue, Long columnCount) {
        accomodateCommon(columnValue, columnCount);

        // Null values contribute to the common statistics only.
        if (columnValue == null) {
            return;
        }

        columnBooleanValue = Boolean.parseBoolean(String.valueOf(columnValue));
        if (columnBooleanValue) {
            trueCount += columnCount;
        } else {
            falseCount += columnCount;
        }
    }

    /**
     * Combine with another column statistics.
     *
     * <p>The common statistics are merged first, then the TRUE and FALSE totals of the other
     * instance are added to this one.
     *
     * @param v_columnStatistics statistics to merge in; it is cast to
     *                           {@link BooleanColumnStatistics}
     */
    @Override
    public void combine(StandardColumnStatistics v_columnStatistics) {
        combineCommon(v_columnStatistics);

        final BooleanColumnStatistics other = (BooleanColumnStatistics) v_columnStatistics;
        trueCount += other.trueCount;
        falseCount += other.falseCount;
    }

    /**
     * Build a multi-line, human-readable dump of the statistics.
     *
     * @return the common verbose statistics followed by the TRUE and FALSE counts, wrapped in
     * curly braces
     */
    @Override
    public String getVerboseStatistics() {
        final StringBuilder verbose = new StringBuilder();
        verbose.append("{\n").append(getVerboseStatisticsCommon());
        verbose.append("\n");
        verbose.append("BooleanColumnStatistics [");
        verbose.append("trueCount=").append(trueCount);
        verbose.append(", falseCount=").append(falseCount);
        verbose.append("]\n}");
        return verbose.toString();
    }

    /**
     * Write statistics for output result table.
     *
     * @return a new list holding the common statistics rows followed by one row for the TRUE
     * count and one row for the FALSE count of this column
     */
    @Override
    public List<OutputRow> getStatistics() {
        final List<OutputRow> outputRows = new ArrayList<>();
        writeStatisticsCommon(outputRows);

        final String columnName = columnField.name();
        outputRows.add(new OutputRow(columnName, String.valueOf(MetricType.TRUE_COUNT), Long.toString(trueCount)));
        outputRows.add(new OutputRow(columnName, String.valueOf(MetricType.FALSE_COUNT), Long.toString(falseCount)));
        return outputRows;
    }

    /**
     * Get TRUE count
     *
     * @return number of occurrences counted as TRUE so far
     */
    public long getTrueCount() {
        return trueCount;
    }

    /**
     * Get FALSE count
     *
     * @return number of occurrences counted as FALSE so far
     */
    public long getFalseCount() {
        return falseCount;
    }
}