package com.thinkbiganalytics.spark.dataprofiler.columns;
/*-
* #%L
* thinkbig-spark-job-profiler-app
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.spark.dataprofiler.ProfilerConfiguration;
import com.thinkbiganalytics.spark.dataprofiler.model.MetricType;
import com.thinkbiganalytics.spark.dataprofiler.output.OutputRow;
import org.apache.spark.sql.types.StructField;
import java.sql.Date;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnull;
/**
 * Class to hold profile statistics for columns of date data type <br>
 * [Hive data type: DATE]
 *
 * <p>On top of the common statistics handled by {@link StandardColumnStatistics}, this class
 * tracks the latest ({@code maxDate}) and earliest ({@code minDate}) non-null date seen.
 * Until a non-null date has been accommodated (directly or through {@link #combine}),
 * placeholder values are reported instead.
 */
@SuppressWarnings("serial")
public class DateColumnStatistics extends StandardColumnStatistics {

    /* Placeholder reported as the latest date while no non-null date has been seen */
    private static final String PLACEHOLDER_MAX_DATE = "1000-01-01";

    /*
     * Placeholder reported as the earliest date while no non-null date has been seen.
     * The literal is kept unchanged so the output for columns without dates stays the same.
     */
    private static final String PLACEHOLDER_MIN_DATE = "9999-12-12";

    /* Date specific metrics */
    private Date maxDate;
    private Date minDate;

    /*
     * True once at least one real date has contributed to maxDate/minDate. The placeholders are
     * never used as comparison seeds, so dates outside the placeholder range are tracked correctly.
     */
    private boolean dateObserved;

    /**
     * Two-argument constructor
     *
     * @param columnField           field schema
     * @param profilerConfiguration profiler configuration, passed on to the parent class
     */
    public DateColumnStatistics(StructField columnField, @Nonnull final ProfilerConfiguration profilerConfiguration) {
        super(columnField, profilerConfiguration);
        maxDate = Date.valueOf(PLACEHOLDER_MAX_DATE);
        minDate = Date.valueOf(PLACEHOLDER_MIN_DATE);
        dateObserved = false;
    }

    /**
     * Calculate date-specific statistics by accommodating the value and frequency/count
     *
     * @param columnValue value to accommodate; when non-null its string form is parsed with
     *                    {@link Date#valueOf(String)}, so it must read as {@code yyyy-[m]m-[d]d}
     * @param columnCount frequency of the value, passed on to the common statistics
     * @throws IllegalArgumentException if the string form of a non-null value is not a valid date
     */
    @Override
    public void accomodate(Object columnValue, Long columnCount) {
        accomodateCommon(columnValue, columnCount);

        // Null values only affect the common statistics
        if (columnValue == null) {
            return;
        }

        final Date dateValue = Date.valueOf(String.valueOf(columnValue));

        if (!dateObserved) {
            // First real date: it is both the latest and the earliest seen so far
            maxDate = dateValue;
            minDate = dateValue;
            dateObserved = true;
            return;
        }

        if (maxDate.before(dateValue)) {
            maxDate = dateValue;
        }
        if (minDate.after(dateValue)) {
            minDate = dateValue;
        }
    }

    /**
     * Combine with another column statistics
     *
     * @param v_columnStatistics statistics to merge into this instance; must be a
     *                           {@code DateColumnStatistics}
     * @throws ClassCastException if the argument is not a {@code DateColumnStatistics}
     */
    @Override
    public void combine(StandardColumnStatistics v_columnStatistics) {
        combineCommon(v_columnStatistics);

        final DateColumnStatistics other = (DateColumnStatistics) v_columnStatistics;

        // The other side only holds placeholders: nothing date-specific to merge
        if (!other.dateObserved) {
            return;
        }

        if (!dateObserved) {
            // This side only holds placeholders: adopt the other side's range as-is
            maxDate = other.maxDate;
            minDate = other.minDate;
            dateObserved = true;
            return;
        }

        if (maxDate.before(other.maxDate)) {
            maxDate = other.maxDate;
        }
        if (minDate.after(other.minDate)) {
            minDate = other.minDate;
        }
    }

    /**
     * Build a verbose, human-readable description of the statistics
     *
     * @return common statistics followed by the date-specific ones
     */
    @Override
    public String getVerboseStatistics() {
        return "{\n" + getVerboseStatisticsCommon()
               + "\n"
               + "DateColumnStatistics ["
               + "maxDate=" + maxDate
               + ", minDate=" + minDate
               + "]\n}";
    }

    /**
     * Write statistics for output result table
     *
     * @return rows for the common statistics followed by the MAX_DATE and MIN_DATE rows
     */
    @Override
    public List<OutputRow> getStatistics() {
        final List<OutputRow> rows = new ArrayList<>();
        writeStatisticsCommon(rows);
        rows.add(new OutputRow(columnField.name(), String.valueOf(MetricType.MAX_DATE), String.valueOf(maxDate)));
        rows.add(new OutputRow(columnField.name(), String.valueOf(MetricType.MIN_DATE), String.valueOf(minDate)));
        return rows;
    }

    /**
     * Get latest date
     *
     * @return latest date, or the placeholder 1000-01-01 if no non-null date has been seen
     */
    public Date getMaxDate() {
        return maxDate;
    }

    /**
     * Get earliest date
     *
     * @return earliest date, or the placeholder 9999-12-12 if no non-null date has been seen
     */
    public Date getMinDate() {
        return minDate;
    }
}