package com.thinkbiganalytics.spark.dataprofiler.model;
/*-
* #%L
* thinkbig-spark-job-profiler-app
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
/**
* List of metric types<br>
* A subset of these metric types will be calculated for each data type<br>
* For specifics, please refer to datatype metric mapping matrix
*/
public enum MetricType {
/**
* Datatype of column (as evaluated by Spark)
*/
COLUMN_DATATYPE,
/**
* Does column allow null values?
*/
COLUMN_NULLABLE,
/**
* Metadata associated with column
*/
COLUMN_METADATA,
/**
* Number of null values
*/
NULL_COUNT,
/**
* Total values (includes nulls and empty values)
*/
TOTAL_COUNT,
/**
* Total unique values (null and empty are considered a unique value each)
*/
UNIQUE_COUNT,
/**
* Percentage of null values
*/
PERC_NULL_VALUES,
/**
* Percentage of unique values
*/
PERC_UNIQUE_VALUES,
/**
* Percentage of duplicate values
*/
PERC_DUPLICATE_VALUES,
/**
* Top n values (in order of their frequency)
*/
TOP_N_VALUES,
/**
* Maximum length of string
*/
MAX_LENGTH,
/**
* Minimum length of string (empty strings ignored)
*/
MIN_LENGTH,
/**
* Longest string value
*/
LONGEST_STRING,
/**
* Shortest string value (empty strings ignored)
*/
SHORTEST_STRING,
/**
* Total empty strings (empty string is not a null value)
*/
EMPTY_COUNT,
/**
* Percentage of empty strings
*/
PERC_EMPTY_VALUES,
/**
* Maximum of numeric values
*/
MAX,
/**
* Minimum of numeric values
*/
MIN,
/**
* Sum of numeric values
*/
SUM,
/**
* Count of TRUE boolean values
*/
TRUE_COUNT,
/**
* Count of FALSE boolean values
*/
FALSE_COUNT,
/**
* Mean (average) of numeric values
*/
MEAN,
/**
* Standard Deviation (Population) of numeric values
*/
STDDEV,
/**
* Variance (Population) of numeric values
*/
VARIANCE,
/**
* Latest date value
*/
MAX_DATE,
/**
* Earliest date value
*/
MIN_DATE,
/**
* Latest timestamp value
*/
MAX_TIMESTAMP,
/**
* Earliest timestamp value
*/
MIN_TIMESTAMP,
/**
* Min string (Lexical ordering) (Case-sensitive)
*/
MIN_STRING_CASE,
/**
* Max string (Lexical ordering) (Case-sensitive)
*/
MAX_STRING_CASE,
/**
* Min string (Lexical ordering) (Case-insensitive)
*/
MIN_STRING_ICASE,
/**
* Max string (Lexical ordering) (Case-insensitive)
*/
MAX_STRING_ICASE
}