/*
* Copyright [2012-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.shifu.container.obj;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
/**
 * {@link ModelNormalizeConf} holds the 'normalize' section of ModelConfig.json.
 * <p>
 * Unknown JSON properties are ignored on deserialization (see the
 * {@link JsonIgnoreProperties} annotation below).
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class ModelNormalizeConf {

    /**
     * Supported normalization types, deserialized through {@code NormTypeDeserializer}.
     * <p>
     * The {@code *_ZSCORE} / {@code *_ZSCALE} spellings are listed in pairs; per the original
     * author's note each pair names the same normalization and both are kept for user
     * friendliness.
     */
    @JsonDeserialize(using = NormTypeDeserializer.class)
    public enum NormType {
        // legacy z-score, two spellings of the same type
        OLD_ZSCORE,
        OLD_ZSCALE,
        // z-score, two spellings of the same type
        ZSCORE,
        ZSCALE,
        WOE,
        WEIGHT_WOE,
        HYBRID,
        WEIGHT_HYBRID,
        WOE_ZSCORE,
        WOE_ZSCALE,
        WEIGHT_WOE_ZSCORE,
        WEIGHT_WOE_ZSCALE
    }

    /**
     * Available correlation computing methods, deserialized through
     * {@code CorrelationDeserializer}. TODO move to stats?
     * <p>
     * A Spearman mode is not implemented because it would need all variables to be sorted.
     *
     * @author Zhang David (pengzhang@paypal.com)
     */
    @JsonDeserialize(using = CorrelationDeserializer.class)
    public enum Correlation {
        None,
        Pearson,
        NormPearson
    }

    /**
     * Standard-deviation cutoff used after z-score normalization: a value beyond this threshold
     * is cut off to the threshold itself or to its negative. Defaults to 4.0.
     */
    private Double stdDevCutOff = 4.0d;

    /**
     * Sampling rate of the norm step. Sampling here also impacts training, because the norm
     * output is the training input. Defaults to 1.0.
     */
    private Double sampleRate = 1.0d;

    /**
     * Whether only negatives are sampled when sampling by {@link #sampleRate} is enabled.
     * Defaults to {@code false}.
     */
    private Boolean sampleNegOnly = false;

    /**
     * Normalization type to apply. Defaults to {@link NormType#ZSCALE}.
     */
    private NormType normType = NormType.ZSCALE;

    /**
     * Whether the norm output is written in Parquet format. With Parquet output and only part of
     * the features selected, training reads just the selected columns. So far Parquet format is
     * only supported for the NN algorithm. Defaults to {@code false}.
     */
    private Boolean isParquet = false;

    /**
     * @return the standard-deviation cutoff threshold
     */
    public Double getStdDevCutOff() {
        return this.stdDevCutOff;
    }

    /**
     * @param stdDevCutOff
     *            the standard-deviation cutoff threshold to set
     */
    public void setStdDevCutOff(Double stdDevCutOff) {
        this.stdDevCutOff = stdDevCutOff;
    }

    /**
     * @return the sampling rate of the norm step
     */
    public Double getSampleRate() {
        return this.sampleRate;
    }

    /**
     * @param sampleRate
     *            the sampling rate to set
     */
    public void setSampleRate(Double sampleRate) {
        this.sampleRate = sampleRate;
    }

    /**
     * @return whether only negatives are sampled
     */
    public Boolean getSampleNegOnly() {
        return this.sampleNegOnly;
    }

    /**
     * @param sampleNegOnly
     *            whether only negatives are sampled
     */
    public void setSampleNegOnly(Boolean sampleNegOnly) {
        this.sampleNegOnly = sampleNegOnly;
    }

    /**
     * @return the configured normalization type
     */
    public NormType getNormType() {
        return this.normType;
    }

    /**
     * @param normType
     *            the normalization type to set
     */
    public void setNormType(NormType normType) {
        this.normType = normType;
    }

    /**
     * The getter is marked {@link JsonIgnore} while the setter is marked {@link JsonProperty}.
     * NOTE(review): presumably this keeps the flag readable from JSON without writing it back
     * out on serialization - confirm against the Jackson version in use.
     *
     * @return the Parquet-output flag
     */
    @JsonIgnore
    public Boolean getIsParquet() {
        return this.isParquet;
    }

    /**
     * @param isParquet
     *            the Parquet-output flag to set
     */
    @JsonProperty
    public void setIsParquet(Boolean isParquet) {
        this.isParquet = isParquet;
    }

    /**
     * Creates a new {@link ModelNormalizeConf} through the default constructor and copies
     * {@code normType}, {@code sampleRate}, {@code sampleNegOnly}, {@code stdDevCutOff} and
     * {@code isParquet} into it. {@code Object.clone()} is not invoked.
     *
     * @return an independent instance carrying the same field values as this one
     */
    @Override
    public ModelNormalizeConf clone() {
        ModelNormalizeConf copy = new ModelNormalizeConf();
        copy.normType = this.normType;
        copy.sampleRate = this.sampleRate;
        copy.sampleNegOnly = this.sampleNegOnly;
        copy.stdDevCutOff = this.stdDevCutOff;
        copy.isParquet = this.isParquet;
        // This class declares no Correlation field, so there is no correlation setting to copy.
        return copy;
    }
}