/* * Copyright [2012-2014] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.container.obj; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import ml.shifu.shifu.util.Constants; import java.io.Serializable; import java.util.Comparator; import java.util.List; /** * ColumnConfig class record the basic information for column in data. Almost all information in ColumnConfig is * generated automatically, user should avoid to change it manually, unless understanding the meaning of the changes. */ @JsonIgnoreProperties(ignoreUnknown = true) public class ColumnConfig { // add weight column and weight column is treated the same as meta public static enum ColumnFlag { ForceSelect, ForceRemove, Meta, Target, Weight } public static enum ColumnType { A, N, C } /** * Column index, start from 0 to length-1 */ private Integer columnNum; /** * Column name read from header file or first line of csv file. */ private String columnName; /** * Version of current ColumnConfig.json */ private String version = Constants.version; /** * Numerical or Categorical feature. */ private ColumnType columnType = ColumnType.N; /** * Meta, target, weight, force-select or force-remove columns */ private ColumnFlag columnFlag = null; /** * If column is final selected, if set to finalSelect for {@link #columnFlag}, no matter what quality it is, such * column will be final selected and {@link #finalSelect} is set to true. * * <p> * Only {@link #finalSelect} is determined as final training column. */ private Boolean finalSelect = Boolean.FALSE; /** * Column stats info */ private ColumnStats columnStats = new ColumnStats(); /** * Column binning info */ private ColumnBinning columnBinning = new ColumnBinning(); // /** // * Correlation array list // */ // private double[] corrArray; /** * Sample values of such column. */ private List<String> sampleValues; /* * --------------------------------------------------------------------------- * Auto-Gen methods * --------------------------------------------------------------------------- */ public Integer getColumnNum() { return columnNum; } public void setColumnNum(Integer columnNum) { this.columnNum = columnNum; } public String getColumnName() { return columnName; } public ColumnType getColumnType() { return columnType; } public void setColumnType(ColumnType columnType) { this.columnType = columnType; } public ColumnFlag getColumnFlag() { return columnFlag; } public void setColumnFlag(ColumnFlag columnFlag) { this.columnFlag = columnFlag; } public Boolean isFinalSelect() { return finalSelect; } public void setFinalSelect(Boolean finalSelect) { this.finalSelect = finalSelect; } public void setColumnName(String columnName) { this.columnName = columnName; } public ColumnStats getColumnStats() { return columnStats; } public void setColumnStats(ColumnStats columnStats) { this.columnStats = columnStats; } public ColumnBinning getColumnBinning() { return columnBinning; } public void setColumnBinning(ColumnBinning columnBinning) { this.columnBinning = columnBinning; } /* * --------------------------------------------------------------------------- * * Capsulated methods for easy usage * * --------------------------------------------------------------------------- */ @JsonIgnore public boolean isWeight() { return ColumnFlag.Weight == columnFlag; } @JsonIgnore public boolean isTarget() { return ColumnFlag.Target.equals(columnFlag); } @JsonIgnore public boolean isCandidate() { return !isForceRemove() && !isMeta() && !isTarget(); } @JsonIgnore public boolean isNumerical() { return columnType == ColumnType.N; } @JsonIgnore public boolean isCategorical() { return columnType == ColumnType.C; } // weigt column is also treated as meta column @JsonIgnore public boolean isMeta() { return ColumnFlag.Meta == columnFlag || ColumnFlag.Weight == columnFlag; } @JsonIgnore public boolean isForceRemove() { return ColumnFlag.ForceRemove == (columnFlag); } @JsonIgnore public boolean isForceSelect() { return ColumnFlag.ForceSelect == (columnFlag); } @JsonIgnore public int getBinLength() { return columnBinning.getLength(); } @JsonIgnore public List<Double> getBinBoundary() { return columnBinning.getBinBoundary(); } @JsonIgnore public List<String> getBinCategory() { return columnBinning.getBinCategory(); } @JsonIgnore public List<Integer> getBinCountNeg() { return columnBinning.getBinCountNeg(); } @JsonIgnore public List<Integer> getBinCountPos() { return columnBinning.getBinCountPos(); } @JsonIgnore public List<Double> getBinPosRate() { return columnBinning.getBinPosRate(); } @JsonIgnore public List<Integer> getBinAvgScore() { return columnBinning.getBinAvgScore(); } @JsonIgnore public List<Double> getBinCountWoe() { return columnBinning.getBinCountWoe(); } @JsonIgnore public List<Double> getBinWeightedWoe() { return columnBinning.getBinWeightedWoe(); } public void setBinLength(int length) { columnBinning.setLength(length); } public void setBinBoundary(List<Double> binBoundary) { columnBinning.setBinBoundary(binBoundary); columnBinning.setLength(binBoundary == null ? 0 : binBoundary.size()); } public void setBinCategory(List<String> binCategory) { columnBinning.setBinCategory(binCategory); columnBinning.setLength(binCategory == null ? 0 : binCategory.size()); } public void setBinCountNeg(List<Integer> binCountNeg) { columnBinning.setBinCountNeg(binCountNeg); } public void setBinCountPos(List<Integer> binCountPos) { columnBinning.setBinCountPos(binCountPos); } public void setBinPosCaseRate(List<Double> binPosRate) { columnBinning.setBinPosRate(binPosRate); } public void setBinAvgScore(List<Integer> binAvgScore) { columnBinning.setBinAvgScore(binAvgScore); } @JsonIgnore public Double getKs() { return columnStats.getKs(); } @JsonIgnore public Double getIv() { return columnStats.getIv(); } @JsonIgnore public Double getMean() { return columnStats.getMean(); } @JsonIgnore public Double getStdDev() { return columnStats.getStdDev(); } @JsonIgnore public Double getMedian() { return columnStats.getMedian(); } @JsonIgnore public Long getMissingCount() { return columnStats.getMissingCount(); } @JsonIgnore public Long getTotalCount() { return columnStats.getTotalCount(); } @JsonIgnore public Double getMissingPercentage() { return columnStats.getMissingPercentage(); } public void setKs(double ks) { columnStats.setKs(ks); } public void setIv(double iv) { columnStats.setIv(iv); } public void setMax(Double max) { columnStats.setMax(max); } public void setMin(Double min) { columnStats.setMin(min); } public void setMean(Double mean) { columnStats.setMean(mean); } public void setStdDev(Double stdDev) { columnStats.setStdDev(stdDev); } @JsonIgnore public void setMedian(Double median) { columnStats.setMedian(median); } @JsonIgnore public void setMissingCnt(Long cnt) { columnStats.setMissingCount(cnt); } @JsonIgnore public void setTotalCount(Long cnt) { columnStats.setTotalCount(cnt); } @JsonIgnore public void setMissingPercentage(Double missingPercentage) { columnStats.setMissingPercentage(missingPercentage); } @JsonIgnore public List<Double> getBinWeightedNeg() { return this.columnBinning.getBinWeightedNeg(); } @JsonIgnore public List<Double> getBinWeightedPos() { return this.columnBinning.getBinWeightedPos(); } @JsonIgnore public void setBinWeightedNeg(List<Double> binList) { this.columnBinning.setBinWeightedNeg(binList); } @JsonIgnore public void setBinWeightedPos(List<Double> binList) { this.columnBinning.setBinWeightedPos(binList); } /** * @return the version */ public String getVersion() { return version; } /** * @param version * the version to set */ public void setVersion(String version) { this.version = version; } @JsonIgnore public void setPSI(Double psi) { this.columnStats.setPsi(psi); } @JsonIgnore public Double getPSI() { return this.columnStats.getPsi(); } @JsonIgnore public List<String> getUnitStats() { return this.columnStats.getUnitStats(); } @JsonIgnore public void setUnitStats(List<String> unitStats) { this.columnStats.setUnitStats(unitStats); } // /** // * @return the corrArray // */ // public double[] getCorrArray() { // return corrArray; // } // // /** // * @param corrArray // * the corrArray to set // */ // public void setCorrArray(double[] corrArray) { // this.corrArray = corrArray; // } /** * ColumnConfigComparator class */ public static class ColumnConfigComparator implements Comparator<ColumnConfig>, Serializable { private static final long serialVersionUID = -1636776342389912951L; private String key; public ColumnConfigComparator(String key) { this.key = key; } public int compare(ColumnConfig a, ColumnConfig b) { if(key.equalsIgnoreCase("KS")) { return b.getKs().compareTo(a.getKs()); } else { return b.getIv().compareTo(a.getIv()); } } } @Override public ColumnConfig clone() { ColumnConfig other = new ColumnConfig(); other.setColumnName(columnName); other.setColumnNum(columnNum); other.setVersion(version); other.setColumnType(columnType); other.setColumnFlag(columnFlag); other.setFinalSelect(finalSelect); other.setColumnStats(columnStats); other.setColumnBinning(columnBinning); // other.setCorrArray(corrArray); return other; } /** * @return the sampleValues */ public List<String> getSampleValues() { return sampleValues; } /** * @param sampleValues * the sampleValues to set */ public void setSampleValues(List<String> sampleValues) { this.sampleValues = sampleValues; } }