/* * Copyright [2012-2014] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.container.obj; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import ml.shifu.shifu.container.obj.RawSourceData.SourceType; import ml.shifu.shifu.fs.PathFinder; import ml.shifu.shifu.util.CommonUtils; import ml.shifu.shifu.util.Constants; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.Path; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * EvalConfig class */ @JsonIgnoreProperties(ignoreUnknown = true) public class EvalConfig { private String name; private RawSourceData dataSet; private Integer performanceBucketNum = 10; private String performanceScoreSelector = "mean"; private String scoreMetaColumnNameFile; private Map<String, String> customPaths; /** * For typical 0-1 binary regression, this is set to be true, while for other regression, better to set it to false * as normal regression. */ private Boolean gbtConvertToProb = Boolean.TRUE; /** * Cache meta columns to a list to avoid reading this file for several times */ @JsonIgnore private volatile List<String> metaColumns = null; /** * Cache raw score meta columns to avoid reading file several times */ @JsonIgnore private volatile List<String> scoreMetaColumns = null; public EvalConfig() { customPaths = new HashMap<String, String>(1); /** * Since most user won't use this function, * hidden the custom paths for creating new model. */ /* * customPaths.put(Constants.KEY_MODELS_PATH, null); * customPaths.put(Constants.KEY_SCORE_PATH, null); * customPaths.put(Constants.KEY_CONFUSION_MATRIX_PATH, null); * customPaths.put(Constants.KEY_PERFORMANCE_PATH, null); */ } /** * @return the models_path */ @JsonIgnore public String getModelsPath() { return ((customPaths == null) ? null : customPaths.get(Constants.KEY_MODELS_PATH)); } /** * @return the score_path */ @JsonIgnore public String getScorePath() { return ((customPaths == null) ? null : customPaths.get(Constants.KEY_SCORE_PATH)); } /** * @return the performance_path */ @JsonIgnore public String getPerformancePath() { return ((customPaths == null) ? null : customPaths.get(Constants.KEY_PERFORMANCE_PATH)); } /** * @return the confusionMatrixPath */ @JsonIgnore public String getConfusionMatrixPath() { return ((customPaths == null) ? null : customPaths.get(Constants.KEY_CONFUSION_MATRIX_PATH)); } @JsonIgnore public List<String> getScoreMetaColumns(ModelConfig modelConfig) throws IOException { if(scoreMetaColumns == null) { synchronized (this) { if (scoreMetaColumns == null) { if ( StringUtils.isNotBlank(scoreMetaColumnNameFile) ) { String path = scoreMetaColumnNameFile; if ( SourceType.HDFS.equals(dataSet.getSource()) ) { PathFinder pathFinder = new PathFinder(modelConfig); File file = new File(scoreMetaColumnNameFile); path = new Path(pathFinder.getEvalSetPath(this), file.getName()).toString(); } String delimiter = StringUtils.isBlank(dataSet.getHeaderDelimiter()) ? dataSet.getDataDelimiter() : dataSet.getHeaderDelimiter(); scoreMetaColumns = CommonUtils.readConfFileIntoList(path, dataSet.getSource(), delimiter); } if ( this.scoreMetaColumns == null ) { this.scoreMetaColumns = new ArrayList<String>(); } } } } return scoreMetaColumns; } @JsonIgnore public List<String> getAllMetaColumns(ModelConfig modelConfig) throws IOException { if(metaColumns == null) { synchronized(this) { if(metaColumns == null) { List<String> scoreMetaColumns = getScoreMetaColumns(modelConfig); if ( scoreMetaColumns != null ) { this.metaColumns = new ArrayList<String>(scoreMetaColumns); } String metaColumnNameFile = dataSet.getMetaColumnNameFile(); if(StringUtils.isNotBlank(metaColumnNameFile)) { String path = metaColumnNameFile; if ( SourceType.HDFS.equals(dataSet.getSource()) ) { PathFinder pathFinder = new PathFinder(modelConfig); File file = new File(metaColumnNameFile); path = new Path(pathFinder.getEvalSetPath(this), file.getName()).toString(); } String delimiter = StringUtils.isBlank(dataSet.getHeaderDelimiter()) ? dataSet.getDataDelimiter() : dataSet.getHeaderDelimiter(); List<String> rawMetaColumns = CommonUtils.readConfFileIntoList(path, dataSet.getSource(), delimiter); if( CollectionUtils.isNotEmpty(metaColumns) ) { for(String column: rawMetaColumns) { if(!metaColumns.contains(column)) { metaColumns.add(column); } } } else { metaColumns = rawMetaColumns; } } if ( this.metaColumns == null ) { this.metaColumns = new ArrayList<String>(); } } } } return metaColumns; } public String getName() { return name; } public void setName(String name) { this.name = name; } public synchronized RawSourceData getDataSet() { return dataSet; } public synchronized void setDataSet(RawSourceData dataSet) { this.dataSet = dataSet; } public Integer getPerformanceBucketNum() { return performanceBucketNum; } public void setPerformanceBucketNum(Integer performanceBucketNum) { this.performanceBucketNum = performanceBucketNum; } public synchronized String getScoreMetaColumnNameFile() { return scoreMetaColumnNameFile; } public synchronized void setScoreMetaColumnNameFile(String scoreMetaColumnNameFile) { this.scoreMetaColumnNameFile = scoreMetaColumnNameFile; } public String getPerformanceScoreSelector() { return performanceScoreSelector; } public void setPerformanceScoreSelector(String performanceScoreSelector) { this.performanceScoreSelector = performanceScoreSelector; } public Map<String, String> getCustomPaths() { return customPaths; } public void setCustomPaths(Map<String, String> customPaths) { this.customPaths = customPaths; } /** * @return the gbtConvertToProb */ public Boolean getGbtConvertToProb() { return gbtConvertToProb; } /** * @param gbtConvertToProb * the gbtConvertToProb to set */ public void setGbtConvertToProb(Boolean gbtConvertToProb) { this.gbtConvertToProb = gbtConvertToProb; } @Override public EvalConfig clone() { EvalConfig other = new EvalConfig(); other.setCustomPaths(new HashMap<String, String>(customPaths)); other.setDataSet(dataSet.clone()); other.setGbtConvertToProb(gbtConvertToProb); other.setName(name); other.setPerformanceBucketNum(performanceBucketNum); other.setPerformanceScoreSelector(performanceScoreSelector); other.setScoreMetaColumnNameFile(scoreMetaColumnNameFile); return other; } }