/* * Copyright [2013-2015] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.core.processor; import ml.shifu.shifu.container.obj.*; import ml.shifu.shifu.container.obj.RawSourceData.SourceType; import ml.shifu.shifu.core.validator.ModelInspector; import ml.shifu.shifu.executor.ExecutorManager; import ml.shifu.shifu.executor.ProcessManager; import ml.shifu.shifu.util.CommonUtils; import ml.shifu.shifu.util.Constants; import ml.shifu.shifu.util.Environment; import ml.shifu.shifu.util.JSONUtils; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileWriter; import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; /** * Created by zhanhu on 12/5/16. */ public class ComboModelProcessor extends BasicModelProcessor implements Processor { private static Logger LOG = LoggerFactory.getLogger(ComboModelProcessor.class); public enum ComboStep { NEW, INIT, RUN, EVAL } public static final String ALG_DELIMITER = ","; public static final String SCORE_MEAN_FIELD = "mean"; public static final String TRAIN_EVAL_NAME = "EvalTrain"; private ComboStep comboStep; private String algorithms; private boolean isToShuffleData; @SuppressWarnings("unused") private boolean isToResume; private int comboMaxRetryTimes = 3; private List<ModelTrainConf.ALGORITHM> comboAlgs; private ComboModelTrain comboModelTrain; private ExecutorManager<Integer> excutorManager = new ExecutorManager<Integer>(); public ComboModelProcessor(ComboStep comboStep) { this.comboStep = comboStep; } public ComboModelProcessor(ComboStep comboStep, String algorithms) { this(comboStep); this.algorithms = algorithms; this.comboMaxRetryTimes = Environment.getInt("shifu.combo.max.retry", 3); } @Override public int run() throws Exception { LOG.info("Start to run combo, step - {}", this.comboStep); int status = 0; setUp(ModelInspector.ModelStep.COMBO); if ((status = validate()) > 0) { LOG.error("Validation Fail."); return status; } switch (comboStep) { case NEW: status = createNewCombo(); break; case INIT: this.comboModelTrain = loadComboTrain(); status = initComboModels(); break; case RUN: this.comboModelTrain = loadComboTrain(); status = runComboModels(); break; case EVAL: this.comboModelTrain = loadComboTrain(); status = evalComboModels(); break; } clearUp(ModelInspector.ModelStep.COMBO); return status; } public void setToShuffleData(boolean toShuffleData) { this.isToShuffleData = toShuffleData; } public void setToResume(boolean toResume) { this.isToResume = toResume; } @Override public void clearUp(ModelInspector.ModelStep modelStep) { this.excutorManager.forceShutDown(); } /** * Create ComboTrain.json according algorithm list. * ModelTrain configuration is set from template. * * @return if create new combo successful */ private int createNewCombo() { ComboModelTrain comboModelTrain = new ComboModelTrain(); List<SubTrainConf> subTrainConfList = new ArrayList<SubTrainConf>(this.comboAlgs.size()); for (int i = 0; i < this.comboAlgs.size(); i++) { subTrainConfList.add(createSubTrainConf(i, this.comboAlgs.size(), this.comboAlgs.get(i))); } comboModelTrain.setSubTrainConfList(subTrainConfList); return saveComboTrain(comboModelTrain); } /** * Create folder for sub-models, and create related files for sub-models. * All settings in sub-model can have their own settings - including stats/norm/varsel/train * * @return 0 successful, otherwise failed * @throws IOException any io exception */ private int initComboModels() throws IOException { if (this.comboModelTrain == null) { LOG.error("ComboModelTrain doesn't exists."); return 1; } // 0. create "EvalTrain" evaluation set in parent ModelConfig.json EvalConfig evalTrain = modelConfig.getEvalConfigByName(TRAIN_EVAL_NAME); if (evalTrain == null) { // create if not exists, if user run multi times, the "EvalTrain" may already exists evalTrain = new EvalConfig(); evalTrain.setName(TRAIN_EVAL_NAME); evalTrain.setDataSet(modelConfig.getDataSet().cloneRawSourceData()); modelConfig.getEvals().add(evalTrain); } // 1. create all sub-model folders and do the setting for each of them String[] subModelNames = new String[this.comboModelTrain.getSubTrainConfList().size() - 1]; for (int i = 0; i < this.comboModelTrain.getSubTrainConfList().size() - 1; i++) { SubTrainConf subTrainConf = this.comboModelTrain.getSubTrainConfList().get(i); String subModelName = subTrainConf.getModelName(); // 1.0) save all sub model names, it will used as variables for assemble model subModelNames[i] = subModelName; // 1.1) create folder for sub-model new File(subModelName).mkdirs(); // 1.2) create ModelConfig ModelConfig subModelConfig = this.modelConfig.clone(); if ( StringUtils.isNotBlank(subTrainConf.getDataFilterExpr()) ) { subModelConfig.getDataSet().setFilterExpressions(subTrainConf.getDataFilterExpr()); } subModelConfig.getBasic().setName(subModelName); subModelConfig.setStats(subTrainConf.getModelStatsConf()); subModelConfig.setNormalize(subTrainConf.getModelNormalizeConf()); subModelConfig.setVarSelect(subTrainConf.getModelVarSelectConf()); subModelConfig.setTrain(subTrainConf.getModelTrainConf()); // 1.3) copy all evaluation set except "EvalTrain" List<EvalConfig> subEvalConfigs = new ArrayList<EvalConfig>(); for (EvalConfig eval : modelConfig.getEvals()) { if (!eval.getName().equalsIgnoreCase(TRAIN_EVAL_NAME)) { EvalConfig subEval = eval.clone(); subEval.setPerformanceScoreSelector(SCORE_MEAN_FIELD); subEvalConfigs.add(subEval); } } subModelConfig.setEvals(subEvalConfigs); // 1.4) copy columns/ folder and save to sub-model's ModelConfig.json FileUtils.copyDirectory(new File(Constants.COLUMN_META_FOLDER_NAME), new File(subModelName, Constants.COLUMN_META_FOLDER_NAME)); saveModelConfig(subModelName, subModelConfig); } // 2. create assemble model folder and do setting for it // 2.0) clone from parent model config to generate assemble model config ModelConfig assembleModelConfig = this.modelConfig.clone(); String assembleModelName = this.comboModelTrain.getAssembleTrainConf().getModelName(); new File(assembleModelName).mkdirs(); assembleModelConfig.setModelSetName(assembleModelName); // 2.1) set the training data source to "EvalTrain" score of parent model if (SourceType.HDFS.equals(evalTrain.getDataSet().getSource())) { assembleModelConfig.getDataSet().setDataPath(pathFinder.getEvalScorePath(evalTrain)); assembleModelConfig.getDataSet().setHeaderPath(pathFinder.getEvalScoreHeaderPath(evalTrain)); } else if (SourceType.LOCAL.equals(evalTrain.getDataSet().getSource())) { File evalScoreFile = new File(pathFinder.getEvalScorePath(evalTrain)); assembleModelConfig.getDataSet().setDataPath(evalScoreFile.getAbsolutePath()); assembleModelConfig.getDataSet().setHeaderPath(""); } assembleModelConfig.getDataSet().setDataDelimiter("|"); assembleModelConfig.getDataSet().setHeaderDelimiter("|"); assembleModelConfig.getDataSet().setCategoricalColumnNameFile(null); FileUtils.copyDirectory(new File(Constants.COLUMN_META_FOLDER_NAME), new File(assembleModelName, Constants.COLUMN_META_FOLDER_NAME)); // 2.2) create force selects file assembleModelConfig.getVarSelect().setForceSelectColumnNameFile(null); assembleModelConfig.getVarSelect().setForceRemoveColumnNameFile(null); assembleModelConfig.getVarSelect().setForceEnable(true); assembleModelConfig.getVarSelect().setFilterNum(subModelNames.length); assembleModelConfig.getVarSelect().setFilterEnable(true); // 2.3) set the evaluation data source to the eval's output of parent model List<EvalConfig> assembleEvalConfigs = new ArrayList<EvalConfig>(); for (EvalConfig eval : modelConfig.getEvals()) { if (!eval.getName().equalsIgnoreCase(TRAIN_EVAL_NAME)) { EvalConfig assembleEval = eval.clone(); if (SourceType.HDFS.equals(eval.getDataSet().getSource())) { assembleEval.getDataSet().setDataPath(pathFinder.getEvalScorePath(eval)); assembleEval.getDataSet().setHeaderPath(pathFinder.getEvalScoreHeaderPath(eval)); } else if (SourceType.LOCAL.equals(eval.getDataSet().getSource())) { File evalScoreFile = new File(pathFinder.getEvalScorePath(eval)); assembleEval.getDataSet().setDataPath(evalScoreFile.getAbsolutePath()); assembleEval.getDataSet().setHeaderPath(""); } assembleEval.getDataSet().setDataDelimiter("|"); assembleEval.getDataSet().setHeaderDelimiter("|"); assembleEval.setPerformanceScoreSelector(SCORE_MEAN_FIELD); assembleEvalConfigs.add(assembleEval); } eval.setPerformanceScoreSelector(formatPigNS(subModelNames[0]) + "::" + SCORE_MEAN_FIELD); } assembleModelConfig.setEvals(assembleEvalConfigs); // 2.4) save assemble ModelConfig.json saveModelConfig(assembleModelName, assembleModelConfig); // 3. save parent model config saveModelConfig(); return 0; } /** * Start to train combo models * 1) train sub-models and evaluate sub-model train-eval set (train data as evaluation data) * 2) join train-eval set output for training assemble model * 3) train assemble model * * @return status of execution * 0 - success * others - fail * @throws IOException any io exception */ public int runComboModels() throws IOException { int status = 0; // 0. ran stats, normalization, variable selection, train sub models List<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>(); for (int i = 0; i < this.comboModelTrain.getSubTrainConfList().size() - 1; i++) { SubTrainConf subTrainConf = this.comboModelTrain.getSubTrainConfList().get(i); String subModelName = subTrainConf.getModelName(); Callable<Integer> task = createSubModelTrainTasks(subModelName, genEvalTrainName()); if (task != null) { tasks.add(task); } } if (hasFailTaskResults(this.excutorManager.submitTasksAndRetryIfFail(tasks, this.comboMaxRetryTimes))) { LOG.error("There are errors when training and evaluating sub-models. Please check log."); return 1; } // 1. copy sub model specs to main model directory List<String> subModelScores = new ArrayList<String>(); File modelsDir = new File(Constants.MODELS); modelsDir.mkdirs(); // create if not exist for (int i = 0; i < this.comboModelTrain.getSubTrainConfList().size() - 1; i++) { SubTrainConf subTrainConf = this.comboModelTrain.getSubTrainConfList().get(i); String subModelName = subTrainConf.getModelName(); // create sub directory under models/ File subModelsDir = new File(modelsDir, subModelName); subModelsDir.mkdirs(); // copy ModelConfig.json and ColumnConfig.json FileUtils.copyFile(new File(subModelName, Constants.MODEL_CONFIG_JSON_FILE_NAME), new File(subModelsDir, Constants.MODEL_CONFIG_JSON_FILE_NAME)); FileUtils.copyFile(new File(subModelName, Constants.COLUMN_CONFIG_JSON_FILE_NAME), new File(subModelsDir, Constants.COLUMN_CONFIG_JSON_FILE_NAME)); File[] modelFiles = (new File(subModelName, Constants.MODELS)).listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return !name.startsWith("."); } }); for (int k = 0; k < modelFiles.length; k++) { File modelFile = modelFiles[k]; FileUtils.copyFile(modelFile, new File(new File(Constants.MODELS, subModelName), modelFile.getName())); subModelScores.add(formatPigNS(subModelName) + "::model" + k); } } // 2. eval all sub-models in parent model for ( EvalConfig eval : modelConfig.getEvals() ) { ProcessManager.runShellProcess(".", new String[][]{ new String[]{"shifu", "eval", "-score", eval.getName()}}); } // 3. update the forceselect file for assemble model String assembleModelName = this.comboModelTrain.getAssembleTrainConf().getModelName(); ModelConfig assembleModelConfig = CommonUtils.loadModelConfig( assembleModelName + File.separator + Constants.MODEL_CONFIG_JSON_FILE_NAME, SourceType.LOCAL); String forceSelectNames = createModelNamesFile(assembleModelName, assembleModelName + ".forceselect", null, subModelScores.toArray(new String[0])); assembleModelConfig.getVarSelect().setFilterNum(subModelScores.size()); assembleModelConfig.getVarSelect().setForceSelectColumnNameFile(forceSelectNames); saveModelConfig(assembleModelName, assembleModelConfig); // 4. run the whole process for assemble model ProcessManager.runShellProcess(assembleModelName, new String[][]{ new String[]{"shifu", "init"}, new String[]{"shifu", "stats"}, new String[]{"shifu", "norm", "-shuffle"}, new String[]{"shifu", "varsel"}, new String[]{"shifu", "train", "-shuffle"}}); LOG.info("Finish to run combo train."); return status; } /** * Evaluate the Combo model performance * 1. Evaluate all evaluation sets in sub models; * 2. Run evaluation for assemble model * * @return 0 success, otherwise failed * @throws IOException any io exception */ private int evalComboModels() throws IOException { int status = 0; // 1. For all sub-models, evaluate all evaluation sets except training data set List<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>(); for (EvalConfig evalConfig : this.modelConfig.getEvals()) { if (!evalConfig.getName().equalsIgnoreCase(TRAIN_EVAL_NAME)) { tasks.addAll(createEvaluateTasks(evalConfig.getName())); if (hasFailTaskResults(this.excutorManager.submitTasksAndRetryIfFail(tasks, this.comboMaxRetryTimes))) { LOG.error("Error occurred when evaluate sub-models. Please check log!"); return 1; } tasks.clear(); } } LOG.info("Finish to eval combo train."); return status; } /** * Create train and eval task for sub-model * * @param subModelName sub model name * @param evalSetName eval set name * @return callable instance */ private Callable<Integer> createSubModelTrainTasks(final String subModelName, final String evalSetName) throws IOException { return new Callable<Integer>() { @Override public Integer call() { try { if (isToShuffleData) { return ProcessManager.runShellProcess(subModelName, new String[][]{ new String[]{"shifu", "init"}, new String[]{"shifu", "stats"}, new String[]{"shifu", "norm", "-shuffle"}, new String[]{"shifu", "varsel"}, new String[]{"shifu", "train", "-shuffle"}}); } else { return ProcessManager.runShellProcess(subModelName, new String[][]{ new String[]{"shifu", "init"}, new String[]{"shifu", "stats"}, new String[]{"shifu", "norm",}, new String[]{"shifu", "varsel"}, new String[]{"shifu", "train"}}); } } catch (IOException e) { LOG.error("Fail to run commands.", e); return 1; } } }; } /** * Create evaluation tasks for all sub-models * * @param evalName - the evalset to evaluate * @return list of callable instance * @throws IOException any io exception */ private List<Callable<Integer>> createEvaluateTasks(final String evalName) throws IOException { List<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>(); for (int i = 0; i < this.comboModelTrain.getSubTrainConfList().size(); i++) { SubTrainConf subTrainConf = this.comboModelTrain.getSubTrainConfList().get(i); String evalModelName = null; if (i == this.comboModelTrain.getSubTrainConfList().size() - 1) { evalModelName = subTrainConf.getModelName(); } else { evalModelName = subTrainConf.getModelName(); } final String subModelName = evalModelName; tasks.add(new Callable<Integer>() { @Override public Integer call() { try { return ProcessManager.runShellProcess(subModelName, new String[][]{new String[]{"shifu", "eval", "-run", evalName}}); } catch (IOException e) { LOG.error("Fail to run commands.", e); return 1; } } }); } return tasks; } /** * Shifu combo processor validation * * @return 0 - success * other - fail */ private int validate() { if (ComboStep.NEW.equals(this.comboStep)) { return validate(this.algorithms); } else { File comboTrainFile = new File(Constants.COMBO_CONFIG_JSON_FILE_NAME); if (!comboTrainFile.exists()) { LOG.error("{} doesn't exist. Please run `shifu combo -new <algorithms>` firstly.", Constants.COMBO_CONFIG_JSON_FILE_NAME); return 1; } } return 0; } /** * Validate the algorithms from user's input * * @param algorithms - algorithm list that user want to combo * @return 0 - success * other - fail */ private int validate(String algorithms) { if (StringUtils.isBlank(algorithms)) { LOG.error("The combo algorithms should not be empty"); return 1; } String[] algs = algorithms.split(ALG_DELIMITER); if (algs.length < 3) { LOG.error("At least, you should have 2 basic algorithms, and 1 assembling algorithm."); return 2; } this.comboAlgs = new ArrayList<ModelTrainConf.ALGORITHM>(); for (String alg : algs) { try { ModelTrainConf.ALGORITHM algorithm = ModelTrainConf.ALGORITHM.valueOf(alg); if (algorithm == null) { LOG.error("Unsupported algorithm - {}", alg); return 3; } this.comboAlgs.add(algorithm); } catch (Throwable t) { LOG.error("Unsupported algorithm - {}", alg); return 3; } } return 0; } /** * Create SubTrainConf according the @ModelTrainConf.ALGORITHM * * @param alg - the algorithm, see @ModelTrainConf.ALGORITHM * @return sub train config instance */ private SubTrainConf createSubTrainConf(int seq, int total, ModelTrainConf.ALGORITHM alg) { SubTrainConf subTrainConf = new SubTrainConf(); subTrainConf.setModelStatsConf(createModelStatsConf(alg)); subTrainConf.setModelNormalizeConf(createModelNormalizeConf(alg)); subTrainConf.setModelVarSelectConf(createModelVarSelectConf(alg)); subTrainConf.setModelTrainConf(createModelTrainConf(alg)); if ( seq < total - 1) { subTrainConf.setModelName(genSubModelName(seq, subTrainConf)); } else { subTrainConf.setModelName(genAssembleModelName(modelConfig.getModelSetName())); } return subTrainConf; } /** * Create ModelStatsConf according the @ModelTrainConf.ALGORITHM * * @param alg - the algorithm, see @ModelTrainConf.ALGORITHM * @return stats config instance */ private ModelStatsConf createModelStatsConf(ModelTrainConf.ALGORITHM alg) { ModelStatsConf statsConf = new ModelStatsConf(); if (ModelTrainConf.ALGORITHM.NN.equals(alg) || ModelTrainConf.ALGORITHM.LR.equals(alg)) { statsConf.setBinningAlgorithm(ModelStatsConf.BinningAlgorithm.DynamicBinning); statsConf.setBinningMethod(ModelStatsConf.BinningMethod.EqualTotal); statsConf.setMaxNumBin(20); } else if (ModelTrainConf.ALGORITHM.RF.equals(alg) || ModelTrainConf.ALGORITHM.GBT.equals(alg)) { statsConf.setBinningAlgorithm(ModelStatsConf.BinningAlgorithm.SPDTI); statsConf.setBinningMethod(ModelStatsConf.BinningMethod.EqualPositive); statsConf.setMaxNumBin(20); } return statsConf; } /** * Create ModelNormalizeConf according the @ModelTrainConf.ALGORITHM * * @param alg - the algorithm, see @ModelTrainConf.ALGORITHM * @return normalize config instance */ private ModelNormalizeConf createModelNormalizeConf(ModelTrainConf.ALGORITHM alg) { ModelNormalizeConf normalizeConf = new ModelNormalizeConf(); normalizeConf.setNormType(ModelNormalizeConf.NormType.WOE); normalizeConf.setSampleNegOnly(false); normalizeConf.setSampleRate(1.0); return normalizeConf; } /** * Create ModelVarSelectConf according the @ModelTrainConf.ALGORITHM * * @param alg - the algorithm, see @ModelTrainConf.ALGORITHM * @return varselect config instance */ private ModelVarSelectConf createModelVarSelectConf(ModelTrainConf.ALGORITHM alg) { ModelVarSelectConf varSelectConf = new ModelVarSelectConf(); varSelectConf.setFilterNum(20); if (ModelTrainConf.ALGORITHM.NN.equals(alg) || ModelTrainConf.ALGORITHM.LR.equals(alg)) { varSelectConf.setFilterBy("IV"); } else if (ModelTrainConf.ALGORITHM.RF.equals(alg) || ModelTrainConf.ALGORITHM.GBT.equals(alg)) { varSelectConf.setFilterBy("KS"); } return varSelectConf; } /** * Create @ModelTrainConf according the @ModelTrainConf.ALGORITHM * * @param alg - the algorithm, see @ModelTrainConf.ALGORITHM * @return train config instance */ private ModelTrainConf createModelTrainConf(ModelTrainConf.ALGORITHM alg) { ModelTrainConf trainConf = new ModelTrainConf(); trainConf.setAlgorithm(alg.name()); trainConf.setEpochsPerIteration(1); trainConf.setParams(ModelTrainConf.createParamsByAlg(alg, trainConf)); trainConf.setNumTrainEpochs(100); if (ModelTrainConf.ALGORITHM.NN.equals(alg)) { trainConf.setNumTrainEpochs(200); } else if (ModelTrainConf.ALGORITHM.SVM.equals(alg)) { trainConf.setNumTrainEpochs(100); } else if (ModelTrainConf.ALGORITHM.RF.equals(alg)) { trainConf.setNumTrainEpochs(40000); } else if (ModelTrainConf.ALGORITHM.GBT.equals(alg)) { trainConf.setNumTrainEpochs(40000); } else if (ModelTrainConf.ALGORITHM.LR.equals(alg)) { trainConf.setNumTrainEpochs(100); } trainConf.setBaggingWithReplacement(true); return trainConf; } /** * Generate sub model name * * @param seq - sequence to keep unique * @param subTrainConf SubTrainConf * @return sub model name */ private String genSubModelName(int seq, SubTrainConf subTrainConf) { return this.modelConfig.getBasic().getName() + "_" + subTrainConf.getModelTrainConf().getAlgorithm() + "_" + seq; } /** * Generate assemble model name * * @param modelName model name * @return assemble model bane */ private String genAssembleModelName(String modelName) { return modelName + "_" + Constants.COMBO_ASSEMBLE; } /** * Generate train data evaluation set name * * @return eval train name */ private String genEvalTrainName() { return Constants.COMBO_EVAL_TRAIN; } /** * Save ComboTrain.json into local directory * * @param comboModelTrain combo model train instance * @return 0 success, otherwise failed */ private int saveComboTrain(ComboModelTrain comboModelTrain) { try { JSONUtils.writeValue(new File(Constants.COMBO_CONFIG_JSON_FILE_NAME), comboModelTrain); } catch (Exception e) { LOG.error("Fail to save ComboModelTrain object to ComboTrain.json"); return 1; } return 0; } /** * Load ComboModelTrain from ComboTrain.json * * @return combo model train instance, null if exception */ private ComboModelTrain loadComboTrain() { try { return JSONUtils.readValue(new File(Constants.COMBO_CONFIG_JSON_FILE_NAME), ComboModelTrain.class); } catch (Exception e) { LOG.error("Fail to load ComboModelTrain object from ComboTrain.json"); return null; } } /** * Save ModelConfig into some folder * * @param folder - folder to host ModelConfig.json * @param modelConfig model config instance * @throws IOException any io exception */ private void saveModelConfig(String folder, ModelConfig modelConfig) throws IOException { JSONUtils.writeValue(new File(folder + File.separator + Constants.MODEL_CONFIG_JSON_FILE_NAME), modelConfig); } /** * Check whether there is any fail results in the list * * @param taskResults * @return true - there is any fail result * false - no fail task */ private boolean hasFailTaskResults(List<Integer> taskResults) { if (CollectionUtils.isNotEmpty(taskResults)) { for (Integer result : taskResults) { if (result == null || result != 0) { LOG.error("Found some abnormal result - {}", result); return true; } } } return false; } /** * Create configuration file for sub-model * if the configuration for parent model exists, it will copy that parent configuration firstly * and append new content. * * @param subModelName sub model name * @param namesPrefix prefix of name * @param parentNamesFile parent names of files * @param varNames var names * @return model file name * @throws IOException any io exception */ private String createModelNamesFile(String subModelName, String namesPrefix, String parentNamesFile, String... varNames) throws IOException { String modelNamesCfg = namesPrefix + ".names"; File mnFile = new File(subModelName + File.separator + modelNamesCfg); // copy existing meta file if (StringUtils.isNotBlank(parentNamesFile)) { FileUtils.copyFile(new File(parentNamesFile), mnFile); } // append uid column as meta FileWriter writer = new FileWriter(mnFile); try { for (String var : varNames) { writer.append(var + "\n"); } } catch (IOException e) { // skip it } finally { writer.close(); } return modelNamesCfg; } private String formatPigNS(String name) { return name.replaceAll("-", "_"); } }