/* * Copyright [2013-2014] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.guagua.mapreduce; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import ml.shifu.guagua.GuaguaConstants; import ml.shifu.guagua.GuaguaRuntimeException; import ml.shifu.guagua.coordinator.zk.ZooKeeperUtils; import ml.shifu.guagua.hadoop.io.GuaguaOptionsParser; import ml.shifu.guagua.hadoop.io.GuaguaWritableSerializer; import ml.shifu.guagua.hadoop.util.HDPUtils; import ml.shifu.guagua.io.Bytable; import ml.shifu.guagua.io.HaltBytable; import ml.shifu.guagua.master.MasterComputable; import ml.shifu.guagua.util.ReflectionUtils; import ml.shifu.guagua.worker.WorkerComputable; import org.apache.commons.cli.CommandLine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobID; import org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob; import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * {@link GuaguaMapReduceClient} is the entry point for guagua mapreduce implementation application. * * <p> * To use it in normal Hadoop mode. Use {@link #main(String[])} as entry point. * * <p> * To run jobs in parallel: * * <pre> * GuaguaMapReduceClient client = new GuaguaMapReduceClient(); * client.addJob(args); * client.addJob(args); * client.run(); * </pre> * * <p> * WARNING: In one GuaguaMapReduceClient instance, {@link #addJob(String[])} to make sure job names are no duplicated. * * <p> * If one job is failed, it will be re-submitted again and try, if failed times over two, no re-try. */ public class GuaguaMapReduceClient { static { // pick up new conf XML file and populate it with stuff exported from client Configuration.addDefaultResource(GuaguaConstants.GUAGUA_SITE_FILE); } private static final Logger LOG = LoggerFactory.getLogger(GuaguaMapReduceClient.class); private static final String INIT_JOB_ID_PREFIX = "Guagua-MapReduce-"; private static String embededZooKeeperServer = null; /** * Make guagua run jobs in parallel. */ private JobControl jc; private int jobIndex = 0; private Map<String, Integer> jobIndexMap = new HashMap<String, Integer>(); private Map<Integer, Integer> jobRunningTimes = new HashMap<Integer, Integer>(); private Map<Integer, String[]> jobIndexParams = new HashMap<Integer, String[]>(); private Set<String> failedCheckingJobs = new HashSet<String>(); private static Map<String, Long> firstMasterSuccessTimeMap = new HashMap<String, Long>(); private static Set<String> killedSuccessJobSet = new HashSet<String>(); /** * Default constructor. Construct default JobControl instance. */ public GuaguaMapReduceClient() { this.jc = new JobControl(INIT_JOB_ID_PREFIX); } /** * Add new job to JobControl instance. */ public synchronized void addJob(String[] args) throws IOException { Job job = createJob(args); this.jc.addJob(new ControlledJob(job, null)); if(this.jobIndexMap.containsKey(job.getJobName())) { throw new IllegalStateException("Job name should be unique. please check name with: " + job.getJobName()); } this.jobIndexMap.put(job.getJobName(), this.jobIndex); this.jobIndexParams.put(this.jobIndex, args); this.jobRunningTimes.put(this.jobIndex, 1); this.jobIndex += 1; } /** * Run all jobs added to JobControl. */ public void run() throws IOException { // Initially, all jobs are in wait state. List<ControlledJob> jobsWithoutIds = this.jc.getWaitingJobList(); int totalNeededMRJobs = jobsWithoutIds.size(); LOG.info("{} map-reduce job(s) waiting for submission.", jobsWithoutIds.size()); Thread jcThread = new Thread(this.jc, "Guagua-MapReduce-JobControl"); jcThread.start(); JobClient jobClient = new JobClient(new JobConf(new Configuration())); double lastProg = -1; Set<String> finalSucessfulJobIDs = new HashSet<String>(); while(!this.jc.allFinished()) { try { jcThread.join(1000); } catch (InterruptedException ignore) { Thread.currentThread().interrupt(); } List<ControlledJob> jobsAssignedIdInThisRun = new ArrayList<ControlledJob>(totalNeededMRJobs); for(ControlledJob job: jobsWithoutIds) { if(job.getJob().getJobID() != null) { jobsAssignedIdInThisRun.add(job); LOG.info("Job {} is started.", job.getJob().getJobID().toString()); } else { // This job is not assigned an id yet. } } jobsWithoutIds.removeAll(jobsAssignedIdInThisRun); List<ControlledJob> runningJobs = jc.getRunningJobList(); for(ControlledJob controlledJob: runningJobs) { String jobId = controlledJob.getJob().getJobID().toString(); Counters counters = getCounters(controlledJob.getJob()); Counter doneMaster = counters.findCounter(GuaguaMapReduceConstants.GUAGUA_STATUS, GuaguaMapReduceConstants.MASTER_SUCCESS); Counter doneWorkers = counters.findCounter(GuaguaMapReduceConstants.GUAGUA_STATUS, GuaguaMapReduceConstants.DONE_WORKERS); if((doneMaster != null && doneMaster.getValue() > 0) || (doneWorkers != null && doneWorkers.getValue() > 0)) { // master is done, while workers may be not, wait for at most 2 minutes check // or workers are all done, while master is not, wait for at most 2 minutes check Long initTime = firstMasterSuccessTimeMap.get(jobId); if(initTime == null) { firstMasterSuccessTimeMap.put(jobId, System.currentTimeMillis()); } else { if(System.currentTimeMillis() - initTime >= 2 * 60 * 1000L) { killedSuccessJobSet.add(jobId); killJob(controlledJob.getJob().getConfiguration(), jobId, "Kill job " + jobId + " because of master is already finished, job " + jobId + " is treated as successful as we got models. "); // wait extra 1s to wait for job to be stopped try { Thread.sleep(1 * 1000L); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } } } } List<ControlledJob> successfulJobs = jc.getSuccessfulJobList(); for(ControlledJob controlledJob: successfulJobs) { String jobId = controlledJob.getJob().getJobID().toString(); if(!finalSucessfulJobIDs.contains(jobId)) { LOG.info("Job {} is successful.", jobId); finalSucessfulJobIDs.add(jobId); } } List<ControlledJob> failedJobs = jc.getFailedJobList(); for(ControlledJob controlledJob: failedJobs) { String failedJobId = controlledJob.getJob().getJobID().toString(); if(killedSuccessJobSet.contains(failedJobId)) { if(!finalSucessfulJobIDs.contains(failedJobId)) { LOG.info("Job {} is successful.", failedJobId); finalSucessfulJobIDs.add(failedJobId); } continue; } if(!this.failedCheckingJobs.contains(failedJobId)) { this.failedCheckingJobs.add(failedJobId); String jobName = controlledJob.getJob().getJobName(); Integer jobIndex = this.jobIndexMap.get(jobName); Integer runTimes = this.jobRunningTimes.get(jobIndex); if(runTimes <= 1) { LOG.warn("Job {} is failed, will be submitted again.", jobName); Job newJob = createJob(this.jobIndexParams.get(jobIndex)); this.jc.addJob(new ControlledJob(newJob, null)); this.jobRunningTimes.put(jobIndex, runTimes + 1); this.jobIndexMap.put(newJob.getJobName(), jobIndex); jobsWithoutIds = this.jc.getWaitingJobList(); } else { LOG.warn("Job {} is failed twice, will not be submitted again.", jobName); } } } double prog = calculateProgress(finalSucessfulJobIDs, jc, jobClient) / totalNeededMRJobs; notifyProgress(prog, lastProg); lastProg = prog; try { Thread.sleep(2 * 1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } List<ControlledJob> successfulJobs = jc.getSuccessfulJobList(); List<ControlledJob> failedJobs = jc.getFailedJobList(); LOG.debug("success {}; failed {}; total needed {}", successfulJobs.size(), failedJobs.size(), totalNeededMRJobs); for(ControlledJob controlledJob: successfulJobs) { LOG.info("Sucessful job:"); LOG.info("Job: {} ", controlledJob); } if(totalNeededMRJobs == finalSucessfulJobIDs.size()) { LOG.info("Guagua jobs: 100% complete"); // add failed jobs to debug since all jobs are finished. if(failedJobs != null && failedJobs.size() > 0) { for(ControlledJob controlledJob: failedJobs) { if(finalSucessfulJobIDs.contains(controlledJob.getJob().getJobID().toString())) { LOG.info("Successful job although failed state (job is treated as successful):"); LOG.warn("Job: {} ", toFakedStateString(controlledJob)); } } } } else { if(failedJobs != null && failedJobs.size() > 0) { for(ControlledJob controlledJob: failedJobs) { Counters counters = getCounters(controlledJob.getJob()); if(counters != null) { Counter doneMaster = counters.findCounter(GuaguaMapReduceConstants.GUAGUA_STATUS, GuaguaMapReduceConstants.MASTER_SUCCESS); Counter doneWorkers = counters.findCounter(GuaguaMapReduceConstants.GUAGUA_STATUS, GuaguaMapReduceConstants.DONE_WORKERS); if((doneMaster != null && doneMaster.getValue() > 0) || (doneWorkers != null && doneWorkers.getValue() > 0)) { LOG.info("Successful job although failed state (job is treated as successful):"); LOG.warn("Job: {} ", toFakedStateString(controlledJob)); } else { LOG.info("Failed job:"); LOG.warn("Job: {} ", controlledJob); } } } } } this.jc.stop(); } private static Counters getCounters(Job job) { try { return job.getCounters(); } catch (Exception e) { // no matter IOException or IllegalStateException, just return null; return null; } } private static void killJob(Configuration conf, String jobIdStr, String reason) { LOG.info(reason); // "Kill job because of master is already finished try { org.apache.hadoop.mapred.JobClient jobClient = new org.apache.hadoop.mapred.JobClient( (org.apache.hadoop.mapred.JobConf) conf); JobID jobId = JobID.forName(jobIdStr); RunningJob job = jobClient.getJob(jobId); job.killJob(); } catch (IOException ioe) { throw new GuaguaRuntimeException(ioe); } } public String toFakedStateString(ControlledJob controlledJob) { StringBuffer sb = new StringBuffer(); sb.append("job name:\t").append(controlledJob.getJob().getJobName()).append("\n"); sb.append("job id:\t").append(controlledJob.getJobID()).append("\n"); sb.append("job state:\t").append("SUCCESS").append("\n"); sb.append("job mapred id:\t").append(controlledJob.getJob().getJobID()).append("\n"); sb.append("job message:\t").append(" successful job").append("\n"); sb.append("job has no depending job:\t").append("\n"); return sb.toString(); } /** * Log the progress and notify listeners if there is sufficient progress * * @param prog * current progress * @param lastProg * progress last time */ private void notifyProgress(double prog, double lastProg) { if(prog >= (lastProg + 0.01)) { int perCom = (int) (prog * 100); if(perCom != 100) { LOG.info("Guagua jobs: {}% complete.", perCom); } } } /** * Compute the progress of the current job submitted through the JobControl object jc to the JobClient jobClient * * @param jc * The JobControl object that has been submitted * @param jobClient * The JobClient to which it has been submitted * @return The progress as a percentage in double format * @throws IOException * In case any IOException connecting to JobTracker. */ protected double calculateProgress(Set<String> successJobs, JobControl jc, JobClient jobClient) throws IOException { double prog = 0.0; prog += Math.max(jc.getSuccessfulJobList().size(), successJobs.size()); List<ControlledJob> runnJobs = jc.getRunningJobList(); for(ControlledJob cjob: runnJobs) { prog += progressOfRunningJob(cjob, jobClient); } return prog; } /** * Returns the progress of a Job j which is part of a submitted JobControl object. The progress is for this Job. So * it has to be scaled down by the number of jobs that are present in the JobControl. * * @param cjob * - The Job for which progress is required * @param jobClient * - the JobClient to which it has been submitted * @return Returns the percentage progress of this Job * @throws IOException * In case any IOException connecting to JobTracker. */ protected double progressOfRunningJob(ControlledJob cjob, JobClient jobClient) throws IOException { @SuppressWarnings("deprecation") RunningJob rj = jobClient.getJob(cjob.getJob().getJobID().toString()); if(rj == null && cjob.getJobState() == ControlledJob.State.SUCCESS) return 1; else if(rj == null) return 0; else { return rj.mapProgress(); } } public static void addInputPath(Configuration conf, Path path) throws IOException { path = path.getFileSystem(conf).makeQualified(path); String dirStr = StringUtils.escapeString(path.toString()); String dirs = conf.get(GuaguaMapReduceConstants.MAPRED_INPUT_DIR); conf.set(GuaguaMapReduceConstants.MAPRED_INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr); } /** * Create Hadoop job according to arguments from main. */ @SuppressWarnings("deprecation") public synchronized Job createJob(String[] args) throws IOException { Configuration conf = new Configuration(); // set it here to make it can be over-written. Set task timeout to a long period 20 minutes. conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, conf.getInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 1800000)); conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_TASK_TIMEOUT, conf.getInt(GuaguaMapReduceConstants.MAPREDUCE_TASK_TIMEOUT, 1800000)); GuaguaOptionsParser parser = new GuaguaOptionsParser(conf, args); // process a bug on hdp 2.2.4 String hdpVersion = HDPUtils.getHdpVersionForHDP224(); if(hdpVersion != null && hdpVersion.length() != 0) { conf.set("hdp.version", hdpVersion); HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf); HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf); HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf); HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf); } CommandLine cmdLine = parser.getCommandLine(); checkInputSetting(conf, cmdLine); checkZkServerSetting(conf, cmdLine); checkWorkerClassSetting(conf, cmdLine); checkMasterClassSetting(conf, cmdLine); checkIterationCountSetting(conf, cmdLine); checkResultClassSetting(conf, cmdLine); String name = checkMapReduceNameSetting(cmdLine); @SuppressWarnings("rawtypes") Class<? extends InputFormat> inputFormatClass = checkInputFormatSetting(cmdLine); // set map reduce parameters for specified master-workers architecture // speculative execution should be disabled, for old ones conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false); conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false); // for new hadoop speculative parameters conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, false); conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, false); // set mapreduce.job.max.split.locations to 100 to suppress warnings int maxSplits = conf.getInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 100); if(maxSplits < 100) { maxSplits = 100; } conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, maxSplits); // Set cache to 0. conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0); // Most users won't hit this hopefully and can set it higher if desired conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_COUNTERS_LIMIT, conf.getInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_COUNTERS_LIMIT, 512)); conf.setInt(GuaguaMapReduceConstants.MAPRED_JOB_REDUCE_MEMORY_MB, 0); // append concurrent gc to avoid long gc stop-the-world String childJavaOpts = conf.get(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS, ""); if(childJavaOpts == null || childJavaOpts.length() == 0) { conf.set(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS, GuaguaMapReduceConstants.MAPRED_DEFAULT_CHILD_JAVA_OPTS); } else { String newChildJavaOpts = GuaguaMapReduceConstants.MAPRED_DEFAULT_CHILD_JAVA_OPTS + " " + childJavaOpts; conf.set(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS, newChildJavaOpts.trim()); } Job job = new Job(conf, name); job.setJarByClass(GuaguaMapReduceClient.class); job.setMapperClass(GuaguaMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(inputFormatClass); job.setOutputFormatClass(GuaguaOutputFormat.class); job.setNumReduceTasks(0); return job; } @SuppressWarnings({ "unchecked", "rawtypes" }) private static Class<? extends InputFormat> checkInputFormatSetting(CommandLine cmdLine) { Class<? extends InputFormat> inputFormatClass = GuaguaInputFormat.class; if(cmdLine.hasOption("-inputformat")) { String inputFormatClassName = cmdLine.getOptionValue("inputformat"); try { inputFormatClass = (Class<? extends InputFormat>) Class.forName(inputFormatClassName.trim()); } catch (ClassNotFoundException e) { printUsage(); throw new IllegalArgumentException(String.format( "The inputformat class %s set by '-inputformat' can not be found in class path.", inputFormatClassName.trim()), e); } catch (ClassCastException e) { printUsage(); throw new IllegalArgumentException("Mapreduce input format class set by 'inputformat' should extend " + "'org.apache.hadoop.mapreduce.InputFormat' class."); } } return inputFormatClass; } private static String checkMapReduceNameSetting(CommandLine cmdLine) { String name = "Guagua Master-Workers Job"; if(cmdLine.hasOption("-n")) { name = cmdLine.getOptionValue("n"); } return name; } private static void checkResultClassSetting(Configuration conf, CommandLine cmdLine) { Class<?> masterResultClass; if(!cmdLine.hasOption("-mr")) { printUsage(); throw new IllegalArgumentException("Master result class name should be provided by '-mr' parameter."); } else { String resultClassName = cmdLine.getOptionValue("mr").trim(); try { masterResultClass = Class.forName(resultClassName); } catch (ClassNotFoundException e) { throw new IllegalArgumentException(String.format( "Master result class %s set by '-mr' can not be found in class path.", resultClassName), e); } if(Writable.class.isAssignableFrom(masterResultClass)) { conf.set(GuaguaConstants.GUAGUA_MASTER_IO_SERIALIZER, GuaguaWritableSerializer.class.getName()); conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, resultClassName); } else if(Bytable.class.isAssignableFrom(masterResultClass)) { conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, resultClassName); if(!ReflectionUtils.hasEmptyParameterConstructor(masterResultClass)) { throw new IllegalArgumentException( "Master result class should have default constuctor without any parameters."); } } else { printUsage(); throw new IllegalArgumentException( "Master result class name provided by '-mr' parameter should implement " + "'com.paypal.guagua.io.Bytable' or 'org.apache.hadoop.io.Writable'."); } } Class<?> workerResultClass; if(!cmdLine.hasOption("-wr")) { printUsage(); throw new IllegalArgumentException("Worker result class name should be provided by '-wr' parameter."); } else { String resultClassName = cmdLine.getOptionValue("wr").trim(); try { workerResultClass = Class.forName(resultClassName); } catch (ClassNotFoundException e) { printUsage(); throw new IllegalArgumentException(String.format( "Worker result class %s set by '-wr' can not be found in class path.", resultClassName), e); } if(Writable.class.isAssignableFrom(workerResultClass)) { conf.set(GuaguaConstants.GUAGUA_WORKER_IO_SERIALIZER, GuaguaWritableSerializer.class.getName()); conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, resultClassName); } else if(Bytable.class.isAssignableFrom(workerResultClass)) { conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, resultClassName); if(!ReflectionUtils.hasEmptyParameterConstructor(workerResultClass)) { throw new IllegalArgumentException( "Worker result class should have default constuctor without any parameters."); } } else { printUsage(); throw new IllegalArgumentException( "Worker result class name provided by '-wr' parameter should implement " + "'com.paypal.guagua.io.Bytable' or 'org.apache.hadoop.io.Writable'."); } } if(HaltBytable.class.isAssignableFrom(masterResultClass) && !HaltBytable.class.isAssignableFrom(workerResultClass) || HaltBytable.class.isAssignableFrom(workerResultClass) && !HaltBytable.class.isAssignableFrom(masterResultClass)) { printUsage(); throw new IllegalArgumentException("Worker and master result classes should both implementent HaltBytable."); } } private static void checkIterationCountSetting(Configuration conf, CommandLine cmdLine) { if(!cmdLine.hasOption("-c")) { System.err.println("WARN: Total iteration number is not set, default 50 will be used."); System.err.println("WARN: Total iteration number can be provided by '-c' parameter with non-empty value."); conf.setInt(GuaguaConstants.GUAGUA_ITERATION_COUNT, GuaguaConstants.GUAGUA_DEFAULT_ITERATION_COUNT); } else { int totalIteration = 0; try { totalIteration = Integer.parseInt(cmdLine.getOptionValue("c").trim()); } catch (NumberFormatException e) { printUsage(); throw new IllegalArgumentException("Total iteration number set by '-c' should be a valid number."); } conf.setInt(GuaguaConstants.GUAGUA_ITERATION_COUNT, totalIteration); } } private static void checkMasterClassSetting(Configuration conf, CommandLine cmdLine) { if(!cmdLine.hasOption("-m")) { printUsage(); throw new IllegalArgumentException("Master class name should be provided by '-m' parameter."); } String masterClassOptionValue = cmdLine.getOptionValue("m"); if(masterClassOptionValue == null || masterClassOptionValue.length() == 0) { printUsage(); throw new IllegalArgumentException( "Master class name should be provided by '-m' parameter with non-empty value."); } Class<?> masterClass; try { masterClass = Class.forName(masterClassOptionValue.trim()); } catch (ClassNotFoundException e) { printUsage(); throw new IllegalArgumentException(String.format( "The master class %s set by '-m' can not be found in class path.", masterClassOptionValue.trim()), e); } if(!MasterComputable.class.isAssignableFrom(masterClass)) { printUsage(); throw new IllegalArgumentException( "Master class name provided by '-m' should implement 'com.paypal.guagua.master.MasterComputable' interface."); } if(!ReflectionUtils.hasEmptyParameterConstructor(masterClass)) { throw new IllegalArgumentException("Master class should have default constuctor without any parameters."); } conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, masterClassOptionValue.trim()); } private static void checkWorkerClassSetting(Configuration conf, CommandLine cmdLine) { if(!cmdLine.hasOption("-w")) { printUsage(); throw new IllegalArgumentException("Worker class name should be provided by '-w' parameter."); } String workerClassOptionValue = cmdLine.getOptionValue("w"); if(workerClassOptionValue == null || workerClassOptionValue.length() == 0) { printUsage(); throw new IllegalArgumentException( "Worker class name should be provided by '-w' parameter with non-empty value."); } Class<?> workerClass; try { workerClass = Class.forName(workerClassOptionValue.trim()); } catch (ClassNotFoundException e) { printUsage(); throw new IllegalArgumentException(String.format( "The worker class %s set by '-w' can not be found in class path.", workerClassOptionValue.trim()), e); } if(!WorkerComputable.class.isAssignableFrom(workerClass)) { printUsage(); throw new IllegalArgumentException( "Worker class name provided by '-w' should implement 'com.paypal.guagua.worker.WorkerComputable' interface."); } if(!ReflectionUtils.hasEmptyParameterConstructor(workerClass)) { throw new IllegalArgumentException("Worker class should have default constuctor without any parameters."); } conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, workerClassOptionValue.trim()); } private static void printUsage() { GuaguaOptionsParser.printGenericCommandUsage(System.out); System.out.println("For detailed invalid parameter, please check:"); } private static void checkZkServerSetting(Configuration conf, CommandLine cmdLine) { if(!cmdLine.hasOption("-z")) { System.err.println("WARN: ZooKeeper server is not set, embeded ZooKeeper server will be started."); System.err .println("WARN: For big data guagua application with fail-over zookeeper servers, independent ZooKeeper instances are recommended."); System.err.println("WARN: Zookeeper servers can be provided by '-z' parameter with non-empty value."); conf.set(GuaguaConstants.GUAGUA_ZK_EMBEDED, "true"); // change default embedded zookeeper server to master zonde boolean isZkInClient = conf.getBoolean(GuaguaConstants.GUAGUA_ZK_EMBEDBED_IS_IN_CLIENT, false); if(isZkInClient) { synchronized(GuaguaMapReduceClient.class) { if(embededZooKeeperServer == null) { // 1. start embed zookeeper server in one thread. int embedZkClientPort = 0; try { embedZkClientPort = ZooKeeperUtils.startEmbedZooKeeper(); } catch (IOException e) { throw new RuntimeException(e); } // 2. check if it is started. ZooKeeperUtils.checkIfEmbedZooKeeperStarted(embedZkClientPort); try { embededZooKeeperServer = InetAddress.getLocalHost().getHostName() + ":" + embedZkClientPort; } catch (UnknownHostException e) { throw new RuntimeException(e); } } } // 3. set local embed zookeeper server address conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, embededZooKeeperServer); } else { conf.set( GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, conf.get( GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, "ml.shifu.guagua.master.MasterTimer,ml.shifu.guagua.master.MemoryStatsMasterInterceptor,ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor,ml.shifu.guagua.master.NettyMasterCoordinator ")); conf.set( GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, conf.get( GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, "ml.shifu.guagua.worker.WorkerTimer,ml.shifu.guagua.worker.MemoryStatsWorkerInterceptor,ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor,ml.shifu.guagua.worker.NettyWorkerCoordinator")); System.err.println("WARN: Zookeeper server will be started in master node of cluster"); } return; } else { conf.set(GuaguaConstants.GUAGUA_ZK_EMBEDED, "false"); String zkServers = cmdLine.getOptionValue("z"); if(zkServers == null || zkServers.length() == 0) { throw new IllegalArgumentException( "Zookeeper servers should be provided by '-z' parameter with non-empty value."); } if(ZooKeeperUtils.checkServers(zkServers)) { conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, zkServers.trim()); } else { throw new RuntimeException("Your specifed zookeeper instance is not alive, please check."); } } } private static void checkInputSetting(Configuration conf, CommandLine cmdLine) throws IOException { if(!cmdLine.hasOption("-i")) { printUsage(); throw new IllegalArgumentException("Input should be provided by '-i' parameter."); } addInputPath(conf, new Path(cmdLine.getOptionValue("i").trim())); } public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { if(args.length == 0 || (args.length == 1 && (args[0].equals("h") || args[0].equals("-h") || args[0].equals("-help") || args[0] .equals("help")))) { GuaguaOptionsParser.printGenericCommandUsage(System.out); System.exit(0); } GuaguaMapReduceClient client = new GuaguaMapReduceClient(); Job job = client.createJob(args); job.waitForCompletion(true); } }