/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import java.util.LinkedList; import java.util.Iterator; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.SimpleDateFormat; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.conf.Configuration; /** * Used to manage the corona releases */ public class CoronaReleaseManager extends Thread { /** the release dir to copy from */ public static final String MAPRED_RELEASE_DIR = "mapred.release.dir"; /** the dir that holds all the copies of the release */ public static final String MAP_RELEASE_WORKING_DIR = "mapred.release.working.dir"; /** the interval that the thread will wake up to check the releases */ public static final String RELEASE_DIR_CLEAN_INTERVAL = "mapred.release.dir.cleanInterval"; /** the threshold that old releases will be removed */ public static final String RELEASE_DIR_CLEAN_THRESHOLD = "mapred.release.dir.cleanThreshold"; /** the pattern that decides if a file in the release dir shall be copied */ public static final String RELEASE_COPY_PATTERN = "mapred.release.file.pattern"; /** if this one is specified, only this file is used to * check if there is a new release */ public static final String CORONA_RELEASE_FILE_CHECK = "mapred.release.file.check"; /** Right after a release is copied, a tag file will be created to specify * that the copy is complete. When TT is restarted, if the tag file exists * for the latest release copy, no more copy is needed. When a JT gets a * release, the modification time of this tag file will be changed. So * thethread will know when to clean up. */ public static final String RELEASE_TAG_FILE = "RELEASE_COPY_DONE"; private static final Log LOG = LogFactory.getLog(CoronaReleaseManager.class); /** releaseDir is the original string for the release dir. * The directory retrieved from releasePath may not be the same as * the one specified in the classpath, for example .././ */ private String releaseDir; private Path releasePath; private Path workingPath; private long cleanThreshold; private long cleanInterval; private Pattern release_pattern; /** if CORONA_RELEASE_FILE_CHECK is set, only this file is used to * check if there is a new release */ private String coronaReleaseFileCheck; private final Configuration conf; private FileSystem fs; private SimpleDateFormat formatter; private LinkedList<CoronaRelease> releaseList; private boolean shutdownFlag = false; class CoronaRelease { Path copiedPath; long releaseTimestamp; LinkedList<JobID> jobids; boolean latest; byte[] fingerPrint; CoronaRelease(Path copiedPath, long releaseTimestamp, JobID jobid, byte[] fingerPrint) { this.copiedPath = copiedPath; this.releaseTimestamp = releaseTimestamp; this.latest = true; jobids = new LinkedList<JobID>(); if (jobid != null) { jobids.add(jobid); } this.fingerPrint = fingerPrint; } boolean checkIntegrity(byte[] fp) { if (fingerPrint == null || fp == null || fingerPrint.length != fp.length) { return false; } for (int i = 0; i < fp.length; ++ i) { if (fingerPrint[i] != fp[i]) { return false; } } return true; } } public CoronaReleaseManager(Configuration conf) throws IOException { this.conf = conf; fs = FileSystem.newInstanceLocal(conf); releaseDir = conf.get(MAPRED_RELEASE_DIR, ""); coronaReleaseFileCheck = conf.get(CORONA_RELEASE_FILE_CHECK, ""); String workingDir = conf.get(MAP_RELEASE_WORKING_DIR, ""); cleanInterval = conf.getLong(RELEASE_DIR_CLEAN_INTERVAL, 300000L); cleanThreshold = conf.getLong(RELEASE_DIR_CLEAN_THRESHOLD, 172800000L); String patternString = conf.get(RELEASE_COPY_PATTERN, ""); if (patternString.isEmpty()) { release_pattern = null; } else { release_pattern = Pattern.compile(patternString); } releaseList = new LinkedList<CoronaRelease>(); if (!releaseDir.isEmpty()) { releasePath = new Path(releaseDir); } if (!workingDir.isEmpty()) { workingPath = new Path(workingDir); } formatter = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); } @Override public void run() { if (releasePath == null || workingPath == null) { LOG.error("The releaseDir or workingDir is empty, exiting..."); return; } while (!shutdownFlag) { // check if there is a new release to copy getRelease(null); // check if old releases can be de-referenced checkOldRelease(); // check if there is any old dirs to cleanup try { long currentTime = System.currentTimeMillis(); for (FileStatus dirStat: fs.listStatus(workingPath)) { if (!dirStat.isDir()) { continue; } // If the dir 's tag file is modifies/accessed 2 days ago, // and if it is not in the release list Path dirPath = dirStat.getPath(); Path tagPath = new Path(dirPath, RELEASE_TAG_FILE); try { if (!fs.exists(tagPath)) { LOG.info("Tag " + tagPath + " is missing, removing " + dirPath); removeRelease(dirPath); } else { FileStatus fileStat = fs.getFileStatus(tagPath); if (currentTime - fileStat.getModificationTime() > cleanThreshold && checkPath(dirPath)) { // recursively delete all the files/dirs LOG.info("Remove old release " + dirPath); removeRelease(dirPath); } } } catch (IOException e) { LOG.error("Error in checking " + tagPath, e); } } } catch (IOException ioe) { LOG.error("IOException when clearing dir ", ioe); } try { Thread.sleep(cleanInterval); } catch (InterruptedException e) { } } } public void shutdown() { shutdownFlag = true; } public void returnRelease(JobID jobid) { synchronized (releaseList) { Iterator<CoronaRelease> crIt = releaseList.iterator(); // A single jobid may have used multiple releases while (crIt.hasNext()) { CoronaRelease cr = crIt.next(); Iterator<JobID> jobIdIt = cr.jobids.iterator(); while (jobIdIt.hasNext()) { JobID existingJobID = jobIdIt.next(); if (existingJobID.toString().equals(jobid.toString())) { jobIdIt.remove(); LOG.info("Return release " + cr.copiedPath + " for " + jobid); return; } } } } } public String getRelease(JobID jobid) { if (releasePath == null || workingPath == null) { LOG.error("The releaseDir or workingDir is empty"); return null; } try { if (!fs.exists(releasePath)) { LOG.info(releasePath + " is not existing"); return null; } } catch (IOException e) { LOG.error("IOException in checking " + releasePath, e); return null; } long currentTimeStamp = getLastTimeStamp(); CoronaRelease curCR = null; synchronized (releaseList) { // check if the most current one loaded for (CoronaRelease cr: releaseList) { if (cr.releaseTimestamp == currentTimeStamp) { if (jobid != null) { // update the timestamp Path donePath = new Path(cr.copiedPath, RELEASE_TAG_FILE); try { FSDataOutputStream fos = fs.create(donePath); fos.close(); } catch (IOException e) { LOG.error("Unable to recreate " + donePath); return null; } cr.jobids.add(jobid); LOG.info("Get existing release " + cr.copiedPath + " for " + jobid); } // check the finger print of copied path in case some body // delete the file in it byte [] fingerPrint = getFingerPrint(cr.copiedPath); if (fingerPrint == null) { LOG.error("Unable to get the finger print " + cr.copiedPath); return null; } if (!cr.checkIntegrity(fingerPrint)) { LOG.error("The finger print of " + cr.copiedPath + " is not correct."); curCR = cr; break; } return cr.copiedPath.toString(); } } // copied the most current release Path newWorkingPath = new Path(workingPath, formatter.format(currentTimeStamp)); LOG.info("Copy the latest release to " + newWorkingPath); if (copyRelease(releasePath, newWorkingPath, true, true)) { byte [] fingerPrint = getFingerPrint(newWorkingPath); if (fingerPrint == null) { LOG.error("Unable to get the finger print " + newWorkingPath); return null; } if (curCR != null) { curCR.fingerPrint = fingerPrint; return curCR.copiedPath.toString(); } CoronaRelease cr = new CoronaRelease(newWorkingPath, currentTimeStamp, jobid, fingerPrint); for (CoronaRelease tmpcr: releaseList) { tmpcr.latest = false; } releaseList.add(cr); LOG.info("Done with copying the latest release to " + newWorkingPath); if (jobid != null) { LOG.info("copied the latest release " + newWorkingPath + " for " + jobid); } return newWorkingPath.toString(); } else { LOG.error("Failed to copy the latest release to " + newWorkingPath); if (jobid != null) { LOG.error("Unable to get any release for " + jobid); } return null; } } } private void checkOldRelease() { synchronized (releaseList) { // check if there is any old one to remove Iterator<CoronaRelease> crIt = releaseList.iterator(); while (crIt.hasNext()) { CoronaRelease cr = crIt.next(); if (cr.jobids.size() == 0 && !cr.latest) { crIt.remove(); LOG.info("Remove " + cr.copiedPath + " from release list"); } } } } public String getOriginal() { return releaseDir; } private boolean checkPath(Path inPath) { synchronized (releaseList) { Iterator<CoronaRelease> crIt = releaseList.iterator(); while (crIt.hasNext()) { CoronaRelease cr = crIt.next(); if (cr.copiedPath.toString().equals(inPath.toUri().getPath().toString())) { return false; } } } return true; } /** getLastStamp will go throught all the files and directories in the release * directory, and find the largest timestamp. This is used to check if there * is any new release. RELEASE_COPY_PATTERN and CORONA_RELEASE_FILE_CHECK can * be used to limit the files checked */ private long getLastTimeStamp() { long result = -1; if (coronaReleaseFileCheck != null && !coronaReleaseFileCheck.isEmpty()) { result = getLastTimeStamp(new Path(releasePath, coronaReleaseFileCheck)); if (result > 0) { return result; } } return getLastTimeStamp(releasePath); } /** * Get the release directory's latest timestamp */ private long getLastTimeStamp(Path pathToCheck) { long lastTimeStamp = -1; long tmpTimeStamp = -1; try { for (FileStatus fileStat: fs.listStatus(pathToCheck)) { Path srcPath = fileStat.getPath(); if (!fileStat.isDir()) { boolean checkFlag = true; if (release_pattern != null) { // just need to check the files that match the pattern Matcher m = release_pattern.matcher(srcPath.toString()); if (!m.find()) { checkFlag = false; } } if (checkFlag) { tmpTimeStamp = fileStat.getModificationTime(); } else { continue; } } else { tmpTimeStamp = getLastTimeStamp(srcPath); } if (tmpTimeStamp > lastTimeStamp) { lastTimeStamp = tmpTimeStamp; } } } catch (IOException ioe) { LOG.error("IOException when checking timestamp ", ioe); } return lastTimeStamp; } /** For every jar files from the source, create a link in the dest */ private boolean copyRelease(Path src, Path dest, boolean isTop, boolean isForced) { try { if (!fs.exists(dest)) { if (!fs.mkdirs(dest)) { LOG.error("Unable to make dir " + dest.toString()); return false; } } else { if (isTop && !isForced) { Path donePath = new Path(dest, RELEASE_TAG_FILE); if (fs.exists(donePath)) { LOG.info(donePath + " exists. There is no need to copy again"); return true; } } } for (FileStatus fileStat: fs.listStatus(src)) { Path srcPath = fileStat.getPath(); if (!fileStat.isDir()) { boolean copyFlag = true; if (release_pattern != null) { Matcher m = release_pattern.matcher(srcPath.toString()); if (!m.find()) { copyFlag = false; } } if (copyFlag) { Path destPath = new Path(dest, srcPath.getName()); fs.copyFromLocalFile(srcPath, destPath); } } else { Path destPath = new Path(dest, srcPath.getName()); if (!copyRelease(srcPath, destPath, false, isForced)) { LOG.error("Unable to create link for " + srcPath.toString() + " as " + destPath.toString()); return false; } } } if (isTop) { // create the tag file Path donePath = new Path(dest, RELEASE_TAG_FILE); FSDataOutputStream fos = fs.create(donePath); fos.close(); } } catch (IOException ioe) { LOG.error("IOException when link dir ", ioe); return false; } return true; } private boolean removeRelease(Path pathToRemove) { try { fs.delete(pathToRemove, true); } catch (IOException ioe) { LOG.error("IOException when remove release " + pathToRemove.toString(), ioe); return false; } return true; } private byte [] getFingerPrint(Path path) { MessageDigest messageDigest; try { messageDigest = MessageDigest.getInstance("MD5"); long currentTime = System.currentTimeMillis(); computeFingerPrint("", path, messageDigest); long endTime = System.currentTimeMillis(); LOG.info((endTime-currentTime) + " ms spent to get finger print"); return messageDigest.digest(); } catch (NoSuchAlgorithmException e) { return null; } } private void computeFingerPrint(String parent, Path path, MessageDigest messageDigest) { try { for (FileStatus fileStat: fs.listStatus(path)) { Path srcPath = fileStat.getPath(); if (!fileStat.isDir()) { String fileName = srcPath.getName(); if (!fileName.equals(RELEASE_TAG_FILE)) { String finger = parent + fileName + fileStat.getModificationTime(); messageDigest.update(finger.getBytes("UTF-8")); } } else { computeFingerPrint(parent + srcPath.getName(), srcPath, messageDigest); } } } catch (IOException ioe) { LOG.error("IOException when compute finger print " + path.toString(), ioe); } } }