/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.io.File;
import java.io.InterruptedIOException;
import java.util.Date;
import java.lang.Thread;
import java.net.InetSocketAddress;

import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.*;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.AvatarNode;
import org.apache.hadoop.hdfs.tools.offlineImageViewer.LsImageVisitor;
import org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.hdfs.util.InjectionHandler;
import org.apache.hadoop.http.HttpServer;

/**
 * This class drives the ingest of transaction logs from primary.
 * It also implements periodic checkpointing of the primary namenode.
 */
public class Standby implements Runnable {

  public static final Log LOG = AvatarNode.LOG;
  private static final long CHECKPOINT_DELAY = 10000; // 10 seconds
  private AvatarNode avatarNode;
  private Configuration confg; // configuration of local standby namenode
  private Configuration startupConf; // original configuration of AvatarNode
  private FSImage fsImage; // fsImage of the current namenode.
  private FSNamesystem fsnamesys; // fsnamesystem of the local standby namenode
  volatile private Ingest ingest; // object that processes transaction logs from primary
  volatile private Thread ingestThread; // thread that is processing the transaction log
  volatile private boolean running;
  private final String machineName; // host name of name node

  //
  // These are for the Secondary NameNode.
  //
  private String fsName; // http name obtained from getRemoteNamenodeHttpName()
  private InetSocketAddress nameNodeAddr; // remote primary namenode address
  private NamenodeProtocol primaryNamenode; // remote primary namenode
  private HttpServer infoServer;
  private int infoPort;
  private String infoBindAddress;
  private long checkpointPeriod; // in seconds
  private long checkpointSize; // edit log size that triggers a checkpoint (logged as bytes)
  private long lastCheckpointTime;
  private long earlyScheduledCheckpointTime = Long.MAX_VALUE;
  private long sleepBetweenErrors;
  private boolean checkpointEnabled;
  volatile private Thread backgroundThread; // thread for secondary namenode
  volatile private CheckpointSignature sig;
  private volatile String checkpointStatus;

  // two different types of ingested file
  public enum IngestFile { EDITS, EDITS_NEW }

  // allowed states of the ingest thread
  enum StandbyIngestState {
    NOT_INGESTING,
    INGESTING_EDITS,
    QUIESCING_EDITS,
    CHECKPOINTING,
    INGESTING_EDITS_NEW,
    QUIESCING_EDITS_NEW,
    STANDBY_QUIESCED
  }

  // currently consumed ingest (edits, or edits.new)
  private volatile File currentIngestFile = null;
  protected volatile StandbyIngestState currentIngestState
      = StandbyIngestState.NOT_INGESTING;
  protected Object ingestStateLock = new Object();
  private boolean lastFinalizeCheckpointFailed = false;

  // names of the edits files
  private final File editsFile;
  private final File editsFileNew;
  private final File tmpImageFileForValidation;

  // The Standby can either be processing transaction logs
  // from the primary namenode or it could be doing a checkpoint to upload a merged
  // fsimage to the primary.
  // The startupConf is the original configuration that was used to start the
  // AvatarNode. It is used by the secondary namenode to talk to the primary.
  // The "conf" is the configuration of the local standby namenode.
  //
  Standby(AvatarNode avatarNode, Configuration startupConf, Configuration conf)
      throws IOException {
    this.running = true;
    this.avatarNode = avatarNode;
    this.confg = conf;
    this.startupConf = startupConf;
    this.fsImage = avatarNode.getFSImage();
    this.fsnamesys = avatarNode.getNamesystem();
    this.sleepBetweenErrors = startupConf.getInt("hdfs.avatarnode.sleep", 5000);
    initSecondary(startupConf); // start webserver for secondary namenode

    this.machineName =
        DNS.getDefaultHost(conf.get("dfs.namenode.dns.interface","default"),
                           conf.get("dfs.namenode.dns.nameserver","default"));
    LOG.info("machineName=" + machineName);

    this.editsFile = this.avatarNode.getRemoteEditsFile(conf);
    this.editsFileNew = this.avatarNode.getRemoteEditsFileNew(conf);

    InetSocketAddress addr = NameNode.getAddress(conf);
    this.tmpImageFileForValidation = new File("/tmp",
        "hadoop_image." + addr.getHostName() + ":" + addr.getPort());
    checkpointStatus("No checkpoint initiated");
  }

  /**
   * Main loop of the standby: while {@code running} is set it checks for a
   * stale checkpoint, triggers a checkpoint when one is due, (re)creates the
   * ingest for "edits" if none exists, and then sleeps for
   * {@code sleepBetweenErrors} milliseconds.
   */
  @Override
  public void run() {
    backgroundThread = Thread.currentThread();
    while (running) {
      try {
        InjectionHandler.processEventIO(InjectionEvent.STANDBY_BEGIN_RUN);
        // if the checkpoint periodicity or the checkpoint size has
        // exceeded the configured parameters, then also we have to checkpoint
        //
        long now = AvatarNode.now();
        // Check to see if the primary is somehow checkpointing itself. If so, then
        // exit the StandbyNode, we cannot have two daemons checkpointing the same
        // namespace at the same time
        if (hasStaleCheckpoint()) {
          // quiesce() joins backgroundThread; clear it so we do not join ourselves
          backgroundThread = null;
          quiesce(AvatarNode.TXID_IGNORE);
          break;
        }

        if (lastCheckpointTime == 0 ||
            (lastCheckpointTime + 1000 * checkpointPeriod < now) ||
            (earlyScheduledCheckpointTime < now) ||
            avatarNode.editSize(confg) > checkpointSize) {

          // schedule an early checkpoint if this current one fails.
          earlyScheduledCheckpointTime = now + CHECKPOINT_DELAY;
          doCheckpoint();
          earlyScheduledCheckpointTime = Long.MAX_VALUE;
          lastCheckpointTime = now;

          InjectionHandler
              .processEvent(InjectionEvent.STANDBY_AFTER_DO_CHECKPOINT);
          // set the last expected checkpoint time on the primary.
          avatarNode.setStartCheckpointTime(startupConf);
        }

        // if edit and edits.new both exists, then we schedule a checkpoint
        // to occur very soon.
        // Only reschedule checkpoint if it is not scheduled to occur even sooner
        if ((avatarNode.twoEditsFile(startupConf)) &&
            (earlyScheduledCheckpointTime > now + CHECKPOINT_DELAY)) {
          LOG.warn("Standby: edits and edits.new found, scheduling early checkpoint.");
          earlyScheduledCheckpointTime = now + CHECKPOINT_DELAY;
        }

        // if the checkpoint creation has switched off ingesting, then we restart the
        // ingestion here.
        if (ingest == null) {
          InjectionHandler.processEvent(InjectionEvent.STANDBY_CREATE_INGEST_RUNLOOP);
          instantiateIngest(IngestFile.EDITS);
        }
        try {
          Thread.sleep(sleepBetweenErrors);
        } catch (InterruptedException e) {
          return;
        }
      } catch (SaveNamespaceCancelledException e) {
        return;
      } catch (IOException e) {
        LOG.warn("Standby: encounter exception " + StringUtils.stringifyException(e));
        if (!running) { // standby is quiescing
          return;
        }
        try {
          Thread.sleep(sleepBetweenErrors);
        } catch (InterruptedException e1) {
          // give a chance to exit this thread, if necessary
        }

        // since we had an error, we have to cleanup the ingest thread
        if (ingest != null) {
          ingest.stop();
          try {
            ingestThread.join();
            LOG.info("Standby: error cleanup Ingest thread exited.");
          } catch (InterruptedException em) {
            String msg = "Standby: error cleanup Ingest thread did not exit. " + em;
            LOG.info(msg);
            // keep the interruption as the cause so it is not lost
            throw new RuntimeException(msg, em);
          }
          clearIngestState();
        }
      } catch (Throwable e) {
        LOG.warn("Standby: encounter exception ", e);
        running = false;
      }
    }
  }

  /**
   * Stops the secondary info server if the standby is still marked as running.
   * Errors while stopping the server are logged and not rethrown.
   */
  synchronized void shutdown() {
    if (!running) {
      return;
    }
    if (infoServer != null) {
      try {
        LOG.info("Shutting down secondary info server");
        infoServer.stop();
        infoServer = null;
      } catch (Exception ex) {
        LOG.error("Error shutting down infoServer", ex);
      }
    }
  }

  /**
   * Quiesces the ingest for the given file type and waits for the
   * ingest thread to exit.
   *
   * @param type ingest to quiesce
   * @param sig signature for quiescing (checkpointing)
   * @throws IOException if the standby is not in an expected state or the
   *         wait for the ingest thread is interrupted
   */
  private void quiesceIngest(IngestFile type, CheckpointSignature sig)
      throws IOException {
    File edits;
    InjectionHandler.processEvent(InjectionEvent.STANDBY_QUIESCE_INGEST);
    synchronized (ingestStateLock) {
      if (type == IngestFile.EDITS) {
        assertState(StandbyIngestState.INGESTING_EDITS,
            StandbyIngestState.QUIESCING_EDITS);
      } else {
        assertState(StandbyIngestState.INGESTING_EDITS_NEW,
            StandbyIngestState.QUIESCING_EDITS_NEW);
      }
      edits = getIngestFile(type);
      currentIngestState = (type == IngestFile.EDITS)
          ? StandbyIngestState.QUIESCING_EDITS
          : StandbyIngestState.QUIESCING_EDITS_NEW;
      ingest.quiesce(sig);
    }
    try {
      ingestThread.join();
      currentIngestState = StandbyIngestState.NOT_INGESTING;
      LOG.info("Standby: Quiesce - Ingest thread for "
          + edits.getName() + " exited.");
    } catch (InterruptedException e) {
      LOG.info("Standby: Quiesce - Ingest thread interrupted.");
      // same message as before, but keep the cause for diagnostics
      throw new IOException(e.getMessage(), e);
    }
  }

  /**
   * Instantiates ingest thread for the given edits file type.
   * If the edits file does not exist, nothing is started and the
   * state is left unchanged.
   *
   * @param type (EDITS, EDITS_NEW)
   * @throws IOException if the standby is not in the NOT_INGESTING state
   */
  private void instantiateIngest(IngestFile type)
      throws IOException {
    File edits;
    InjectionHandler.processEvent(InjectionEvent.STANDBY_INSTANTIATE_INGEST);
    synchronized (ingestStateLock) {
      assertState(StandbyIngestState.NOT_INGESTING);
      edits = getIngestFile(type);
      // if the file does not exist,
      // do not change the state
      if (!edits.exists()
          || InjectionHandler
              .falseCondition(InjectionEvent.STANDBY_EDITS_NOT_EXISTS, type)) {
        return;
      }
      setCurrentIngestFile(edits);
      ingest = new Ingest(this, fsnamesys, confg, edits);
      ingestThread = new Thread(ingest);
      ingestThread.start();
      currentIngestState = type == IngestFile.EDITS
          ? StandbyIngestState.INGESTING_EDITS
          : StandbyIngestState.INGESTING_EDITS_NEW;
    }
    LOG.info("Standby: Instantiated ingest for edits file: " + edits.getName());
  }

  /**
   * Processes a given edit file type.
   * Method to be used when quiescing the standby!
   *
   * @param type (EDITS, EDITS_NEW)
   * @return the log version reported by the ingest
   */
  private int processIngestFileForQuiescing(IngestFile type)
      throws IOException {
    boolean editsNew = type == IngestFile.EDITS_NEW;
    assertState(
        StandbyIngestState.NOT_INGESTING,
        editsNew ? StandbyIngestState.INGESTING_EDITS_NEW
            : StandbyIngestState.INGESTING_EDITS,
        editsNew ? StandbyIngestState.QUIESCING_EDITS_NEW
            : StandbyIngestState.QUIESCING_EDITS);

    if (ingest == null) {
      instantiateIngest(type);
    }
    quiesceIngest(type, null);
    return ingest.getLogVersion();
  }

  /**
   * Processes previously consumed edits file
   * Method to be used when quiescing the standby!
   *
   * @param type (EDITS, EDITS_NEW)
   * @throws IOException if the file could not be ingested successfully
   */
  private void reprocessIngestFileForQuiescing(IngestFile type)
      throws IOException {
    assertState(StandbyIngestState.NOT_INGESTING);
    File edits = getIngestFile(type);
    LOG.info("Standby: Quiesce - reprocessing edits file: " + edits.getName());
    if (!edits.exists()) {
      LOG.warn("Standby: Quiesce - reprocessing edits file - edits file: "
          + edits.getName() + " does not exists.");
      return;
    }
    instantiateIngest(type);
    quiesceIngest(type, null);

    // verify that the entire transaction log was truly consumed
    // when re-processing, if we fail here, we cannot do anything
    // better than fail
    if (!ingest.getIngestStatus()) {
      String emsg = "Standby: Quiesce could not successfully ingest "
          + edits.getName() + " transaction log.";
      LOG.warn(emsg);
      throw new IOException(emsg);
    }
    clearIngestState();
  }

  //
  // stop checkpointing, read edit and edits.new(if it exists)
  // into local namenode
  //
  synchronized void quiesce(long lastTxId) throws IOException {
    if (currentIngestState == StandbyIngestState.STANDBY_QUIESCED) {
      LOG.info("Standby: Quiescing - already quiesced");
      return; // nothing to do
    }
    // have to wait for the main thread to exit here
    // first stop the main thread before stopping the ingest thread
    LOG.info("Standby: Quiesce - starting");
    running = false;
    fsnamesys.cancelSaveNamespace("Standby: Quiescing - Cancel save namespace");
    InjectionHandler.processEvent(InjectionEvent.STANDBY_INTERRUPT);

    try {
      if (backgroundThread != null) {
        backgroundThread.join();
        backgroundThread = null;
      }
    } catch (InterruptedException e) {
      LOG.info("Standby: quiesce interrupted.");
      // same message as before, but keep the cause for diagnostics
      throw new IOException(e.getMessage(), e);
    }
    try {
      if (infoServer != null) {
        infoServer.stop();
        infoServer = null;
      }
    } catch (Exception e) {
      LOG.error(StringUtils.stringifyException(e));
    }

    int logVersion = 0;
    boolean reprocessEdits = false;

    // handle "edits"
    if (!isCurrentEditsNew()) {
      // assert correct state
      logVersion = processIngestFileForQuiescing(IngestFile.EDITS);
      // reprocess edits if failed
      if (!ingest.getIngestStatus()) {
        clearIngestState();
        reprocessIngestFileForQuiescing(IngestFile.EDITS);
      }
      clearIngestState();
      reprocessEdits = true;
    }

    // if the transactions don't match,
    // there is most probably edits.new
    if (!transactionsMatch(lastTxId, logVersion)) {
      pollEditsNew(30);
    }

    // handle "edits.new"
    if (editsNewExists()) {
      logVersion = processIngestFileForQuiescing(IngestFile.EDITS_NEW);
      clearIngestState();
    }

    // if for some reason we did not succeed,
    // or the last transaction doesn't match
    // try to re-read both files and replay the logs
    // (skips transactions applied before)
    if ((ingest != null && !ingest.getIngestStatus())
        || (!transactionsMatch(lastTxId, logVersion))) {
      // try to reopen the logs and re-read them
      if (reprocessEdits) {
        // re-read edits, if needed
        reprocessIngestFileForQuiescing(IngestFile.EDITS);
      }
      reprocessIngestFileForQuiescing(IngestFile.EDITS_NEW);
    }

    // final sanity verification of transaction id's
    if (!transactionsMatch(lastTxId, logVersion)) {
      String emsg = "Standby: Quiesce - could not successfully ingest "
          + " transaction logs. Transaction Mismatch: " + lastTxId
          + " avatar txid: " + avatarNode.getLastWrittenTxId();
      LOG.warn(emsg);
      throw new IOException(emsg);
    }
    clearIngestState();
    // mark quiesce as completed
    LOG.info("Standby: Quiesce - completed");
    currentIngestState = StandbyIngestState.STANDBY_QUIESCED;
  }

  /**
   * Check if the given transaction is the last one applied
   *
   * @param lastTxId given last transaction
   * @param logVersion log version reported by the ingest
   * @return true if the given transaction is the last one or should be ignored,
   *         or the layout version does not support transaction ids
   */
  private boolean transactionsMatch(long lastTxId, int logVersion) {
    return (lastTxId == AvatarNode.TXID_IGNORE)
        || (logVersion > FSConstants.STORED_TXIDS)
        || (lastTxId == avatarNode.getLastWrittenTxId());
  }

  /**
   * Check to see if the remote namenode is doing its own checkpointing. This can happen
   * when the remote namenode is restarted. This method returns true if the remote
   * namenode has done an unexpected checkpoint. This method retrieves the fstime of the
   * remote namenode and matches it against the fstime of the checkpoint when this
   * AvatarNode did a full-sync of the edits log. It also matches the size of
   * both the images. The reason for this is as follows :
   *
   * Just after a checkpoint is done there is small duration of time when the
   * remote and local fstime don't match even for a good checkpoint, but
   * fortunately both the images do match and we should check whether both
   * have the same size. Note that even if this check does not catch a stale
   * checkpoint (in the rare case where both images have the same length but
   * are not the same), our transaction id based verification will definitely
   * catch this issue.
   */
  boolean hasStaleCheckpoint() throws IOException {
    long remotefsTime = avatarNode.readRemoteFstime(startupConf);
    long localfsTime = avatarNode.getStartCheckpointTime();
    long remoteImageSize = avatarNode.getRemoteImageFile(startupConf).length();
    long localImageSize = avatarNode.getAvatarImageFile(startupConf).length();
    if (remotefsTime != localfsTime && remoteImageSize != localImageSize) {
      LOG.warn("Standby: The remote active namenode might have been restarted.");
      LOG.warn("Standby: The fstime of checkpoint from which the Standby was created is " +
               AvatarNode.dateForm.format(new Date(localfsTime)) +
               " but remote fstime is " +
               AvatarNode.dateForm.format(new Date(remotefsTime)));
      avatarNode.doRestart();
      return true;
    }
    return false;
  }

  /**
   * Polls once per second, up to {@code numRetries} times, until
   * edits.new is found. Returns silently if it never shows up.
   *
   * @param numRetries maximum number of polls
   * @throws IOException if the sleep between polls is interrupted
   */
  private void pollEditsNew(int numRetries) throws IOException {
    for (int i = 0; i < numRetries; i++) {
      if (editsNewExists()) {
        break;
      }
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // keep the cause for diagnostics
        throw new IOException("Standby: - received interruption", e);
      }
      LOG.info("Standby: - retrying to check if edits.new exists... try: " + i);
    }
  }

  /**
   * Set current checkpoint status
   */
  private void checkpointStatus(String st) {
    checkpointStatus = new Date(System.currentTimeMillis()).toString() + ": " + st;
  }

  /**
   * Get current checkpoint status.
   * Used for webui.
   */
  protected String getCheckpointStatus() {
    return checkpointStatus;
  }

  /**
   * writes the in memory image of the local namenode to the fsimage
   * and then uploads this image to the primary namenode. The transaction
   * log on the primary is purged too.
   */
  public void doCheckpoint() throws IOException {
    try {
      InjectionHandler.processEvent(InjectionEvent.STANDBY_ENTER_CHECKPOINT, this.sig);

      // Tell the remote namenode to start logging transactions in a new edit file
      // Returns a token that would be used to upload the merged image.
      if (!checkpointEnabled) {
        checkpointStatus("Disabled");
        // This means the Standby is not meant to checkpoint the primary
        LOG.info("Standby: Checkpointing is disabled - return");
        return;
      }
      CheckpointSignature sig = null;
      InjectionHandler.processEvent(InjectionEvent.STANDBY_BEFORE_ROLL_EDIT);
      try {
        LOG.info("Standby: Checkpointing - Roll edits logs of primary namenode "
            + nameNodeAddr);
        checkpointStatus("Edit log rolled on primary");
        sig = (CheckpointSignature) primaryNamenode.rollEditLog();
      } catch (IOException ex) {
        // In this case we can return since we did not kill the Ingest thread yet
        // Nothing prevents us from doing the next checkpoint attempt
        checkpointStatus("Checkpoint failed");
        // log the exception too, so the reason for the failed roll is not lost
        LOG.warn("Standby: Checkpointing - roll Edits on the primary node failed.", ex);
        return;
      }

      if (this.sig != null && this.sig.equals(sig)
          && lastFinalizeCheckpointFailed) {
        // previous checkpoint failed, maybe we have the image saved?
        LOG.info("Standby: Checkpointing - retrying to finalize previous checkpoint");
        try {
          finalizeCheckpoint(sig);
        } catch (IOException ex) {
          LOG.error("Standby: Checkpointing - can't finalize previous checkpoing, "
              + "will retry later.");
          lastFinalizeCheckpointFailed = true;
          throw ex;
        }
        return;
      } else if (this.sig != null && lastFinalizeCheckpointFailed) {
        // last checkpoint did not succeed, but the primary has
        // been checkpointed in the meantime
        throw new RuntimeException(
            "Last checkpoint did not succeed, but the signatures do not match. "
                + "The primary was checkpointed in the meantime.");
      }
      setLastRollSignature(sig);

      // Ingest till end of edits log
      if (ingest == null) {
        LOG.info("Standby: Checkpointing - creating ingest thread to process all transactions.");
        instantiateIngest(IngestFile.EDITS);
      }

      checkpointStatus("Quiescing ingest");
      quiesceIngest(IngestFile.EDITS, sig);
      LOG.info("Standby: Checkpointing - finished quitting ingest thread just before ckpt.");

      if (!ingest.getIngestStatus()) {
        checkpointStatus("Re-quiescing ingest");
        // try to reopen the log and re-read it
        instantiateIngest(IngestFile.EDITS);
        quiesceIngest(IngestFile.EDITS, sig);
      }
      assertState(StandbyIngestState.NOT_INGESTING);

      if (!ingest.getIngestStatus()) {
        clearIngestState();
        String emsg = "Standby: Checkpointing - could not ingest transaction log.";
        emsg += " This is real bad because we do not know how much edits we have consumed.";
        emsg += " It is better to exit the AvatarNode here.";
        LOG.error(emsg);
        throw new RuntimeException(emsg);
      }
      clearIngestState();
      assertState(StandbyIngestState.NOT_INGESTING);

      /**
       * From now on Ingest thread needs to know if the checkpoint was started and never finished.
       * This would mean that it doesn't have to read the edits, since they were already processed
       * to the end as a part of a checkpoint.
       * state = StandbyIngestState.CHECKPOINTING
       */
      fsnamesys.writeLock();
      try {
        InjectionHandler.processEvent(InjectionEvent.STANDBY_BEFORE_SAVE_NAMESPACE);
        currentIngestState = StandbyIngestState.CHECKPOINTING;
        // roll transaction logs on local namenode
        LOG.info("Standby: Close editlog on local namenode.");
        fsImage.getEditLog().close();

        // save a checkpoint of the current namespace of the local Namenode
        // We should ideally use fsnamesystem.saveNamespace but that works
        // only if namenode is not in safemode.
        LOG.info("Standby: Checkpointing - save fsimage on local namenode.");
        checkpointStatus("Saving namespace started");
        fsnamesys.saveNamespace(false, false);
      } catch (SaveNamespaceCancelledException e) {
        InjectionHandler.processEvent(InjectionEvent.STANDBY_CANCELLED_EXCEPTION_THROWN);
        LOG.info("Standby: Checkpointing - cancelled saving namespace");
        throw e;
      } catch (IOException ex) {
        // Standby failed to save fsimage locally. Need to reinitialize
        String msg = "Standby: Checkpointing - failed to checkpoint itself, so "
            + "no image can be uploaded to the primary. The only course of action "
            + "is to start from the very beginning by reinitializing AvatarNode";
        LOG.error(msg, ex);
        throw new RuntimeException(msg, ex);
      } finally {
        currentIngestState = StandbyIngestState.NOT_INGESTING;
        fsnamesys.writeUnlock();
      }

      pollEditsNew(30);
      // we can start the ingest again for edits.new!!!
      instantiateIngest(IngestFile.EDITS_NEW);

      try {
        finalizeCheckpoint(sig);
      } catch (IOException ex) {
        // If the rollFsImage has actually succeeded on the Primary, but
        // returned with the exception on recreation our Ingest will throw
        // a runtime exception and the Avatar will be restarted.
        LOG.error("Standby: Checkpointing - rolling the fsimage "
            + "on the Primary node failed.", ex);
        lastFinalizeCheckpointFailed = true;
        throw ex;
      }
    } catch (IOException e) {
      LOG.error("Standby: Checkpointing - failed to complete the checkpoint: "
          + StringUtils.stringifyException(e));
      checkpointStatus("Checkpoint failed");
      throw e;
    } finally {
      InjectionHandler.processEvent(InjectionEvent.STANDBY_EXIT_CHECKPOINT, this.sig);
    }
  }

  /**
   * Load the image to validate that it is not corrupted
   */
  private class ImageValidator extends Thread {
    private OfflineImageViewer viewer;
    volatile private Throwable error = null;
    volatile private boolean succeeded = false;

    private ImageValidator(File imageFile) throws IOException {
      LOG.info("Validating image file " + imageFile);
      tmpImageFileForValidation.delete();
      LsImageVisitor v = new LsImageVisitor(tmpImageFileForValidation.toString());
      viewer = new OfflineImageViewer(imageFile.toString(), v, true);
    }

    /**
     * Runs the viewer over the image; records success, or the error
     * that stopped it.
     */
    @Override
    public void run() {
      try {
        viewer.go();
        succeeded = true;
      } catch (Throwable e) {
        error = e;
      }
    }
  }

  /**
   * Uploads the saved image to the primary while validating it in a
   * separate thread, then asks the primary to roll its fsimage and
   * updates the ingest state to follow "edits".
   *
   * @param sig signature passed to the image upload
   * @throws IOException if no image is available, the upload or validation
   *         fails, the standby is in an unexpected state, or the roll fails
   */
  private void finalizeCheckpoint(CheckpointSignature sig)
      throws IOException {

    File[] imageFiles = fsImage.getImageFiles();
    if (imageFiles.length == 0) {
      throw new IOException("No good image is left");
    }
    File imageFile = imageFiles[0];
    InjectionHandler.processEvent(InjectionEvent.STANDBY_BEFORE_PUT_IMAGE,
        imageFile);

    // start a thread to validate image while uploading the image to primary
    // (start(), not run(): run() would validate synchronously on this thread
    // and make the join() below a no-op)
    ImageValidator imageValidator = new ImageValidator(imageFile);
    imageValidator.start();

    // copy image to primary namenode
    LOG.info("Standby: Checkpointing - Upload fsimage to remote namenode.");
    checkpointStatus("Image upload started");
    putFSImage(sig);

    // check if the image is valid
    try {
      imageValidator.join();
    } catch (InterruptedException ie) {
      throw (IOException) new InterruptedIOException().initCause(ie);
    }
    if (!imageValidator.succeeded) {
      throw new IOException("Image file validation failed", imageValidator.error);
    }

    // make transaction to primary namenode to switch edit logs
    LOG.info("Standby: Checkpointing - Roll fsimage on primary namenode.");
    InjectionHandler.processEventIO(InjectionEvent.STANDBY_BEFORE_ROLL_IMAGE);

    assertState(
        StandbyIngestState.NOT_INGESTING,
        StandbyIngestState.INGESTING_EDITS_NEW);

    // we might concurrently reopen ingested file because of
    // checksum error
    synchronized (ingestStateLock) {
      boolean editsNewExisted = editsNewExists();
      try {
        primaryNamenode.rollFsImage(new CheckpointSignature(fsImage));
      } catch (IOException e) {
        if (editsNewExisted && !editsNewExists()
            && currentIngestState == StandbyIngestState.INGESTING_EDITS_NEW) {
          // we were ingesting edits.new
          // the roll did not succeed but edits.new does not exist anymore
          // assume that the roll succeeded
          LOG.warn("Roll did not succeed but edits.new does not exist!!! - assuming roll succeeded", e);
        } else {
          throw e;
        }
      }
      // after successful roll edits.new is rolled to edits
      // and we should be consuming it
      setCurrentIngestFile(editsFile);
      if (currentIngestState == StandbyIngestState.INGESTING_EDITS_NEW) {
        // 1) We currently consume edits.new - do the swap
        currentIngestState = StandbyIngestState.INGESTING_EDITS;
      } // 2) otherwise we don't consume anything - do not change the state
    }
    setLastRollSignature(null);
    lastFinalizeCheckpointFailed = false;

    LOG.info("Standby: Checkpointing - Checkpoint done. New Image Size: "
        + fsImage.getFsImageName().length());
    checkpointStatus("Completed");
  }

  /**
   * Creates the RPC proxy to the primary namenode, reads the checkpoint
   * scheduling parameters from the configuration and starts the
   * "secondary" web server with the getimage servlet registered.
   */
  void initSecondary(Configuration conf) throws IOException {

    nameNodeAddr = avatarNode.getRemoteNamenodeAddress(conf);
    this.primaryNamenode =
        (NamenodeProtocol) RPC.waitForProxy(NamenodeProtocol.class,
            NamenodeProtocol.versionID, nameNodeAddr, conf);

    fsName = avatarNode.getRemoteNamenodeHttpName(conf);

    // Initialize other scheduling parameters from the configuration
    checkpointEnabled = conf.getBoolean("fs.checkpoint.enabled", false);
    checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600);
    checkpointSize = conf.getLong("fs.checkpoint.size", 4194304);

    // initialize the webserver for uploading files.
    String infoAddr =
        NetUtils.getServerAddress(conf,
            "dfs.secondary.info.bindAddress",
            "dfs.secondary.info.port",
            "dfs.secondary.http.address");
    InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
    infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort,
        tmpInfoPort == 0, conf);
    infoServer.setAttribute("name.system.image", fsImage);
    this.infoServer.setAttribute("name.conf", conf);
    infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    infoServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = infoServer.getPort();
    conf.set("dfs.secondary.http.address", infoBindAddress + ":" +infoPort);
    LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" +infoPort);
    LOG.warn("Checkpoint Period :" + checkpointPeriod + " secs " +
             "(" + checkpointPeriod/60 + " min)");
    LOG.warn("Log Size Trigger :" + checkpointSize + " bytes " +
             "(" + checkpointSize/1024 + " KB)");
  }

  /**
   * Copy the new fsimage into the NameNode
   */
  private void putFSImage(CheckpointSignature sig) throws IOException {
    String fileid = "putimage=1&port=" + infoPort +
        "&machine=" + machineName +
        "&token=" + sig.toString();
    LOG.info("Standby: Posted URL " + fsName + fileid);
    TransferFsImage.getFileClient(fsName, fileid, (File[])null, false);
  }

  /**
   * Remembers the signature returned by the last edit log roll.
   */
  public void setLastRollSignature(CheckpointSignature sig) {
    this.sig = sig;
  }

  /**
   * @return the signature stored by {@link #setLastRollSignature}, may be null
   */
  public CheckpointSignature getLastRollSignature() {
    return this.sig;
  }

  /**
   * Reports whether the standby is behind, based on the current ingest state:
   * while ingesting it defers to the ingest (or returns true if edits.new
   * already exists while still on edits); it returns true when not ingesting
   * or checkpointing, and false once quiesced.
   */
  public boolean fellBehind() {
    synchronized (ingestStateLock) {
      switch (currentIngestState) {
      case INGESTING_EDITS:
      case QUIESCING_EDITS:
        return editsNewExists() ? true : ingest.catchingUp();
      case INGESTING_EDITS_NEW:
      case QUIESCING_EDITS_NEW:
        return ingest.catchingUp();
      case NOT_INGESTING:
      case CHECKPOINTING:
        return true;
      case STANDBY_QUIESCED:
        return false;
      default:
        // report the state (the original message printed the ingest file)
        throw new IllegalStateException("Unknown ingest state: "
            + currentIngestState);
      }
    }
  }

  /**
   * @return the lag reported by the current ingest; if there is no ingest,
   *         the length of the remote edits.new file while checkpointing,
   *         or -1 otherwise (including when that file cannot be obtained)
   */
  public long getLagBytes() {
    // read the volatile field once: another thread may clear it between a
    // null check and the dereference
    final Ingest currentIngest = this.ingest;
    if (currentIngest == null) {
      if (currentIngestState == StandbyIngestState.CHECKPOINTING) {
        try {
          // If it's checkpointing, the primary is writing to edits.new
          File edits = avatarNode.getRemoteEditsFileNew(startupConf);
          return edits.length();
        } catch (IOException e) {
          LOG.error("Fail to get lagbytes", e);
          return -1;
        }
      } else {
        // two rare cases could come here: quiesce and error, no good value
        // could return
        return -1;
      }
    }
    return currentIngest.getLagBytes();
  }

  /**
   * Resets the state to NOT_INGESTING and drops the references to the
   * ingest, its thread and the current ingest file.
   */
  private void clearIngestState() {
    synchronized (ingestStateLock) {
      currentIngestState = StandbyIngestState.NOT_INGESTING;
      ingest = null;
      ingestThread = null;
      setCurrentIngestFile(null);
    }
  }

  /**
   * Set the edits file that the current ingest thread is processing.
   *
   * @param file - current ingest file
   */
  private void setCurrentIngestFile(File file) {
    currentIngestFile = file;
  }

  /**
   * Get the current ingest file
   */
  public File getCurrentIngestFile() {
    return currentIngestFile;
  }

  /**
   * Returns true if we are currently processing edits.new
   * (compares the current ingest file against the edits.new file).
   *
   * @return true if we are consuming edits.new, false otherwise
   */
  private boolean isCurrentEditsNew() {
    // if nothing set, assume that we need to process edits
    synchronized (ingestStateLock) {
      return currentIngestFile == null ? false : currentIngestFile
          .equals(editsFileNew);
    }
  }

  /**
   * @return true if edits.new exists or reports a non-zero length
   */
  private boolean editsNewExists() {
    return editsFileNew.exists() || editsFileNew.length() > 0;
  }

  /**
   * Assert that the standby is in the expected state
   *
   * @param expectedStates expected states to be in
   * @throws IOException if the current state is none of the expected ones
   */
  private void assertState(StandbyIngestState... expectedStates)
      throws IOException {
    for (StandbyIngestState s : expectedStates) {
      if (currentIngestState == s) {
        return;
      }
    }
    throw new IOException("Standby: illegal state - current: "
        + currentIngestState);
  }

  /**
   * Helper function which returns the ingest file for the given type
   *
   * @param type type (EDITS, EDITS.NEW)
   * @return the corresponding ingest file
   */
  private File getIngestFile(IngestFile type) {
    return type == IngestFile.EDITS ? editsFile : editsFileNew;
  }
}