/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.datanode; import java.io.File; import java.io.IOException; import java.net.ConnectException; import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.AvatarZooKeeperClient; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.AvatarProtocol; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.DataTransferProtocol; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.namenode.AvatarNode; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.BlockReport; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.util.InjectionEvent; import org.apache.hadoop.hdfs.util.InjectionHandler; import org.apache.hadoop.hdfs.protocol.UnregisteredDatanodeException; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.StringUtils; import org.apache.zookeeper.data.Stat; /** * This is an implementation of the AvatarDataNode, a wrapper * for a regular datanode that works with AvatarNode. * * The AvatarDataNode is needed to make a vanilla DataNode send * block reports to Primary and standby namenodes. The AvatarDataNode * does not know which one of the namenodes is primary and which is * secondary. * * Typically, an adminstrator will have to specify the pair of * AvatarNodes via fs1.default.name and fs2.default.name * */ public class AvatarDataNode extends DataNode { static { Configuration.addDefaultResource("avatar-default.xml"); Configuration.addDefaultResource("avatar-site.xml"); } public static final Log LOG = LogFactory.getLog(AvatarDataNode.class.getName()); public AvatarDataNode(Configuration conf, AbstractList<File> dataDirs, String dnThreadName) throws IOException { super(conf, dataDirs); AvatarDataNode.dnThreadName = dnThreadName; } private static List<InetSocketAddress> getDatanodeProtocolAddresses( Configuration conf, Collection<String> serviceIds) throws IOException { // Use default address as fall back String defaultAddress; try { defaultAddress = conf.get(FileSystem.FS_DEFAULT_NAME_KEY); if (defaultAddress != null) { Configuration newConf = new Configuration(conf); newConf.set(FileSystem.FS_DEFAULT_NAME_KEY, defaultAddress); defaultAddress = NameNode.getHostPortString(NameNode.getAddress(newConf)); } } catch (IllegalArgumentException e) { defaultAddress = null; } List<InetSocketAddress> addressList = DFSUtil.getAddresses(conf, serviceIds, defaultAddress, NameNode.DATANODE_PROTOCOL_ADDRESS, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); if (addressList == null) { throw new IOException("Incorrect configuration: namenode address " + FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY + " is not configured."); } return addressList; } @Override void startDataNode(Configuration conf, AbstractList<File> dataDirs ) throws IOException { initGlobalSetting(conf, dataDirs); Collection<String> serviceIds = DFSUtil.getNameServiceIds(conf); List<InetSocketAddress> defaultNameAddrs = AvatarDataNode.getDatanodeProtocolAddresses(conf, serviceIds); List<InetSocketAddress> nameAddrs0 = DFSUtil.getRPCAddresses("0", conf, serviceIds, NameNode.DATANODE_PROTOCOL_ADDRESS, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> nameAddrs1 = DFSUtil.getRPCAddresses("1", conf, serviceIds, NameNode.DATANODE_PROTOCOL_ADDRESS, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarAddrs0 = AvatarDataNode.getAvatarNodeAddresses("0", conf, serviceIds); List<InetSocketAddress> avatarAddrs1 = AvatarDataNode.getAvatarNodeAddresses("1", conf, serviceIds); namespaceManager = new AvatarNamespaceManager(nameAddrs0, nameAddrs1, avatarAddrs0, avatarAddrs1, defaultNameAddrs, DFSUtil.getNameServiceIds(conf)); initDataSetAndScanner(conf, dataDirs, nameAddrs0.size()); } @Override protected void notifyNamenodeReceivedBlock(int namespaceId, Block block, String delHint) throws IOException { if (block == null) { throw new IllegalArgumentException("Block is null"); } ((AvatarNamespaceManager)namespaceManager).notifyNamenodeReceivedBlock( namespaceId, block, delHint); } @Override protected void notifyNamenodeDeletedBlock(int namespaceId, Block block) throws IOException { if (block == null) { throw new IllegalArgumentException("Block is null"); } ((AvatarNamespaceManager)namespaceManager).notifyNamenodeDeletedBlock(namespaceId, block); } /** TODO: will add more details to this later on * Manages OfferService objects for the data node namespaces. * Each namespace has two OfferServices, one for pirmary and one for standby. * Creation, removal, starting, stopping, shutdown on OfferService * objects must be done via APIs in this class. */ class AvatarNamespaceManager extends NamespaceManager { private final Object refreshNamenodesLock = new Object(); AvatarNamespaceManager( List<InetSocketAddress> nameAddrs0, List<InetSocketAddress> nameAddrs1, List<InetSocketAddress> avatarAddrs0, List<InetSocketAddress> avatarAddrs1, List<InetSocketAddress> defaultAddrs, Collection<String> nameserviceIds) throws IOException { Iterator<String> it = nameserviceIds.iterator(); for ( int i = 0; i<nameAddrs0.size(); i++) { InetSocketAddress nameAddr0 = nameAddrs0.get(i); String nameserviceId = it.hasNext()? it.next(): null; nameNodeThreads.put(nameAddr0, new ServicePair(nameAddr0, nameAddrs1.get(i), avatarAddrs0.get(i), avatarAddrs1.get(i), defaultAddrs.get(i), nameserviceId)); } } /** * Notify both namenode(s) that we have received a block */ protected void notifyNamenodeReceivedBlock(int namespaceId, Block block, String delHint) throws IOException { NamespaceService servicePair = get(namespaceId); if (servicePair == null) { throw new IOException("Cannot locate OfferService thread for namespace=" + namespaceId); } servicePair.notifyNamenodeReceivedBlock(block, delHint); } /** * Notify both namenode(s) that we have deleted a block */ protected void notifyNamenodeDeletedBlock(int namespaceId, Block block) throws IOException { NamespaceService servicePair = this.get(namespaceId); if (servicePair == null) { throw new IOException("Cannot locate OfferService thread for namespace=" + namespaceId); } servicePair.notifyNamenodeDeletedBlock(block); } void refreshNamenodes( List<InetSocketAddress> nameAddrs0, List<InetSocketAddress> nameAddrs1, List<InetSocketAddress> avatarAddrs0, List<InetSocketAddress> avatarAddrs1, List<InetSocketAddress> defaultAddrs, Collection<String> nameserviceIds) throws IOException, InterruptedException{ List<Integer> toStart = new ArrayList<Integer>(); List<String> toStartNameserviceIds = new ArrayList<String>(); List<NamespaceService> toStop = new ArrayList<NamespaceService>(); synchronized (refreshNamenodesLock) { synchronized (this) { for (InetSocketAddress nnAddr : nameNodeThreads.keySet()) { if (!nameAddrs0.contains(nnAddr)){ toStop.add(nameNodeThreads.get(nnAddr)); } } Iterator<String> it = nameserviceIds.iterator(); for (int i = 0; i < nameAddrs0.size(); i++) { String nameserviceId = it.hasNext()? it.next() : null; if (!nameNodeThreads.containsKey(nameAddrs0.get(i))) { toStart.add(i); toStartNameserviceIds.add(nameserviceId); } } it = toStartNameserviceIds.iterator(); for (Integer i : toStart) { InetSocketAddress nameAddr0 = nameAddrs0.get(i); nameNodeThreads.put(nameAddr0, new ServicePair(nameAddr0, nameAddrs1.get(i), avatarAddrs0.get(i), avatarAddrs1.get(i), defaultAddrs.get(i), it.next())); } for (NamespaceService nsos : toStop) { remove(nsos); } } } for (NamespaceService nsos : toStop) { nsos.stop(); } startAll(); } } public class ServicePair extends NamespaceService { String defaultAddr; InetSocketAddress nameAddr1; InetSocketAddress nameAddr2; DatanodeProtocol namenode1; DatanodeProtocol namenode2; AvatarProtocol avatarnode1; AvatarProtocol avatarnode2; InetSocketAddress avatarAddr1; InetSocketAddress avatarAddr2; boolean doneRegister1 = false; // not yet registered with namenode1 boolean doneRegister2 = false; // not yet registered with namenode2 OfferService offerService1; OfferService offerService2; volatile OfferService primaryOfferService = null; Thread of1; Thread of2; int namespaceId; String nameserviceId; Thread spThread; AvatarZooKeeperClient zkClient; private NamespaceInfo nsInfo; DatanodeRegistration nsRegistration; private UpgradeManagerDatanode upgradeManager; private volatile boolean initialized = false; private volatile boolean shouldServiceRun = true; volatile long lastBeingAlive = now(); private ServicePair(InetSocketAddress nameAddr1, InetSocketAddress nameAddr2, InetSocketAddress avatarAddr1, InetSocketAddress avatarAddr2, InetSocketAddress defaultAddr, String nameserviceId) { this.nameAddr1 = nameAddr1; this.nameAddr2 = nameAddr2; this.avatarAddr1 = avatarAddr1; this.avatarAddr2 = avatarAddr2; this.defaultAddr = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); this.nameserviceId = nameserviceId; zkClient = new AvatarZooKeeperClient(getConf(), null); this.nsRegistration = new DatanodeRegistration(getMachineName()); } private void setNamespaceInfo(NamespaceInfo nsinfo) { this.nsInfo = nsinfo; this.namespaceId = nsinfo.getNamespaceID(); namespaceManager.addNamespace(this); } private void setupNS() throws IOException { // handshake with NN NamespaceInfo nsInfo; nsInfo = handshake(true); setNamespaceInfo(nsInfo); synchronized(AvatarDataNode.this){ setupNSStorage(); } nsRegistration.setIpcPort(ipcServer.getListenerAddress().getPort()); nsRegistration.setInfoPort(infoServer.getPort()); } private void setupNSStorage() throws IOException { Configuration conf = getConf(); StartupOption startOpt = getStartupOption(conf); assert startOpt != null : "Startup option must be set."; boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false); if (simulatedFSDataset) { nsRegistration.setStorageID(storage.getStorageID()); //same as DN nsRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION; nsRegistration.storageInfo.namespaceID = nsInfo.namespaceID; } else { // read storage info, lock data dirs and transition fs state if necessary // first do it at the top level dataDirs // This is done only once when among all namespaces storage.recoverTransitionRead(AvatarDataNode.this, nsInfo, dataDirs, startOpt); // Then do it for this namespace's directory storage.recoverTransitionRead(AvatarDataNode.this, nsInfo.namespaceID, nsInfo, dataDirs, startOpt, nameserviceId); LOG.info("setting up storage: namespaceId=" + namespaceId + ";lv=" + storage.layoutVersion + ";nsInfo=" + nsInfo); nsRegistration.setStorageInfo( storage.getNStorage(nsInfo.namespaceID), storage.getStorageID()); data.initialize(storage); } data.addNamespace(namespaceId, storage.getNameSpaceDataDir(namespaceId), conf); if (blockScanner != null) { blockScanner.start(); blockScanner.addNamespace(namespaceId); } } @Override public UpgradeManagerDatanode getUpgradeManager() { synchronized (AvatarDataNode.this) { if(upgradeManager == null) upgradeManager = new UpgradeManagerDatanode(AvatarDataNode.this, namespaceId); } return upgradeManager; } public void processUpgradeCommand(UpgradeCommand comm) throws IOException { assert upgradeManager != null : "DataNode.upgradeManager is null."; upgradeManager.processUpgradeCommand(comm); } /** * Start distributed upgrade if it should be initiated by the data-node. */ private void startDistributedUpgradeIfNeeded() throws IOException { UpgradeManagerDatanode um = getUpgradeManager(); if(!um.getUpgradeState()) return; um.setUpgradeState(false, um.getUpgradeVersion()); um.startUpgrade(); return; } public void start() { if ((spThread != null) && (spThread.isAlive())) { //Thread is started already return; } spThread = new Thread(this, dnThreadName + " for namespace " + namespaceId); spThread.setDaemon(true); spThread.start(); } public void stop() { stopServices(); if (spThread != null) { spThread.interrupt(); } } private void initProxy1() throws IOException { synchronized (avatarAddr1) { if (namenode1 == null) { namenode1 = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, DatanodeProtocol.versionID, nameAddr1, getConf()); } if (avatarnode1 == null) { avatarnode1 = (AvatarProtocol) RPC.getProxy(AvatarProtocol.class, AvatarProtocol.versionID, avatarAddr1, getConf()); } } } private void initProxy2() throws IOException { synchronized (avatarAddr2) { if (namenode2 == null) { namenode2 = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, DatanodeProtocol.versionID, nameAddr2, getConf()); } if (avatarnode2 == null) { avatarnode2 = (AvatarProtocol) RPC.getProxy(AvatarProtocol.class, AvatarProtocol.versionID, avatarAddr2, getConf()); } } } public void restartService1() throws IOException { // Rely on handshake to restart the service. synchronized (avatarAddr1) { stopService1(); joinService1(); doneRegister1 = false; } } private void stopService1() { RPC.stopProxy(avatarnode1); RPC.stopProxy(namenode1); avatarnode1 = null; namenode1 = null; if (offerService1 != null) { offerService1.stop(); } if (of1 != null) { of1.interrupt(); } } private void stopService2() { RPC.stopProxy(avatarnode2); RPC.stopProxy(namenode2); avatarnode2 = null; namenode2 = null; if (offerService2 != null) { offerService2.stop(); } if (of2 != null) { of2.interrupt(); } } private void joinService1() { if (of1 != null) { try { of1.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } private void joinService2() { if (of2 != null) { try { of2.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } public void restartService2() throws IOException { // Rely on handshake to restart the service. synchronized (avatarAddr2) { stopService2(); joinService2(); doneRegister2 = false; } } /** stop two offer services */ private void stopServices() { this.shouldServiceRun = false; LOG.info("stop services " + this.nameserviceId); stopService1(); stopService2(); if (zkClient != null) { try { zkClient.shutdown(); } catch (InterruptedException ie) { LOG.warn("Zk shutdown is interrupted: ", ie); } } } public void join() { joinServices(); if (spThread != null) { try { spThread.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } spThread = null; } } /** Join two offer services */ private void joinServices() { joinService1(); joinService2(); } public void cleanUp() { if(upgradeManager != null) upgradeManager.shutdownUpgrade(); namespaceManager.remove(this); shouldServiceRun = false; try { RPC.stopProxy(namenode1); } catch (Exception e){ LOG.warn("Exception stop the namenode RPC threads", e); } try { RPC.stopProxy(namenode2); } catch (Exception e){ LOG.warn("Exception stop the namenode RPC threads", e); } if (blockScanner != null) { blockScanner.removeNamespace(this.getNamespaceId()); } if (data != null) { data.removeNamespace(this.getNamespaceId()); } if (storage != null) { storage.removeNamespaceStorage(this.getNamespaceId()); } } public void shutdown() { stop(); join(); } // connect to both name node if possible. // If doWait is true, then return only when at least one handshake is // successful. // private NamespaceInfo handshake(boolean startup) throws IOException { NamespaceInfo nsInfo = null; boolean firstIsPrimary = false; // When true indicates ZK is null and there is no primary. This is to // enable datanode startups during failover. The assumption is that the // layout version of the Standby and Primary would be consistent when // we failover and hence we can speak to any one of the nodes to find out // the NamespaceInfo. boolean noPrimary = false; do { if (startup) { // The startup option is used when the datanode is first created // We only need to connect to the primary at this point and as soon // as possible. So figure out who the primary is from the ZK Stat stat = new Stat(); try { String primaryAddress = zkClient.getPrimaryAvatarAddress(defaultAddr, stat, false); noPrimary = (primaryAddress == null); String firstNNAddress = nameAddr1.getHostName() + ":" + nameAddr1.getPort(); firstIsPrimary = firstNNAddress.equalsIgnoreCase(primaryAddress); } catch (Exception ex) { LOG.error("Could not get the primary address from ZooKeeper", ex); } } try { if ((firstIsPrimary && startup) || !startup || noPrimary) { // only try to connect to the first NN if it is not the // startup connection or if it is primary on startup // This way if it is standby we are not wasting datanode startup time initProxy1(); if (startup) { nsInfo = handshake(namenode1, nameAddr1); } } } catch(ConnectException se) { // namenode has not been started LOG.info("Server at " + nameAddr1 + " not available yet, Zzzzz..."); } catch(SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server timeout. " + nameAddr1); } catch (IOException ioe) { LOG.info("Problem connecting to server. " + nameAddr1, ioe); } try { if ((!firstIsPrimary && startup) || !startup || noPrimary) { initProxy2(); if (startup) { NamespaceInfo tempInfo = handshake(namenode2, nameAddr2); // During failover both layouts should match. if (noPrimary && nsInfo != null && tempInfo.getLayoutVersion() != nsInfo.getLayoutVersion()) { throw new IOException("Layout versions don't match on zero, one: " + nsInfo.getLayoutVersion() + ", " + tempInfo.getLayoutVersion()); } nsInfo = tempInfo; } } } catch(ConnectException se) { // namenode has not been started LOG.info("Server at " + nameAddr2 + " not available yet, Zzzzz..."); } catch(SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server timeout. " + nameAddr2); } catch (RemoteException re) { handleRegistrationError(re); } catch (IOException ioe) { LOG.info("Problem connecting to server. " + nameAddr2, ioe); } } while (startup && nsInfo == null && shouldServiceRun); return nsInfo; } private NamespaceInfo handshake(DatanodeProtocol node, InetSocketAddress machine) throws IOException { NamespaceInfo nsInfo = new NamespaceInfo(); while (shouldServiceRun) { try { nsInfo = node.versionRequest(); break; } catch(SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + machine); try { Thread.sleep(1000); } catch (InterruptedException ie) {} } } String errorMsg = null; // do not fail on incompatible build version if( ! nsInfo.getBuildVersion().equals( Storage.getBuildVersion() )) { errorMsg = "Incompatible build versions: namenode BV = " + nsInfo.getBuildVersion() + "; datanode BV = " + Storage.getBuildVersion(); LOG.warn( errorMsg ); } if (FSConstants.LAYOUT_VERSION != nsInfo.getLayoutVersion()) { errorMsg = "Data-node and name-node layout versions must be the same." + "Expected: "+ FSConstants.LAYOUT_VERSION + " actual "+ nsInfo.getLayoutVersion(); LOG.fatal(errorMsg); try { node.errorReport(nsRegistration, DatanodeProtocol.NOTIFY, errorMsg ); } catch( SocketTimeoutException e ) { // namenode is busy LOG.info("Problem connecting to server: " + machine); } shutdownDN(); throw new IOException(errorMsg); } return nsInfo; } /** * Returns true if we are able to successfully register with namenode */ boolean register(DatanodeProtocol node, InetSocketAddress machine) throws IOException { if (nsRegistration.getStorageID().equals("")) { setNewStorageID(nsRegistration); } DatanodeRegistration tmp = new DatanodeRegistration(nsRegistration.getName()); tmp.setInfoPort(nsRegistration.getInfoPort()); tmp.setIpcPort(nsRegistration.getIpcPort()); boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false); if (simulatedFSDataset) { tmp.setStorageID(storage.getStorageID()); //same as DN tmp.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION; tmp.storageInfo.namespaceID = nsInfo.namespaceID; } else { tmp.setStorageInfo(storage.getNStorage(namespaceId), storage.getStorageID()); } // reset name to machineName. Mainly for web interface. tmp.name = machineName + ":" + nsRegistration.getPort(); try { tmp = node.register(tmp, DataTransferProtocol.DATA_TRANSFER_VERSION); // if we successded registering for the first time, then we update // the global registration objct if (!doneRegister1 && !doneRegister2) { nsRegistration = tmp; } } catch(SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + machine); return false; } assert ("".equals(storage.getStorageID()) && !"".equals(nsRegistration.getStorageID())) || storage.getStorageID().equals(nsRegistration.getStorageID()) : "New storageID can be assigned only if data-node is not formatted"; if (storage.getStorageID().equals("")) { storage.setStorageID(nsRegistration.getStorageID()); storage.writeAll(); LOG.info("New storage id " + nsRegistration.getStorageID() + " is assigned to data-node " + nsRegistration.getName()); } if(! storage.getStorageID().equals(nsRegistration.getStorageID())) { throw new IOException("Inconsistent storage IDs. Name-node returned " + nsRegistration.getStorageID() + ". Expecting " + storage.getStorageID()); } sendBlocksBeingWrittenReport(node, namespaceId, nsRegistration); return true; } boolean isPrimaryOfferService(OfferService service) { return primaryOfferService == service; } void setPrimaryOfferService(OfferService service) { this.primaryOfferService = service; if (service != null) LOG.info("Primary namenode is set to be " + service.avatarnodeAddress); else { LOG.info("Failover has happened. Stop accessing commands from " + "either namenode until the new primary is completely in" + "sync with all the datanodes"); } } private void register1() throws IOException { synchronized(avatarAddr1) { InjectionHandler.processEvent(InjectionEvent.AVATARDATANODE_BEFORE_START_OFFERSERVICE1); if (avatarnode1 != null && namenode1 != null && !doneRegister1 && register(namenode1, nameAddr1)) { InjectionHandler.processEvent(InjectionEvent.AVATARDATANODE_START_OFFERSERVICE1); doneRegister1 = true; offerService1 = new OfferService(AvatarDataNode.this, this, namenode1, nameAddr1, avatarnode1, avatarAddr1); of1 = new Thread(offerService1, "OfferService1 " + nameAddr1); of1.start(); } } } private void register2() throws IOException { synchronized(avatarAddr2) { if (avatarnode2 != null && namenode2 != null && !doneRegister2 && register(namenode2, nameAddr2)) { InjectionHandler.processEvent(InjectionEvent.AVATARDATANODE_START_OFFERSERVICE2); doneRegister2 = true; offerService2 = new OfferService(AvatarDataNode.this, this, namenode2, nameAddr2, avatarnode2, avatarAddr2); of2 = new Thread(offerService2, "OfferService2 " + nameAddr2); of2.start(); } } } @Override public void run() { LOG.info(nsRegistration + "In AvatarDataNode.run, data = " + data); try { // set up namespace try { setupNS(); } catch (IOException ioe) { // Initial handshake, storage recovery or registration failed LOG.fatal(nsRegistration + " initialization failed for namespaceId " + namespaceId, ioe); return; } while (shouldServiceRun && shouldRun) { try { // try handshaking with any namenode that we have not yet tried handshake(false); try { register1(); } finally { register2(); } this.initialized = true; startDistributedUpgradeIfNeeded(); } catch (RemoteException re) { handleRegistrationError(re); } catch (Exception ex) { LOG.error("Exception: ", ex); } if (shouldServiceRun && shouldRun) { try { Thread.sleep(5000); } catch (InterruptedException ie) { } } } } finally { LOG.info(nsRegistration + ":Finishing AvatarDataNode in: "+data); stopServices(); joinServices(); cleanUp(); } } /** * Notify both namenode(s) that we have received a block */ @Override public void notifyNamenodeReceivedBlock(Block block, String delHint) { if (offerService1 != null) { offerService1.notifyNamenodeReceivedBlock(block, delHint); } if (offerService2 != null) { offerService2.notifyNamenodeReceivedBlock(block, delHint); } } /** * Notify both namenode(s) that we have deleted a block */ @Override public void notifyNamenodeDeletedBlock(Block block) { if (offerService1 != null) { offerService1.notifyNamenodeDeletedBlock(block); } if (offerService2 != null) { offerService2.notifyNamenodeDeletedBlock(block); } } /** * Update received and retry list, when blocks are deleted */ void removeReceivedBlocks(Block[] list) { if (offerService1 != null) { offerService1.removeReceivedBlocks(list); } if (offerService2 != null) { offerService2.removeReceivedBlocks(list); } } @Override public DatanodeRegistration getNsRegistration() { return nsRegistration; } @Override public DatanodeProtocol getDatanodeProtocol() { return this.primaryOfferService.namenode; } @Override public InetSocketAddress getNNSocketAddress() { return this.nameAddr1; } @Override public int getNamespaceId() { return this.namespaceId; } @Override public String getNameserviceId() { return this.nameserviceId; } @Override public boolean initialized() { return initialized; } @Override public boolean isAlive() { return shouldServiceRun && spThread.isAlive(); } @Override public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { if (this.offerService1 != null) this.offerService1.reportBadBlocks(blocks); if (this.offerService2 != null) this.offerService2.reportBadBlocks(blocks); } @Override public LocatedBlock syncBlock(Block block, List<BlockRecord> syncList, boolean closeFile, List<InterDatanodeProtocol> datanodeProxies, long deadline) throws IOException { if (offerService1 != null && isPrimaryOfferService(offerService1)) return offerService1.syncBlock(block, syncList, closeFile, datanodeProxies, deadline); if (offerService2 != null && isPrimaryOfferService(offerService2)) return offerService2.syncBlock(block, syncList, closeFile, datanodeProxies, deadline); return null; } @Override public void scheduleBlockReport(long delay) { if (this.offerService1 != null) this.offerService1.scheduleBlockReport(delay); if (this.offerService2 != null) this.offerService2.scheduleBlockReport(delay); } // Only use for testing public void scheduleBlockReceivedAndDeleted(long delay) { if (this.offerService1 != null) this.offerService1.scheduleBlockReceivedAndDeleted(delay); if (this.offerService2 != null) this.offerService2.scheduleBlockReceivedAndDeleted(delay); } } /** * Tells the datanode to start the shutdown process. */ public synchronized void shutdownDN() { shouldRun = false; if (namespaceManager != null) { namespaceManager.stopAll(); } } DataStorage getStorage() { return storage; } private static void printUsage() { System.err.println("Usage: java DataNode"); System.err.println(" [-rollback]"); } /** * Parse and verify command line arguments and set configuration parameters. * * @return false if passed argements are incorrect */ private static boolean parseArguments(String args[], Configuration conf) { int argsLen = (args == null) ? 0 : args.length; StartupOption startOpt = StartupOption.REGULAR; for(int i=0; i < argsLen; i++) { String cmd = args[i]; if ("-r".equalsIgnoreCase(cmd) || "--rack".equalsIgnoreCase(cmd)) { LOG.error("-r, --rack arguments are not supported anymore. RackID " + "resolution is handled by the NameNode."); System.exit(-1); } else if ("-rollback".equalsIgnoreCase(cmd)) { startOpt = StartupOption.ROLLBACK; } else if ("-regular".equalsIgnoreCase(cmd)) { startOpt = StartupOption.REGULAR; } else return false; } setStartupOption(conf, startOpt); return true; } private static void setStartupOption(Configuration conf, StartupOption opt) { conf.set("dfs.datanode.startup", opt.toString()); } /** * Returns the IP address of the namenode */ static InetSocketAddress getNameNodeAddress(Configuration conf, String cname, String rpcKey, String cname2) { String fs = conf.get(cname); String fs1 = conf.get(rpcKey); String fs2 = conf.get(cname2); Configuration newconf = new Configuration(conf); newconf.set("fs.default.name", fs); if (fs1 != null) { newconf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, fs1); } if (fs2 != null) { newconf.set("dfs.namenode.dn-address", fs2); } return DataNode.getNameNodeAddress(newconf); } @Override public InetSocketAddress getNameNodeAddr() { return NameNode.getAddress(getConf()); } /** * Returns the IP:port address of the avatar node */ private static InetSocketAddress getAvatarNodeAddress(Configuration conf, String cname) { String fs = conf.get(cname); Configuration newconf = new Configuration(conf); newconf.set("fs.default.name", fs); return AvatarNode.getAddress(newconf); } /** * Returns the IP:port address of the avatar node */ public static List<InetSocketAddress> getAvatarNodeAddresses(String suffix, Configuration conf, Collection<String> serviceIds) throws IOException{ List<InetSocketAddress> namenodeAddresses = DFSUtil.getRPCAddresses(suffix, conf, serviceIds, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarnodeAddresses = new ArrayList<InetSocketAddress>(namenodeAddresses.size()); for (InetSocketAddress namenodeAddress : namenodeAddresses) { avatarnodeAddresses.add( new InetSocketAddress(namenodeAddress.getHostName(),conf.getInt( "dfs.avatarnode.port", namenodeAddress.getPort() + 1))); } return avatarnodeAddresses; } public static AvatarDataNode makeInstance(String[] dataDirs, Configuration conf) throws IOException { ArrayList<File> dirs = new ArrayList<File>(); for (int i = 0; i < dataDirs.length; i++) { File data = new File(dataDirs[i]); try { DiskChecker.checkDir(data); dirs.add(data); } catch(DiskErrorException e) { LOG.warn("Invalid directory in dfs.data.dir: " + e.getMessage()); } } if (dirs.size() > 0) { String dnThreadName = "AvatarDataNode: [" + StringUtils.arrayToString(dataDirs) + "]"; return new AvatarDataNode(conf, dirs, dnThreadName); } LOG.error("All directories in dfs.data.dir are invalid."); return null; } /** Instantiate a single datanode object. This must be run by invoking * {@link DataNode#runDatanodeDaemon(DataNode)} subsequently. */ public static AvatarDataNode instantiateDataNode(String args[], Configuration conf) throws IOException { if (conf == null) conf = new Configuration(); if (!parseArguments(args, conf)) { printUsage(); return null; } if (conf.get("dfs.network.script") != null) { LOG.error("This configuration for rack identification is not supported" + " anymore. RackID resolution is handled by the NameNode."); System.exit(-1); } String[] dataDirs = conf.getStrings("dfs.data.dir"); return makeInstance(dataDirs, conf); } public static AvatarDataNode createDataNode(String args[], Configuration conf) throws IOException { AvatarDataNode dn = instantiateDataNode(args, conf); dn.runDatanodeDaemon(); return dn; } @Override public void refreshNamenodes(Configuration conf) throws IOException { LOG.info("refresh namenodes"); try { Collection<String> serviceIds = DFSUtil.getNameServiceIds(conf); List<InetSocketAddress> defaultNameAddrs = AvatarDataNode.getDatanodeProtocolAddresses(conf, serviceIds); List<InetSocketAddress> nameAddrs0 = DFSUtil.getRPCAddresses("0", conf, serviceIds, NameNode.DATANODE_PROTOCOL_ADDRESS, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> nameAddrs1 = DFSUtil.getRPCAddresses("1", conf, serviceIds, NameNode.DATANODE_PROTOCOL_ADDRESS, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarAddrs0 = AvatarDataNode.getAvatarNodeAddresses("0", conf, serviceIds); List<InetSocketAddress> avatarAddrs1 = AvatarDataNode.getAvatarNodeAddresses("1", conf, serviceIds); ((AvatarNamespaceManager)namespaceManager).refreshNamenodes( nameAddrs0, nameAddrs1, avatarAddrs0, avatarAddrs1, defaultNameAddrs, serviceIds); } catch (InterruptedException e) { throw new IOException(e.getCause()); } } void handleRegistrationError(RemoteException re) { // If either the primary or standby NN throws these exceptions, this // datanode will exit. I think this is the right behaviour because // the excludes list on both namenode better be the same. String reClass = re.getClassName(); if (UnregisteredDatanodeException.class.getName().equals(reClass) || DisallowedDatanodeException.class.getName().equals(reClass) || IncorrectVersionException.class.getName().equals(reClass)) { LOG.warn("DataNode is shutting down: ", re); shutdownDN(); } else { LOG.warn(re); } } public static void main(String argv[]) { try { StringUtils.startupShutdownMessage(AvatarDataNode.class, argv, LOG); AvatarDataNode avatarnode = createDataNode(argv, null); if (avatarnode != null) { avatarnode.waitAndShutdown(); } } catch (Throwable e) { LOG.error(StringUtils.stringifyException(e)); System.exit(-1); } } }