/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.catalog;

import java.io.EOFException;
import java.io.IOException;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaNodeTracker;
import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.ipc.RemoteException;

/**
 * Tracks the availability of the catalog tables <code>-ROOT-</code> and
 * <code>.META.</code>.
 *
 * This class is "read-only" in that the locations of the catalog tables cannot
 * be explicitly set.  Instead, ZooKeeper is used to learn of the availability
 * and location of <code>-ROOT-</code>.  <code>-ROOT-</code> is used to learn of
 * the location of <code>.META.</code>  If not available in <code>-ROOT-</code>,
 * ZooKeeper is used to monitor for a new location of <code>.META.</code>.
 *
 * <p>Call {@link #start()} to start up operation.  Call {@link #stop()} to
 * interrupt waits and close up shop.
 */
public class CatalogTracker {
  private static final Log LOG = LogFactory.getLog(CatalogTracker.class);

  private final HConnection connection;
  private final ZooKeeperWatcher zookeeper;
  private final RootRegionTracker rootRegionTracker;
  private final MetaNodeTracker metaNodeTracker;

  /**
   * Whether the cached {@link #metaLocation} is currently believed to be good.
   * This object doubles as the monitor guarding meta location state and as the
   * condition that waiters in the <code>waitForMeta</code> methods block on.
   */
  private final AtomicBoolean metaAvailable = new AtomicBoolean(false);

  /**
   * Do not clear this address once set.  Let it be cleared by
   * {@link #setMetaLocation(HServerAddress)} only.  It's needed when we do
   * server shutdown processing -- we need to know who had .META. last.  If you
   * want to know if the address is good, rely on {@link #metaAvailable} value.
   */
  private HServerAddress metaLocation;

  private final int defaultTimeout;

  private boolean stopped = false;

  public static final byte [] ROOT_REGION =
    HRegionInfo.ROOT_REGIONINFO.getRegionName();

  public static final byte [] META_REGION =
    HRegionInfo.FIRST_META_REGIONINFO.getRegionName();

  /**
   * Constructs a catalog tracker.  Find current state of catalog tables and
   * begin active tracking by executing {@link #start()} post construction.
   * Does not timeout.
   * @param connection Server connection; if problem, this connection's
   * {@link HConnection#abort(String, Throwable)} will be called.
   * @throws IOException
   */
  public CatalogTracker(final HConnection connection) throws IOException {
    this(connection.getZooKeeperWatcher(), connection, connection);
  }

  /**
   * Constructs the catalog tracker.  Find current state of catalog tables and
   * begin active tracking by executing {@link #start()} post construction.
   * Does not timeout.
   * @param zk ZooKeeper watcher used by the root and meta trackers
   * @param connection server connection
   * @param abortable if fatal exception
   * @throws IOException
   */
  public CatalogTracker(final ZooKeeperWatcher zk, final HConnection connection,
      final Abortable abortable)
  throws IOException {
    this(zk, connection, abortable, 0);
  }

  /**
   * Constructs the catalog tracker.  Find current state of catalog tables and
   * begin active tracking by executing {@link #start()} post construction.
   * @param zk ZooKeeper watcher used by the root and meta trackers
   * @param connection server connection
   * @param abortable if fatal exception
   * @param defaultTimeout Timeout to use.  Pass zero for no timeout
   * ({@link Object#wait(long)} when passed a <code>0</code> waits for ever).
   * @throws IOException
   */
  public CatalogTracker(final ZooKeeperWatcher zk, final HConnection connection,
      final Abortable abortable, final int defaultTimeout)
  throws IOException {
    this.zookeeper = zk;
    this.connection = connection;
    this.rootRegionTracker = new RootRegionTracker(zookeeper, abortable);
    this.metaNodeTracker = new MetaNodeTracker(zookeeper, this, abortable);
    this.defaultTimeout = defaultTimeout;
  }

  /**
   * Starts the catalog tracker.
   * Determines current availability of catalog tables and ensures all further
   * transitions of either region are tracked.
   * @throws IOException
   * @throws InterruptedException
   */
  public void start() throws IOException, InterruptedException {
    this.rootRegionTracker.start();
    this.metaNodeTracker.start();
    LOG.debug("Starting catalog tracker " + this);
  }

  /**
   * Stop working.
   * Interrupts any ongoing waits.
   */
  public void stop() {
    LOG.debug("Stopping catalog tracker " + this);
    this.stopped = true;
    this.rootRegionTracker.stop();
    this.metaNodeTracker.stop();
    // Call this and it will interrupt any ongoing waits on meta.
    synchronized (this.metaAvailable) {
      this.metaAvailable.notifyAll();
    }
  }

  /**
   * Gets the current location for <code>-ROOT-</code> or null if location is
   * not currently available.
   * @return location of root, null if not available
   * @throws InterruptedException
   */
  public HServerAddress getRootLocation() throws InterruptedException {
    return this.rootRegionTracker.getRootRegionLocation();
  }

  /**
   * @return Last location set for meta, or null if none has been set yet.
   * The value is not cleared when the location turns out to be stale.
   */
  public HServerAddress getMetaLocation() {
    return this.metaLocation;
  }

  /**
   * Waits indefinitely for availability of <code>-ROOT-</code>.  Used during
   * cluster startup.
   * @throws InterruptedException if interrupted while waiting
   */
  public void waitForRoot() throws InterruptedException {
    this.rootRegionTracker.blockUntilAvailable();
  }

  /**
   * Gets the current location for <code>-ROOT-</code> if available and waits
   * for up to the specified timeout if not immediately available.  Throws
   * {@link NotAllMetaRegionsOnlineException} if the root tracker reports no
   * location (null) for the given timeout.
   * @param timeout maximum time to wait for root availability, in milliseconds
   * @return location of root; never null
   * @throws InterruptedException if interrupted while waiting
   * @throws NotAllMetaRegionsOnlineException if root not available before
   * timeout
   */
  HServerAddress waitForRoot(final long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException {
    HServerAddress address = rootRegionTracker.waitRootRegionLocation(timeout);
    if (address == null) {
      throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
    }
    return address;
  }

  /**
   * Gets a connection to the server hosting root, as reported by ZooKeeper,
   * waiting up to the specified timeout for availability.
   * @param timeout maximum time to wait for root availability, in milliseconds
   * @see #waitForRoot(long)
   * @return connection to server hosting root
   * @throws InterruptedException
   * @throws NotAllMetaRegionsOnlineException if timed out waiting
   * @throws IOException
   */
  public HRegionInterface waitForRootServerConnection(long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
    return getCachedConnection(waitForRoot(timeout));
  }

  /**
   * Gets a connection to the server hosting root, as reported by ZooKeeper,
   * waiting for the default timeout specified on instantiation.
   * @see #waitForRoot(long)
   * @return connection to server hosting root
   * @throws NotAllMetaRegionsOnlineException if timed out waiting or
   * interrupted; on interrupt the thread's interrupt status is restored
   * @throws IOException
   */
  public HRegionInterface waitForRootServerConnectionDefault()
  throws NotAllMetaRegionsOnlineException, IOException {
    try {
      return getCachedConnection(waitForRoot(defaultTimeout));
    } catch (InterruptedException e) {
      // The signature cannot propagate the interrupt, so restore the flag for
      // code further up the stack rather than silently consuming it.
      Thread.currentThread().interrupt();
      throw new NotAllMetaRegionsOnlineException("Interrupted");
    }
  }

  /**
   * Gets a connection to the server hosting root, as reported by ZooKeeper,
   * if available.  Returns null if no location is immediately available.
   * @return connection to server hosting root, null if not available
   * @throws IOException
   * @throws InterruptedException
   */
  private HRegionInterface getRootServerConnection()
  throws IOException, InterruptedException {
    HServerAddress address = this.rootRegionTracker.getRootRegionLocation();
    if (address == null) {
      return null;
    }
    return getCachedConnection(address);
  }

  /**
   * Gets a connection to the server currently hosting <code>.META.</code> or
   * null if location is not currently available.
   * <p>
   * If a location is known, a connection to the cached location is returned.
   * If refresh is true, the cached connection is verified first before
   * returning.  If the connection is not valid, it is reset and rechecked.
   * <p>
   * If no location for meta is currently known, method checks ROOT for a new
   * location, verifies META is currently there, and returns a cached connection
   * to the server hosting META.
   *
   * @param refresh whether to verify a cached location before returning it
   * @return connection to server hosting meta, null if location not available
   * @throws IOException
   * @throws InterruptedException
   */
  private HRegionInterface getMetaServerConnection(boolean refresh)
  throws IOException, InterruptedException {
    synchronized (metaAvailable) {
      if (metaAvailable.get()) {
        HRegionInterface current = getCachedConnection(metaLocation);
        if (!refresh) {
          return current;
        }
        if (verifyRegionLocation(current, this.metaLocation, META_REGION)) {
          return current;
        }
        resetMetaLocation();
      }
      HRegionInterface rootConnection = getRootServerConnection();
      if (rootConnection == null) {
        return null;
      }
      HServerAddress newLocation = MetaReader.readMetaLocation(rootConnection);
      if (newLocation == null) {
        return null;
      }
      HRegionInterface newConnection = getCachedConnection(newLocation);
      // Report the address actually being verified (the one just read from
      // ROOT), not the previously cached one.
      if (verifyRegionLocation(newConnection, newLocation, META_REGION)) {
        setMetaLocation(newLocation);
        return newConnection;
      }
      return null;
    }
  }

  /**
   * Waits indefinitely for availability of <code>.META.</code>.  Used during
   * cluster startup.  Also returns once {@link #stop()} has been called.
   * @throws InterruptedException if interrupted while waiting
   */
  public void waitForMeta() throws InterruptedException {
    synchronized (metaAvailable) {
      while (!stopped && !metaAvailable.get()) {
        metaAvailable.wait();
      }
    }
  }

  /**
   * Gets the current location for <code>.META.</code> if available and waits
   * for up to the specified timeout if not immediately available.  Throws an
   * exception if timed out waiting.  This method differs from {@link #waitForMeta()}
   * in that it will go ahead and verify the location gotten from ZooKeeper by
   * trying to use returned connection.
   * @param timeout maximum time to wait for meta availability, in milliseconds;
   * zero waits without a deadline
   * @return location of meta
   * @throws InterruptedException if interrupted while waiting
   * @throws IOException unexpected exception connecting to meta server
   * @throws NotAllMetaRegionsOnlineException if meta not available before
   * timeout
   */
  public HServerAddress waitForMeta(long timeout)
  throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
    final long deadline = System.currentTimeMillis() + timeout;
    synchronized (metaAvailable) {
      if (getMetaServerConnection(true) != null) {
        return metaLocation;
      }
      while (!stopped && !metaAvailable.get()) {
        if (timeout == 0) {
          // No deadline: block until notified.
          metaAvailable.wait();
        } else {
          // Only wait for what is left of the budget so that repeated
          // wake-ups cannot stretch the total wait past the timeout.
          long remaining = deadline - System.currentTimeMillis();
          if (remaining <= 0) {
            break;
          }
          metaAvailable.wait(remaining);
        }
      }
      if (getMetaServerConnection(true) == null) {
        throw new NotAllMetaRegionsOnlineException(
            "Timed out (" + timeout + "ms)");
      }
      return metaLocation;
    }
  }

  /**
   * Gets a connection to the server hosting meta, as reported by ZooKeeper,
   * waiting up to the specified timeout for availability.
   * @param timeout maximum time to wait for meta availability, in milliseconds
   * @see #waitForMeta(long)
   * @return connection to server hosting meta
   * @throws InterruptedException
   * @throws NotAllMetaRegionsOnlineException if timed out waiting
   * @throws IOException
   */
  public HRegionInterface waitForMetaServerConnection(long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
    return getCachedConnection(waitForMeta(timeout));
  }

  /**
   * Gets a connection to the server hosting meta, as reported by ZooKeeper,
   * waiting for the default timeout specified on instantiation.
   * @see #waitForMeta(long)
   * @return connection to server hosting meta
   * @throws NotAllMetaRegionsOnlineException if timed out or interrupted; on
   * interrupt the thread's interrupt status is restored
   * @throws IOException
   */
  public HRegionInterface waitForMetaServerConnectionDefault()
  throws NotAllMetaRegionsOnlineException, IOException {
    try {
      return getCachedConnection(waitForMeta(defaultTimeout));
    } catch (InterruptedException e) {
      // The signature cannot propagate the interrupt, so restore the flag for
      // code further up the stack rather than silently consuming it.
      Thread.currentThread().interrupt();
      throw new NotAllMetaRegionsOnlineException("Interrupted");
    }
  }

  /**
   * Marks the cached meta location as no longer good.  The address itself is
   * deliberately left in place; only the availability flag is cleared.
   */
  private void resetMetaLocation() {
    LOG.info("Current cached META location is not valid, resetting");
    this.metaAvailable.set(false);
  }

  /**
   * Records a new meta location, flags meta as available and wakes waiters.
   * Must be called while holding the {@link #metaAvailable} monitor.
   * @param metaLocation the verified new location of meta
   */
  private void setMetaLocation(HServerAddress metaLocation) {
    metaAvailable.set(true);
    this.metaLocation = metaLocation;
    // no synchronization because these are private and already under lock
    metaAvailable.notifyAll();
  }

  /**
   * Asks the underlying {@link HConnection} for a region server connection to
   * the passed address.  Connection failures that suggest the address is stale
   * or the server is gone are absorbed and reported as a null return.
   * @param address server to connect to
   * @return connection to the server, or null if it could not be reached
   * @throws IOException on any failure not recognised as a dead/stale server
   */
  private HRegionInterface getCachedConnection(HServerAddress address)
  throws IOException {
    HRegionInterface protocol = null;
    try {
      protocol = connection.getHRegionConnection(address, false);
    } catch (RetriesExhaustedException e) {
      if (e.getCause() instanceof ConnectException) {
        // Catch this; presume it means the cached connection has gone bad.
      } else {
        throw e;
      }
    } catch (SocketTimeoutException e) {
      // We were passed the wrong address.  Return 'protocol' == null.
      LOG.debug("Timed out connecting to " + address);
    } catch (IOException ioe) {
      Throwable cause = ioe.getCause();
      if (cause instanceof EOFException) {
        // Catch.  Other end disconnected us.
      } else if (cause != null && cause.getMessage() != null &&
          cause.getMessage().toLowerCase().contains("connection reset")) {
        // Catch.  Connection reset.
      } else {
        throw ioe;
      }
    }
    return protocol;
  }

  /**
   * Checks that the passed server answers for the named region by asking it
   * for the region's info.
   * @param metaServer connection to the server to probe; may be null
   * @param address address of the server being probed; used for logging only
   * @param regionName name of the region expected on the server
   * @return true if the server returned region info for the region; false if
   * the connection was null or the probe failed in a way that indicates the
   * region is not there (connect failure, not serving, EOF, connection reset)
   * @throws IOException on any other failure
   */
  private boolean verifyRegionLocation(HRegionInterface metaServer,
      final HServerAddress address, byte [] regionName)
  throws IOException {
    if (metaServer == null) {
      LOG.info("Passed metaserver is null");
      return false;
    }
    Throwable t = null;
    try {
      return metaServer.getRegionInfo(regionName) != null;
    } catch (ConnectException e) {
      t = e;
    } catch (RemoteException e) {
      IOException ioe = e.unwrapRemoteException();
      if (ioe instanceof NotServingRegionException) {
        t = ioe;
      } else {
        throw e;
      }
    } catch (IOException e) {
      Throwable cause = e.getCause();
      if (cause instanceof EOFException) {
        t = cause;
      } else if (cause != null && cause.getMessage() != null
          && cause.getMessage().contains("Connection reset")) {
        t = cause;
      } else {
        throw e;
      }
    }
    LOG.info("Failed verification of " + Bytes.toString(regionName) +
      " at address=" + address + "; " + t);
    return false;
  }

  /**
   * Verify <code>-ROOT-</code> is deployed and accessible.
   * @param timeout How long to wait on zk for root address (passed through to
   * the internal call to {@link #waitForRootServerConnection(long)}).
   * @return True if the <code>-ROOT-</code> location is healthy.
   * @throws IOException
   * @throws InterruptedException
   */
  public boolean verifyRootRegionLocation(final long timeout)
  throws InterruptedException, IOException {
    HRegionInterface rootServer = null;
    try {
      rootServer = waitForRootServerConnection(timeout);
    } catch (NotAllMetaRegionsOnlineException e) {
      // Pass
    } catch (org.apache.hadoop.hbase.ipc.ServerNotRunningException e) {
      // Pass -- remote server is not up so can't be carrying root
    }
    // Any other IOException is unexpected and propagates to the caller.
    return rootServer != null &&
      verifyRegionLocation(rootServer,
        this.rootRegionTracker.getRootRegionLocation(),
        HRegionInfo.ROOT_REGIONINFO.getRegionName());
  }

  /**
   * Verify <code>.META.</code> is deployed and accessible.
   * @param timeout Currently unused: the check is made once, immediately,
   * against the cached or freshly looked-up location and does not wait.
   * @return True if the <code>.META.</code> location is healthy.
   * @throws IOException Some unexpected IOE.
   * @throws InterruptedException
   */
  public boolean verifyMetaRegionLocation(final long timeout)
  throws InterruptedException, IOException {
    return getMetaServerConnection(true) != null;
  }

  MetaNodeTracker getMetaNodeTracker() {
    return this.metaNodeTracker;
  }

  public HConnection getConnection() {
    return this.connection;
  }
}