ReplicationSource.java example

Explorer

CCIndex_HBase_0.90.0-master
- src
  - main
    - java
      - org
        apache
        hadoop
        hbase
        Abortable.java
        CCIndexTestCase.java
        Chore.java
        ClockOutOfSyncException.java
        ClusterStatus.java
        DoNotRetryIOException.java
        DroppedSnapshotException.java
        HBaseConfiguration.java
        HColumnDescriptor.java
        HConstants.java
        HMsg.java
        HRegionInfo.java
        HRegionLocation.java
        HServerAddress.java
        HServerInfo.java
        HServerLoad.java
        HTableDescriptor.java
        InvalidFamilyOperationException.java
        KeyValue.java
        LocalHBaseCluster.java
        MasterAddressTracker.java
        MasterNotRunningException.java
        NotAllMetaRegionsOnlineException.java
        NotServingRegionException.java
        PleaseHoldException.java
        RegionException.java
        RemoteExceptionHandler.java
        Server.java
        Stoppable.java
        TableExistsException.java
        TableNotDisabledException.java
        TableNotFoundException.java
        UnknownRegionException.java
        UnknownRowLockException.java
        UnknownScannerException.java
        VersionAnnotation.java
        YouAreDeadException.java
        ZooKeeperConnectionException.java
        avro
        AvroServer.java
        AvroUtil.java
        generated
        AAlreadyExists.java
        AClusterStatus.java
        AColumn.java
        AColumnFamilyDescriptor.java
        AColumnValue.java
        ACompressionAlgorithm.java
        ADelete.java
        AFamilyDescriptor.java
        AGet.java
        AIOError.java
        AIllegalArgument.java
        AMasterNotRunning.java
        APut.java
        ARegionLoad.java
        AResult.java
        AResultEntry.java
        AScan.java
        AServerAddress.java
        AServerInfo.java
        AServerLoad.java
        ATableDescriptor.java
        ATableExists.java
        ATimeRange.java
        HBase.java
        IOError.java
        TCell.java
        catalog
        CatalogTracker.java
        MetaEditor.java
        MetaReader.java
        RootLocationEditor.java
        client
        Action.java
        Delete.java
        Get.java
        HBaseAdmin.java
        HConnection.java
        HConnectionManager.java
        HTable.java
        HTableFactory.java
        HTableInterface.java
        HTableInterfaceFactory.java
        HTablePool.java
        Increment.java
        MetaScanner.java
        MultiAction.java
        MultiPut.java
        MultiPutResponse.java
        MultiResponse.java
        NoServerForRegionException.java
        Put.java
        RegionOfflineException.java
        Result.java
        ResultScanner.java
        RetriesExhaustedException.java
        RetriesExhaustedWithDetailsException.java
        Row.java
        RowLock.java
        Scan.java
        ScannerCallable.java
        ScannerTimeoutException.java
        ServerCallable.java
        UnmodifyableHColumnDescriptor.java
        UnmodifyableHRegionInfo.java
        UnmodifyableHTableDescriptor.java
        ccindex
        CCIndexAdmin.java
        CCIndexConstants.java
        CCIndexDescriptor.java
        HTable.java
        IndexKeyGenerator.java
        IndexNotFoundException.java
        IndexSpecification.java
        IndexSpecificationArray.java
        IndexedTable.java
        Optimizer.java
        Range.java
        ResultReader.java
        SimpleIndexKeyGenerator.java
        SimpleOptimizer.java
        SingleReader.java
        Utilities.java
        test.java
        package-info.java
        replication
        ReplicationAdmin.java
        executor
        EventHandler.java
        ExecutorService.java
        RegionTransitionData.java
        filter
        BinaryComparator.java
        BinaryPrefixComparator.java
        ColumnCountGetFilter.java
        ColumnPaginationFilter.java
        ColumnPrefixFilter.java
        CompareFilter.java
        DependentColumnFilter.java
        FamilyFilter.java
        Filter.java
        FilterBase.java
        FilterList.java
        FirstKeyOnlyFilter.java
        InclusiveStopFilter.java
        IncompatibleFilterException.java
        InvalidRowFilterException.java
        KeyOnlyFilter.java
        PageFilter.java
        PrefixFilter.java
        QualifierFilter.java
        RegexStringComparator.java
        RowFilter.java
        SingleColumnValueExcludeFilter.java
        SingleColumnValueFilter.java
        SkipFilter.java
        SubstringComparator.java
        TimestampsFilter.java
        ValueFilter.java
        WhileMatchFilter.java
        WritableByteArrayComparable.java
        package-info.java
        io
        CodeToClassAndBack.java
        HalfStoreFileReader.java
        HbaseMapWritable.java
        HbaseObjectWritable.java
        HeapSize.java
        ImmutableBytesWritable.java
        Reference.java
        TimeRange.java
        WritableWithSize.java
        hfile
        BlockCache.java
        BoundedRangeFileInputStream.java
        CachedBlock.java
        CachedBlockQueue.java
        Compression.java
        HFile.java
        HFileScanner.java
        LruBlockCache.java
        SimpleBlockCache.java
        ipc
        ByteBufferOutputStream.java
        HBaseClient.java
        HBaseRPC.java
        HBaseRPCErrorHandler.java
        HBaseRPCProtocolVersion.java
        HBaseRPCStatistics.java
        HBaseRpcMetrics.java
        HBaseServer.java
        HMasterInterface.java
        HMasterRegionInterface.java
        HRegionInterface.java
        ServerNotRunningException.java
        mapred
        Driver.java
        GroupingTableMap.java
        HRegionPartitioner.java
        IdentityTableMap.java
        IdentityTableReduce.java
        RowCounter.java
        TableInputFormat.java
        TableInputFormatBase.java
        TableMap.java
        TableMapReduceUtil.java
        TableOutputFormat.java
        TableRecordReader.java
        TableRecordReaderImpl.java
        TableReduce.java
        TableSplit.java
        package-info.java
        mapreduce
        CopyTable.java
        Driver.java
        Export.java
        GroupingTableMapper.java
        HFileOutputFormat.java
        HRegionPartitioner.java
        IdentityTableMapper.java
        IdentityTableReducer.java
        Import.java
        ImportTsv.java
        KeyValueSortReducer.java
        LoadIncrementalHFiles.java
        MultiTableOutputFormat.java
        PutSortReducer.java
        RowCounter.java
        SimpleTotalOrderPartitioner.java
        TableInputFormat.java
        TableInputFormatBase.java
        TableMapReduceUtil.java
        TableMapper.java
        TableOutputCommitter.java
        TableOutputFormat.java
        TableRecordReader.java
        TableRecordReaderImpl.java
        TableReducer.java
        TableSplit.java
        hadoopbackport
        InputSampler.java
        TotalOrderPartitioner.java
        package-info.java
        replication
        VerifyReplication.java
        master
        ActiveMasterManager.java
        AssignmentManager.java
        BulkAssigner.java
        CatalogJanitor.java
        DeadServer.java
        HMaster.java
        HMasterCommandLine.java
        LoadBalancer.java
        LogCleaner.java
        LogCleanerDelegate.java
        MasterFileSystem.java
        MasterServices.java
        ServerManager.java
        TimeToLiveLogCleaner.java
        handler
        ClosedRegionHandler.java
        DeleteTableHandler.java
        DisableTableHandler.java
        EnableTableHandler.java
        MetaServerShutdownHandler.java
        ModifyTableHandler.java
        OpenedRegionHandler.java
        ServerShutdownHandler.java
        TableAddFamilyHandler.java
        TableDeleteFamilyHandler.java
        TableEventHandler.java
        TableModifyFamilyHandler.java
        TotesHRegionInfo.java
        metrics
        MasterMetrics.java
        MasterStatistics.java
        metrics
        HBaseInfo.java
        MetricsMBeanBase.java
        MetricsRate.java
        MetricsString.java
        PersistentMetricsTimeVaryingRate.java
        file
        TimeStampingFileContext.java
        regionserver
        ChangedReadersObserver.java
        ColumnCount.java
        ColumnTracker.java
        CompactSplitThread.java
        CompactionRequestor.java
        DebugPrint.java
        DeleteTracker.java
        ExplicitColumnTracker.java
        FlushRequester.java
        GetClosestRowBeforeTracker.java
        HRegion.java
        HRegionServer.java
        HRegionServerCommandLine.java
        InternalScan.java
        InternalScanner.java
        KeyValueHeap.java
        KeyValueScanner.java
        KeyValueSkipListSet.java
        LeaseException.java
        LeaseListener.java
        Leases.java
        LogRoller.java
        LruHashMap.java
        MemStore.java
        MemStoreFlusher.java
        NoSuchColumnFamilyException.java
        OnlineRegions.java
        PriorityCompactionQueue.java
        ReadWriteConsistencyControl.java
        RegionServerRunningException.java
        RegionServerServices.java
        RegionServerStoppedException.java
        ScanDeleteTracker.java
        ScanQueryMatcher.java
        ScanWildcardColumnTracker.java
        ShutdownHook.java
        SplitTransaction.java
        Store.java
        StoreFile.java
        StoreFileScanner.java
        StoreFlusher.java
        StoreScanner.java
        TimeRangeTracker.java
        WrongRegionException.java
        ccindex
        ByteUtil.java
        Checker.java
        CheckerMaster.java
        Flusher.java
        IndexMaintenanceException.java
        IndexMaintenanceUtils.java
        IndexedRegion.java
        IndexedRegionServer.java
        handler
        CloseMetaHandler.java
        CloseRegionHandler.java
        CloseRootHandler.java
        OpenMetaHandler.java
        OpenRegionHandler.java
        OpenRootHandler.java
        metrics
        RegionServerMetrics.java
        RegionServerStatistics.java
        wal
        FailedLogCloseException.java
        HLog.java
        HLogKey.java
        HLogSplitter.java
        OrphanHLogAfterSplitException.java
        SequenceFileLogReader.java
        SequenceFileLogWriter.java
        WALEdit.java
        WALObserver.java
        replication
        ReplicationPeer.java
        ReplicationZookeeper.java
        master
        ReplicationLogCleaner.java
        regionserver
        Replication.java
        ReplicationSink.java
        ReplicationSinkMetrics.java
        ReplicationSource.java
        ReplicationSourceInterface.java
        ReplicationSourceManager.java
        ReplicationSourceMetrics.java
        ReplicationStatistics.java
        rest
        Constants.java
        ExistsResource.java
        Main.java
        ProtobufMessageHandler.java
        RESTServlet.java
        RegionsResource.java
        ResourceBase.java
        ResourceConfig.java
        ResultGenerator.java
        RootResource.java
        RowResource.java
        RowResultGenerator.java
        RowSpec.java
        ScannerInstanceResource.java
        ScannerResource.java
        ScannerResultGenerator.java
        SchemaResource.java
        StorageClusterStatusResource.java
        StorageClusterVersionResource.java
        TableResource.java
        VersionResource.java
        client
        Client.java
        Cluster.java
        RemoteAdmin.java
        RemoteHTable.java
        Response.java
        filter
        GZIPRequestStream.java
        GZIPRequestWrapper.java
        GZIPResponseStream.java
        GZIPResponseWrapper.java
        GzipFilter.java
        metrics
        RESTMetrics.java
        RESTStatistics.java
        model
        CellModel.java
        CellSetModel.java
        ColumnSchemaModel.java
        RowModel.java
        ScannerModel.java
        StorageClusterStatusModel.java
        StorageClusterVersionModel.java
        TableInfoModel.java
        TableListModel.java
        TableModel.java
        TableRegionModel.java
        TableSchemaModel.java
        VersionModel.java
        protobuf
        generated
        CellMessage.java
        CellSetMessage.java
        ColumnSchemaMessage.java
        ScannerMessage.java
        StorageClusterStatusMessage.java
        TableInfoMessage.java
        TableListMessage.java
        TableSchemaMessage.java
        VersionMessage.java
        provider
        JAXBContextResolver.java
        consumer
        ProtobufMessageBodyConsumer.java
        producer
        PlainTextMessageBodyProducer.java
        ProtobufMessageBodyProducer.java
        transform
        Base64.java
        NullTransform.java
        Transform.java
        security
        User.java
        thrift
        ThriftServer.java
        ThriftUtilities.java
        generated
        AlreadyExists.java
        BatchMutation.java
        ColumnDescriptor.java
        Hbase.java
        IOError.java
        IllegalArgument.java
        Mutation.java
        TCell.java
        TRegionInfo.java
        TRowResult.java
        util
        Base64.java
        BloomFilter.java
        ByteBloomFilter.java
        Bytes.java
        ClassSize.java
        CompressionTest.java
        DefaultEnvironmentEdge.java
        DynamicByteBloomFilter.java
        EnvironmentEdge.java
        EnvironmentEdgeManager.java
        FSUtils.java
        FileSystemVersionException.java
        HBaseConfTool.java
        HBaseFsck.java
        HBaseFsckRepair.java
        HMerge.java
        Hash.java
        IncrementingEnvironmentEdge.java
        InfoServer.java
        JVMClusterUtil.java
        JenkinsHash.java
        JvmVersion.java
        Keying.java
        MD5Hash.java
        ManualEnvironmentEdge.java
        Merge.java
        MetaUtils.java
        MurmurHash.java
        Pair.java
        PairOfSameType.java
        ServerCommandLine.java
        Sleeper.java
        SoftValueSortedMap.java
        Strings.java
        Threads.java
        VersionInfo.java
        Writables.java
        zookeeper
        ClusterStatusTracker.java
        HQuorumPeer.java
        MetaNodeTracker.java
        MiniZooKeeperCluster.java
        RegionServerTracker.java
        RootRegionTracker.java
        ZKAssign.java
        ZKConfig.java
        ZKServerTool.java
        ZKTable.java
        ZKTableDisable.java
        ZKUtil.java
        ZooKeeperListener.java
        ZooKeeperMainServerArg.java
        ZooKeeperNodeTracker.java
        ZooKeeperWatcher.java

/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.replication.ReplicationZookeeper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;

/**
 * Class that handles the source of a replication stream.
 * Currently does not handle more than 1 slave
 * For each slave cluster it selects a random number of peers
 * using a replication ratio. For example, if replication ratio = 0.1
 * and slave cluster has 100 region servers, 10 will be selected.
 * <p/>
 * A stream is considered down when we cannot contact a region server on the
 * peer cluster for more than 55 seconds by default.
 * <p/>
 *
 */
public class ReplicationSource extends Thread
    implements ReplicationSourceInterface {

  private static final Log LOG = LogFactory.getLog(ReplicationSource.class);
  // Queue of logs to process, ordered by the LogsComparator given in init()
  private PriorityBlockingQueue<Path> queue;
  // Preallocated container of entries to replicate; its slots are handed to
  // the log reader one by one while reading a batch
  private HLog.Entry[] entriesArray;
  // Connection obtained from HConnectionManager in init()
  private HConnection conn;
  // Helper class for zookeeper
  private ReplicationZookeeper zkHelper;
  // Configuration passed to init()
  private Configuration conf;
  // ratio of region servers to choose from a slave cluster
  private float ratio;
  // Source of randomness used when picking sinks
  private Random random;
  // should we replicate or not?
  private AtomicBoolean replicating;
  // id of the peer cluster this source replicates to
  private String peerClusterId;
  // The manager of all sources to which we ping back our progress
  private ReplicationSourceManager manager;
  // Should we stop everything?
  private Stoppable stopper;
  // List of chosen sinks (region servers)
  private List<HServerAddress> currentPeers;
  // How long should we sleep for each retry (milliseconds)
  private long sleepForRetries;
  // Max number of log bytes read per batch; compared against how far the
  // reader position advanced since the batch started
  private long replicationQueueSizeCapacity;
  // Max number of entries in entriesArray
  private int replicationQueueNbCapacity;
  // Our reader for the current log
  private HLog.Reader reader;
  // Current position in the log
  private long position = 0;
  // Path of the current log
  private volatile Path currentPath;
  // File system the logs are read from
  private FileSystem fs;
  // id of this cluster, stamped on every log key that gets replicated
  private byte clusterId;
  // total number of edits we replicated
  private long totalReplicatedEdits = 0;
  // The znode we currently play with
  private String peerClusterZnode;
  // Indicates if this queue is recovered (and will be deleted when depleted)
  private boolean queueRecovered;
  // List of all the dead region servers that had this queue (if recovered)
  private String[] deadRegionServers;
  // Maximum number of retries before taking bold actions: once the sleep
  // multiplier reaches this value a problematic file is considered for dumping
  private long maxRetriesMultiplier;
  // Current number of entries that we need to replicate
  private int currentNbEntries = 0;
  // Current number of operations (Put/Delete) that we need to replicate
  private int currentNbOperations = 0;
  // Indicates if this particular source is running
  private volatile boolean running = true;
  // Metrics for this source
  private ReplicationSourceMetrics metrics;
  // If source is enabled, replication happens. If disabled, nothing will be
  // replicated but HLogs will still be queued
  private AtomicBoolean sourceEnabled = new AtomicBoolean();

  /**
   * Prepares this source for use by a region server: reads its tunables from
   * the configuration, preallocates the batch buffer, creates the log queue
   * and finally works out whether the queue it serves is a recovered one.
   *
   * @param conf configuration to read the replication settings from
   * @param fs file system the logs are read from
   * @param manager replication manager this source reports its progress to
   * @param stopper used to find out whether everything should stop
   * @param replicating flag that turns replication on and off
   * @param peerClusterZnode the name of the znode of this queue
   * @throws IOException declared for callers; see the collaborators invoked
   */
  public void init(final Configuration conf,
                   final FileSystem fs,
                   final ReplicationSourceManager manager,
                   final Stoppable stopper,
                   final AtomicBoolean replicating,
                   final String peerClusterZnode)
      throws IOException {
    this.stopper = stopper;
    this.conf = conf;

    // Batch limits, plus a buffer of reusable entries sized accordingly
    this.replicationQueueSizeCapacity =
        conf.getLong("replication.source.size.capacity", 64 * 1024 * 1024);
    this.replicationQueueNbCapacity =
        conf.getInt("replication.source.nb.capacity", 25000);
    final HLog.Entry[] buffer = new HLog.Entry[this.replicationQueueNbCapacity];
    for (int slot = 0; slot < buffer.length; slot++) {
      buffer[slot] = new HLog.Entry();
    }
    this.entriesArray = buffer;

    this.maxRetriesMultiplier =
        conf.getLong("replication.source.maxretriesmultiplier", 10);

    final int initialQueueCapacity =
        conf.getInt("hbase.regionserver.maxlogs", 32);
    this.queue = new PriorityBlockingQueue<Path>(
        initialQueueCapacity, new LogsComparator());

    // Collaborators
    this.conn = HConnectionManager.getConnection(conf);
    this.zkHelper = manager.getRepZkWrapper();

    // Sink selection state
    this.ratio = conf.getFloat("replication.source.ratio", 0.1f);
    this.currentPeers = new ArrayList<HServerAddress>();
    this.random = new Random();

    this.replicating = replicating;
    this.manager = manager;
    this.sleepForRetries =
        conf.getLong("replication.source.sleepforretries", 1000);
    this.fs = fs;
    this.clusterId = Byte.valueOf(this.zkHelper.getClusterId());
    this.metrics = new ReplicationSourceMetrics(peerClusterZnode);

    // Last step: figure out if we are serving a recovered queue
    checkIfQueueRecovered(peerClusterZnode);
  }

  /**
   * Decodes the znode name of the queue. It is either the bare id of the peer
   * cluster, or the handling history of a recovered queue in the form
   * id-servername-*. Everything after the first dash-separated token is
   * kept as a place where the hlogs of that queue could be found.
   * NOTE(review): the name is split on every '-', so a server name that itself
   * contains a dash would be cut into several pieces - confirm the name format.
   *
   * @param peerClusterZnode the znode name to decode
   */
  private void checkIfQueueRecovered(String peerClusterZnode) {
    final String[] tokens = peerClusterZnode.split("-");
    this.queueRecovered = tokens.length != 1;
    if (this.queueRecovered) {
      this.peerClusterId = tokens[0];
    } else {
      this.peerClusterId = peerClusterZnode;
    }
    this.peerClusterZnode = peerClusterZnode;
    // All the tokens but the first one name the dead region servers
    this.deadRegionServers = Arrays.copyOfRange(tokens, 1, tokens.length);
  }

  /**
   * Select a number of peers at random using the ratio. The number of sinks
   * is the slave count times the ratio, rounded up (so at least 1 when there
   * are slaves and the ratio is positive) and never more than the number of
   * distinct slave addresses.
   * <p/>
   * Sinks are drawn without replacement from a de-duplicated candidate list,
   * which guarantees termination even if the ratio is greater than 1 or if
   * the same address is listed more than once.
   *
   * @throws KeeperException if the slave addresses cannot be fetched
   */
  private void chooseSinks() throws KeeperException {
    this.currentPeers.clear();
    List<HServerAddress> addresses =
        this.zkHelper.getSlavesAddresses(peerClusterId);
    // Work on distinct addresses only, we never want the same sink twice
    List<HServerAddress> candidates = new ArrayList<HServerAddress>(
        new HashSet<HServerAddress>(addresses));
    // Never ask for more sinks than there are distinct candidates
    int nbPeers = Math.min(candidates.size(),
        (int) (Math.ceil(addresses.size() * ratio)));
    LOG.info("Getting " + nbPeers +
        " rs from peer cluster # " + peerClusterId);
    for (int i = 0; i < nbPeers; i++) {
      // Removing the pick makes sure it cannot be selected again
      HServerAddress address =
          candidates.remove(this.random.nextInt(candidates.size()));
      LOG.info("Choosing peer " + address);
      this.currentPeers.add(address);
    }
  }

  /**
   * Adds a log to the queue of logs to process and publishes the resulting
   * queue size through the metrics of this source.
   *
   * @param log path of the log to enqueue
   */
  @Override
  public void enqueueLog(Path log) {
    queue.put(log);
    final int pendingLogs = queue.size();
    metrics.sizeOfLogQueue.set(pendingLogs);
  }

  /**
   * Main loop of the source. It first waits until sinks were chosen, then,
   * until the stopper is stopped or this source is no longer running, it
   * repeatedly: picks the next log, opens a reader on it, reads a batch of
   * entries and ships them. Whenever there is nothing to do, or an
   * IOException was hit, it records the current position through the manager
   * and sleeps, with a multiplier that grows on each consecutive retry.
   */
  @Override
  public void run() {
    connectToPeers();
    // We were stopped while looping to connect to sinks, just abort
    if (this.stopper.isStopped()) {
      return;
    }
    // If this is recovered, the queue is already full and the first log
    // normally has a position (unless the RS failed between 2 logs)
    // NOTE(review): queue.peek() returns null on an empty queue, so this
    // relies on recovered queues being populated before the thread starts -
    // confirm against the caller
    if (this.queueRecovered) {
      try {
        this.position = this.zkHelper.getHLogRepPosition(
            this.peerClusterZnode, this.queue.peek().getName());
      } catch (KeeperException e) {
        // terminate() is defined outside this view; presumably it makes the
        // loop below exit - confirm
        this.terminate("Couldn't get the position of this recovered queue " +
            peerClusterZnode, e);
      }
    }
    // Grows by one each time we had to wait, reset once progress is made
    int sleepMultiplier = 1;
    // Loop until we close down
    while (!stopper.isStopped() && this.running) {
      // Sleep until replication is enabled again
      if (!this.replicating.get() || !this.sourceEnabled.get()) {
        if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      // Get a new path
      if (!getNextPath()) {
        if (sleepForRetries("No log to process", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      // Open a reader on it
      if (!openReader(sleepMultiplier)) {
        // Reset the sleep multiplier, else it'd be reused for the next file
        sleepMultiplier = 1;
        continue;
      }

      // If we got a null reader but didn't continue, then sleep and continue
      if (this.reader == null) {
        if (sleepForRetries("Unable to open a reader", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }

      boolean gotIOE = false;
      // Start a fresh batch
      currentNbEntries = 0;
      try {
        // true means nothing was read and we moved on to the next file
        if(readAllEntriesToReplicateOrNextFile()) {
          continue;
        }
      } catch (IOException ioe) {
        LOG.warn(peerClusterZnode + " Got: ", ioe);
        gotIOE = true;
        // Only an EOF (wrapped as the cause) can lead to dumping the file
        if (ioe.getCause() instanceof EOFException) {

          boolean considerDumping = false;
          if (this.queueRecovered) {
            // Recovered queue: dumping is considered as soon as the file
            // status could be fetched, whether or not the file is empty
            try {
              FileStatus stat = this.fs.getFileStatus(this.currentPath);
              if (stat.getLen() == 0) {
                LOG.warn(peerClusterZnode + " Got EOF and the file was empty");
              }
              considerDumping = true;
            } catch (IOException e) {
              LOG.warn(peerClusterZnode + " Got while getting file size: ", e);
            }
          } else if (currentNbEntries != 0) {
            // Regular queue: an EOF after some entries were read; the batch
            // read so far is discarded
            LOG.warn(peerClusterZnode + " Got EOF while reading, " +
                "looks like this file is broken? " + currentPath);
            considerDumping = true;
            currentNbEntries = 0;
          }

          // Give up on the file only after the maximum number of retries
          if (considerDumping &&
              sleepMultiplier == this.maxRetriesMultiplier &&
              processEndOfFile()) {
            continue;
          }
        }
      } finally {
        // Runs on every path out of the try above, including the continues
        try {
          // A null current path presumably means processEndOfFile() already
          // moved past this file (it is defined outside this view), so the
          // reader position is only recorded when a path is still current
          // and the read did not fail
          if (this.currentPath != null && !gotIOE) {
            this.position = this.reader.getPosition();
          }
          if (this.reader != null) {
            this.reader.close();
          }
        } catch (IOException e) {
          gotIOE = true;
          LOG.warn("Unable to finalize the tailing of a file", e);
        }
      }

      // If we didn't get anything to replicate, or if we hit a IOE,
      // wait a bit and retry.
      // But if we need to stop, don't bother sleeping
      if (!stopper.isStopped() && (gotIOE || currentNbEntries == 0)) {
        this.manager.logPositionAndCleanOldLogs(this.currentPath,
            this.peerClusterZnode, this.position, queueRecovered);
        if (sleepForRetries("Nothing to replicate", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      // We have a batch: reset the back-off and hand it to shipEdits()
      sleepMultiplier = 1;
      shipEdits();

    }
    LOG.debug("Source exiting " + peerClusterId);
  }

  /**
   * Reads entries from the current log, starting at the current position,
   * into the preallocated entries array and keeps those that have to be
   * replicated. Reading stops at the end of the log, or as soon as the batch
   * is too big in bytes or in number of entries. When not a single entry was
   * seen, the end of the current file is processed instead.
   *
   * @return true if we got nothing and went to the next file, false if we got
   * entries
   * @throws IOException if the log cannot be read
   */
  protected boolean readAllEntriesToReplicateOrNextFile() throws IOException {
    long seenEntries = 0;
    // Resume where the previous pass stopped, if it stopped anywhere
    if (this.position != 0) {
      this.reader.seek(this.position);
    }
    // The slot at currentNbEntries is reused until an entry is retained
    for (HLog.Entry current =
             this.reader.next(this.entriesArray[currentNbEntries]);
         current != null;
         current = this.reader.next(this.entriesArray[currentNbEntries])) {
      WALEdit walEdit = current.getEdit();
      this.metrics.logEditsReadRate.inc(1);
      seenEntries++;
      // Remove all KVs that should not be replicated
      removeNonReplicableEdits(walEdit);
      HLogKey key = current.getKey();
      boolean catalogEdit =
          Bytes.equals(key.getTablename(), HConstants.ROOT_TABLE_NAME) ||
          Bytes.equals(key.getTablename(), HConstants.META_TABLE_NAME);
      // Catalog entries, edits left with nothing to replicate and anything
      // read while replication is switched off are filtered out
      if (catalogEdit || walEdit.size() == 0 || !replicating.get()) {
        this.metrics.logEditsFilteredRate.inc(1);
      } else {
        key.setClusterId(this.clusterId);
        currentNbOperations += countDistinctRowKeys(walEdit);
        currentNbEntries++;
      }
      // Stop if too many entries or too big
      boolean batchTooBig = (this.reader.getPosition() - this.position)
          >= this.replicationQueueSizeCapacity;
      boolean batchTooLong =
          currentNbEntries >= this.replicationQueueNbCapacity;
      if (batchTooBig || batchTooLong) {
        break;
      }
    }
    LOG.debug("currentNbOperations:" + currentNbOperations +
        " and seenEntries:" + seenEntries +
        " and size: " + (this.reader.getPosition() - this.position));
    // Something was read, so we are not at the end of the file yet
    if (seenEntries != 0) {
      return false;
    }
    // If we didn't get anything and the queue has an object, it means we
    // hit the end of the file for sure
    return processEndOfFile();
  }

  /**
   * Blocks until at least one sink was chosen on the peer cluster, or until
   * the stopper is stopped.
   * <p/>
   * Between two attempts it sleeps for <code>sleepForRetries</code>
   * milliseconds, also when the attempt failed on a ZooKeeper error, so that
   * a failing ZooKeeper is not retried in a tight loop. It returns as soon as
   * sinks are available instead of sleeping first.
   */
  private void connectToPeers() {
    // Connect to peer cluster first, unless we have to stop
    while (!this.stopper.isStopped() && this.currentPeers.size() == 0) {
      try {
        chooseSinks();
      } catch (KeeperException e) {
        LOG.error("Error talking to zookeeper, retrying", e);
      }
      if (this.currentPeers.size() != 0) {
        // We have sinks, no reason to wait any longer
        break;
      }
      try {
        Thread.sleep(this.sleepForRetries);
      } catch (InterruptedException e) {
        // Keep retrying as before; the loop condition decides when to give
        // up. The interrupt flag is deliberately not restored here since it
        // would make every following sleep return immediately.
        LOG.error("Interrupted while trying to connect to sinks", e);
      }
    }
  }

  /**
   * Makes sure there is a current path to work on. When there is none, the
   * queue is polled for at most <code>sleepForRetries</code> milliseconds and
   * the queue size metric is refreshed. A path that is already set is kept.
   *
   * @return true if a path was obtained, false if not
   */
  protected boolean getNextPath() {
    if (this.currentPath == null) {
      try {
        final Path polled =
            this.queue.poll(this.sleepForRetries, TimeUnit.MILLISECONDS);
        this.currentPath = polled;
        this.metrics.sizeOfLogQueue.set(this.queue.size());
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while reading edits", e);
      }
    }
    return this.currentPath != null;
  }

  /**
   * Open a reader on the current path.
   * <p/>
   * When the log is not found at the current path: for a recovered queue the
   * folders of the dead region servers are searched, and if the log still
   * sits in one of them the reader is left null so that the caller sleeps
   * and retries; for a regular queue the archive folder is checked, and if
   * the log was moved there the current path is updated and the log is
   * reopened from its new location.
   * <p/>
   * On any other IOException the file is only given up on (through
   * processEndOfFile) once the sleep multiplier reached the maximum number
   * of retries.
   *
   * @param sleepMultiplier by how many times the default sleeping time is augmented
   * @return true if we should continue with that file, false if we are over with it
   */
  protected boolean openReader(int sleepMultiplier) {
    try {
      LOG.debug("Opening log for replication " + this.currentPath.getName() +
          " at " + this.position);
      try {
        // Drop the previous reader first so that a failed open leaves null
        this.reader = null;
        this.reader = HLog.getReader(this.fs, this.currentPath, this.conf);
      } catch (FileNotFoundException fnfe) {
        if (this.queueRecovered) {
          // We didn't find the log in the archive directory, look if it still
          // exists in the dead RS folder (there could be a chain of failures
          // to look at)
          LOG.info("NB dead servers : " + deadRegionServers.length);
          for (int i = this.deadRegionServers.length - 1; i >= 0; i--) {

            Path deadRsDirectory =
                new Path(manager.getLogDir().getParent(), this.deadRegionServers[i]);
            Path possibleLogLocation =
                new Path(deadRsDirectory, currentPath.getName());
            LOG.info("Possible location " + possibleLogLocation.toUri().toString());
            if (this.manager.getFs().exists(possibleLogLocation)) {
              // We found the right new location
              LOG.info("Log " + this.currentPath + " still exists at " +
                  possibleLogLocation);
              // Returning here will make us sleep since reader is null
              return true;
            }
          }
          // TODO What happens if the log was missing from every single location?
          // Although we need to check a couple of times as the log could have
          // been moved by the master between the checks
          // It can also happen if a recovered queue wasn't properly cleaned,
          // such that the znode pointing to a log exists but the log was
          // deleted a long time ago.
          // For the moment, we'll throw the IO and processEndOfFile
          throw new IOException("File from recovered queue is " +
              "nowhere to be found", fnfe);
        } else {
          // If the log was archived, continue reading from there
          Path archivedLogLocation =
              new Path(manager.getOldLogDir(), currentPath.getName());
          if (this.manager.getFs().exists(archivedLogLocation)) {
            // Log before switching paths, so the message shows both the old
            // and the new location
            LOG.info("Log " + this.currentPath + " was moved to " +
                archivedLogLocation);
            currentPath = archivedLogLocation;
            // Open the log at the new location and report its outcome, it
            // may tell us that we are over with this file
            return this.openReader(sleepMultiplier);
          }
          // TODO What happens the log is missing in both places?
        }
      }
    } catch (IOException ioe) {
      LOG.warn(peerClusterZnode + " Got: ", ioe);
      // TODO Need a better way to determinate if a file is really gone but
      // TODO without scanning all logs dir
      if (sleepMultiplier == this.maxRetriesMultiplier) {
        LOG.warn("Waited too long for this file, considering dumping");
        return !processEndOfFile();
      }
    }
    return true;
  }

  /**
   * Pause the current thread between two attempts. An interruption cuts the
   * pause short and is only logged.
   * @param msg reason for sleeping, used as the prefix of the debug message
   * @param sleepMultiplier factor applied to the default sleeping time
   * @return true while <code>sleepMultiplier</code> is still below
   * <code>maxRetriesMultiplier</code>
   */
  protected boolean sleepForRetries(String msg, int sleepMultiplier) {
    LOG.debug(msg + ", sleeping " + sleepForRetries + " times " + sleepMultiplier);
    try {
      Thread.sleep(this.sleepForRetries * sleepMultiplier);
    } catch (InterruptedException ie) {
      LOG.debug("Interrupted while sleeping between retries");
    }
    return maxRetriesMultiplier > sleepMultiplier;
  }

  /**
   * Strip from the given edit every KV whose family has no entry in the
   * edit's scopes, so that only KVs scoped other than local remain. The
   * edit's KV list is modified in place.
   * @param edit The edit to filter for replication
   */
  protected void removeNonReplicableEdits(WALEdit edit) {
    NavigableMap<byte[], Integer> scopes = edit.getScopes();
    List<KeyValue> kvs = edit.getKeyValues();
    // Walk backwards so that removing an element never shifts the ones
    // that are still to be visited
    for (int idx = edit.size() - 1; idx >= 0; idx--) {
      // Scopes are null when there's nothing to replicate in that WALEdit
      boolean replicable =
          scopes != null && scopes.containsKey(kvs.get(idx).getFamily());
      if (!replicable) {
        kvs.remove(idx);
      }
    }
  }

  /**
   * Count the number of different row keys in the given edit because of
   * mini-batching. A new row is counted each time a KV's row differs from
   * the row of the KV right before it, so the result equals the number of
   * distinct rows as long as KVs of the same row are adjacent in the edit
   * (presumably always the case -- confirm against the WAL writers).
   * @param edit edit to count row keys from
   * @return number of different row keys, 0 if the edit holds no KV
   */
  private int countDistinctRowKeys(WALEdit edit) {
    List<KeyValue> kvs = edit.getKeyValues();
    if (kvs.isEmpty()) {
      return 0;
    }
    int distinctRowKeys = 1;
    KeyValue lastKV = kvs.get(0);
    // Index 0 is the reference KV itself, start comparing at the second one
    for (int i = 1; i < edit.size(); i++) {
      KeyValue kv = kvs.get(i);
      if (!kv.matchingRow(lastKV)) {
        distinctRowKeys++;
      }
      // Always compare against the previous KV, not the first one, else
      // every KV after the first row change would be counted as a new row
      lastKV = kv;
    }
    return distinctRowKeys;
  }

  /**
   * Do the shipping logic: send the first <code>currentNbEntries</code>
   * entries of <code>entriesArray</code> to a region server of the peer
   * cluster, then record the new position and update the metrics.
   * <p>
   * The send is retried until it succeeds or the stopper reports a stop.
   * After a failure the method waits, with a growing sleep multiplier, until
   * {@link #isSlaveDown()} reports the peer as reachable again; once the
   * multiplier has reached <code>maxRetriesMultiplier</code> the list of
   * sinks is chosen again on every further failed ping.
   */
  protected void shipEdits() {
    int sleepMultiplier = 1;
    if (this.currentNbEntries == 0) {
      LOG.warn("Was given 0 edits to ship");
      return;
    }
    while (!this.stopper.isStopped()) {
      try {
        HRegionInterface rrs = getRS();
        LOG.debug("Replicating " + currentNbEntries);
        // Only the filled part of the array is shipped
        rrs.replicateLogEntries(Arrays.copyOf(this.entriesArray, currentNbEntries));
        this.manager.logPositionAndCleanOldLogs(this.currentPath,
            this.peerClusterZnode, this.position, queueRecovered);
        this.totalReplicatedEdits += currentNbEntries;
        this.metrics.shippedBatchesRate.inc(1);
        this.metrics.shippedOpsRate.inc(
            this.currentNbOperations);
        // The last shipped entry sits at currentNbEntries - 1; slots past it
        // are not part of this batch and must not drive the age metric
        this.metrics.setAgeOfLastShippedOp(
            this.entriesArray[this.currentNbEntries - 1].getKey().getWriteTime());
        LOG.debug("Replicated in total: " + this.totalReplicatedEdits);
        break;

      } catch (IOException ioe) {
        LOG.warn("Unable to replicate because ", ioe);
        try {
          boolean down;
          do {
            down = isSlaveDown();
            if (down) {
              LOG.debug("The region server we tried to ping didn't answer, " +
                  "sleeping " + sleepForRetries + " times " + sleepMultiplier);
              Thread.sleep(this.sleepForRetries * sleepMultiplier);
              if (sleepMultiplier < maxRetriesMultiplier) {
                sleepMultiplier++;
              } else {
                // Backed off as far as allowed, pick a new set of sinks
                chooseSinks();
              }
            }
          } while (!this.stopper.isStopped() && down);
        } catch (InterruptedException e) {
          LOG.debug("Interrupted while trying to contact the peer cluster");
        } catch (KeeperException e) {
          LOG.error("Error talking to zookeeper, retrying", e);
        }

      }
    }
  }

  /**
   * Decide what to do once the end of the current log was reached.
   * If other logs are queued, drop the current one so the next can be
   * picked up. Else if this is a recovered queue, it is finished: close it
   * and stop running. Else keep trying to read from the current log.
   * @return true if we're done with the current file, false if we should
   * continue trying to read from it
   */
  protected boolean processEndOfFile() {
    boolean moreLogsQueued = this.queue.size() > 0;
    if (moreLogsQueued) {
      // Forget the current log and start the next one from its beginning
      this.currentPath = null;
      this.position = 0;
      return true;
    }
    if (!this.queueRecovered) {
      return false;
    }
    this.manager.closeRecoveredQueue(this);
    LOG.info("Finished recovering the queue");
    this.running = false;
    return true;
  }

  /**
   * Start this source through <code>Threads.setDaemonThreadRunning</code>.
   * The thread name is the calling thread's name followed by
   * ".replicationSource," and the peer cluster znode. Any uncaught exception
   * terminates the source.
   */
  public void startup() {
    Thread.UncaughtExceptionHandler onUncaught =
        new Thread.UncaughtExceptionHandler() {
          public void uncaughtException(final Thread t, final Throwable e) {
            terminate("Uncaught exception during runtime", new Exception(e));
          }
        };
    String sourceThreadName = Thread.currentThread().getName()
        + ".replicationSource," + peerClusterZnode;
    Threads.setDaemonThreadRunning(this, sourceThreadName, onUncaught);
  }

  /**
   * Stop this source without an underlying error.
   * @param reason why the source is being closed
   */
  public void terminate(String reason) {
    this.terminate(reason, null);
  }

  /**
   * Stop this source: log why, clear the running flag and shut the thread
   * down through <code>Threads.shutdown</code>.
   * @param reason why the source is being closed
   * @param cause the error that triggered the termination, or null when
   * there is none; it decides whether the message is logged as info or error
   */
  public void terminate(String reason, Exception cause) {
    if (cause != null) {
      LOG.error("Closing source " + this.peerClusterZnode
          + " because an error occurred: " + reason, cause);
    } else {
      LOG.info("Closing source "
          + this.peerClusterZnode + " because: " + reason);
    }
    this.running = false;
    Threads.shutdown(this, this.sleepForRetries);
  }

  /**
   * Get a connection to a region server picked at random among the
   * current peers.
   * @return the connection to the chosen region server
   * @throws IOException if there is currently no peer to choose from, or
   * if the connection cannot be obtained
   */
  private HRegionInterface getRS() throws IOException {
    if (this.currentPeers.isEmpty()) {
      throw new IOException(this.peerClusterZnode + " has 0 region servers");
    }
    int chosen = random.nextInt(this.currentPeers.size());
    HServerAddress address = currentPeers.get(chosen);
    return this.conn.getHRegionConnection(address);
  }

  /**
   * Check if the slave is down by pinging one of its region servers from a
   * separate thread and waiting at most <code>sleepForRetries</code>
   * milliseconds for the answer.
   * @return true if down, false if up
   * @throws InterruptedException if interrupted while waiting for the answer
   */
  public boolean isSlaveDown() throws InterruptedException {
    final CountDownLatch answered = new CountDownLatch(1);
    Runnable ping = new Runnable() {
      public void run() {
        try {
          // Dummy call, it only completes if the region server answers
          getRS().getHServerInfo();
          answered.countDown();
        } catch (IOException ex) {
          LOG.info("Slave cluster looks down: " + ex.getMessage());
        }
      }
    };
    Thread pingThread = new Thread(ping);
    pingThread.start();
    // await returns true only if the ping counted the latch down in time
    boolean answeredInTime =
        answered.await(this.sleepForRetries, TimeUnit.MILLISECONDS);
    pingThread.interrupt();
    return !answeredInTime;
  }

  /**
   * @return the peer cluster znode this source was given; it is also used
   * in this source's thread name and log messages
   */
  public String getPeerClusterZnode() {
    return this.peerClusterZnode;
  }

  /**
   * @return the id of the peer cluster held by this source
   */
  public String getPeerClusterId() {
    return this.peerClusterId;
  }

  /**
   * @return the log currently selected by this source, or null when no log
   * is selected
   */
  public Path getCurrentPath() {
    return this.currentPath;
  }

  /**
   * Set the enabled flag of this source.
   * @param status the new value of the flag
   */
  public void setSourceEnabled(boolean status) {
    this.sourceEnabled.set(status);
  }

  /**
   * Comparator used to compare logs together based on their start time,
   * which is the numeric suffix after the last dot of the file name.
   * The comparator is stateless, so all instances impose the same ordering.
   */
  public static class LogsComparator implements Comparator<Path> {

    /**
     * Order two logs by ascending start time.
     * @param o1 first log
     * @param o2 second log
     * @return a negative number, zero or a positive number if the first log
     * started before, at the same time as, or after the second one
     * @throws NumberFormatException if the last dot-separated part of a
     * file name is not a number
     */
    @Override
    public int compare(Path o1, Path o2) {
      long ts1 = getTS(o1);
      long ts2 = getTS(o2);
      // Plain comparison: no boxing and no overflow-prone subtraction
      return ts1 < ts2 ? -1 : (ts1 == ts2 ? 0 : 1);
    }

    /**
     * Any other LogsComparator imposes the same ordering; nothing else
     * (null included) is equal to this comparator.
     */
    @Override
    public boolean equals(Object o) {
      return o instanceof LogsComparator;
    }

    /**
     * All instances are equal, so they all share the same hash code.
     */
    @Override
    public int hashCode() {
      return LogsComparator.class.hashCode();
    }

    /**
     * Split a path to get the start time
     * For example: 10.20.20.171%3A60020.1277499063250
     * @param p path to split
     * @return start time
     */
    private long getTS(Path p) {
      String[] parts = p.getName().split("\\.");
      return Long.parseLong(parts[parts.length-1]);
    }
  }
}