TestJournal.java example

Explorer
HDP-2.2-Patched-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.qjournal.server;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.qjournal.QJMTestUtil;
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
import org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProtoOrBuilder;
import org.apache.hadoop.hdfs.qjournal.server.Journal;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.*;
import org.mockito.Mockito;

public class TestJournal {
  private static final NamespaceInfo FAKE_NSINFO = new NamespaceInfo(
      12345, "mycluster", "my-bp", 0L);
  private static final NamespaceInfo FAKE_NSINFO_2 = new NamespaceInfo(
      6789, "mycluster", "my-bp", 0L);
  
  private static final String JID = "test-journal";

  private static final File TEST_LOG_DIR = new File(
      new File(MiniDFSCluster.getBaseDirectory()), "TestJournal");

  private StorageErrorReporter mockErrorReporter = Mockito.mock(
      StorageErrorReporter.class);

  private Configuration conf;
  private Journal journal;

  
  @Before
  public void setup() throws Exception {
    FileUtil.fullyDelete(TEST_LOG_DIR);
    conf = new Configuration();
    journal = new Journal(conf, TEST_LOG_DIR, JID,
      mockErrorReporter);
    journal.format(FAKE_NSINFO);
  }
  
  @After
  public void verifyNoStorageErrors() throws Exception{
    Mockito.verify(mockErrorReporter, Mockito.never())
      .reportErrorOnFile(Mockito.<File>any());
  }
  
  @After
  public void cleanup() {
    IOUtils.closeStream(journal);
  }
  
  @Test (timeout = 10000)
  public void testEpochHandling() throws Exception {
    assertEquals(0, journal.getLastPromisedEpoch());
    NewEpochResponseProto newEpoch =
        journal.newEpoch(FAKE_NSINFO, 1);
    assertFalse(newEpoch.hasLastSegmentTxId());
    assertEquals(1, journal.getLastPromisedEpoch());
    journal.newEpoch(FAKE_NSINFO, 3);
    assertFalse(newEpoch.hasLastSegmentTxId());
    assertEquals(3, journal.getLastPromisedEpoch());
    try {
      journal.newEpoch(FAKE_NSINFO, 3);
      fail("Should have failed to promise same epoch twice");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Proposed epoch 3 <= last promise 3", ioe);
    }
    try {
      journal.startLogSegment(makeRI(1), 12345L);
      fail("Should have rejected call from prior epoch");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "epoch 1 is less than the last promised epoch 3", ioe);
    }
    try {
      journal.journal(makeRI(1), 12345L, 100L, 0, new byte[0]);
      fail("Should have rejected call from prior epoch");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "epoch 1 is less than the last promised epoch 3", ioe);
    }
  }
  
  @Test (timeout = 10000)
  public void testMaintainCommittedTxId() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    journal.startLogSegment(makeRI(1), 1);
    // Send txids 1-3, with a request indicating only 0 committed
    journal.journal(new RequestInfo(JID, 1, 2, 0), 1, 1, 3,
        QJMTestUtil.createTxnData(1, 3));
    assertEquals(0, journal.getCommittedTxnIdForTests());
    
    // Send 4-6, with request indicating that through 3 is committed.
    journal.journal(new RequestInfo(JID, 1, 3, 3), 1, 4, 3,
        QJMTestUtil.createTxnData(4, 6));
    assertEquals(3, journal.getCommittedTxnIdForTests());    
  }
  
  @Test (timeout = 10000)
  public void testRestartJournal() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    journal.startLogSegment(makeRI(1), 1);
    journal.journal(makeRI(2), 1, 1, 2, 
        QJMTestUtil.createTxnData(1, 2));
    // Don't finalize.
    
    String storageString = journal.getStorage().toColonSeparatedString();
    System.err.println("storage string: " + storageString);
    journal.close(); // close to unlock the storage dir
    
    // Now re-instantiate, make sure history is still there
    journal = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
    
    // The storage info should be read, even if no writer has taken over.
    assertEquals(storageString,
        journal.getStorage().toColonSeparatedString());

    assertEquals(1, journal.getLastPromisedEpoch());
    NewEpochResponseProtoOrBuilder newEpoch = journal.newEpoch(FAKE_NSINFO, 2);
    assertEquals(1, newEpoch.getLastSegmentTxId());
  }
  
  @Test (timeout = 10000)
  public void testFormatResetsCachedValues() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 12345L);
    journal.startLogSegment(new RequestInfo(JID, 12345L, 1L, 0L), 1L);

    assertEquals(12345L, journal.getLastPromisedEpoch());
    assertEquals(12345L, journal.getLastWriterEpoch());
    assertTrue(journal.isFormatted());
    
    // Close the journal in preparation for reformatting it.
    journal.close();
    journal.format(FAKE_NSINFO_2);
    
    assertEquals(0, journal.getLastPromisedEpoch());
    assertEquals(0, journal.getLastWriterEpoch());
    assertTrue(journal.isFormatted());
  }
  
  /**
   * Test that, if the writer crashes at the very beginning of a segment,
   * before any transactions are written, that the next newEpoch() call
   * returns the prior segment txid as its most recent segment.
   */
  @Test (timeout = 10000)
  public void testNewEpochAtBeginningOfSegment() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    journal.startLogSegment(makeRI(1), 1);
    journal.journal(makeRI(2), 1, 1, 2, 
        QJMTestUtil.createTxnData(1, 2));
    journal.finalizeLogSegment(makeRI(3), 1, 2);
    journal.startLogSegment(makeRI(4), 3);
    NewEpochResponseProto resp = journal.newEpoch(FAKE_NSINFO, 2);
    assertEquals(1, resp.getLastSegmentTxId());
  }
  
  @Test (timeout = 10000)
  public void testJournalLocking() throws Exception {
    Assume.assumeTrue(journal.getStorage().getStorageDir(0).isLockSupported());
    StorageDirectory sd = journal.getStorage().getStorageDir(0);
    File lockFile = new File(sd.getRoot(), Storage.STORAGE_FILE_LOCK);
    
    // Journal should be locked, since the format() call locks it.
    GenericTestUtils.assertExists(lockFile);

    journal.newEpoch(FAKE_NSINFO,  1);
    try {
      new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
      fail("Did not fail to create another journal in same dir");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Cannot lock storage", ioe);
    }
    
    journal.close();
    
    // Journal should no longer be locked after the close() call.
    // Hence, should be able to create a new Journal in the same dir.
    Journal journal2 = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
    journal2.newEpoch(FAKE_NSINFO, 2);
    journal2.close();
  }
  
  /**
   * Test finalizing a segment after some batch of edits were missed.
   * This should fail, since we validate the log before finalization.
   */
  @Test (timeout = 10000)
  public void testFinalizeWhenEditsAreMissed() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    journal.startLogSegment(makeRI(1), 1);
    journal.journal(makeRI(2), 1, 1, 3,
        QJMTestUtil.createTxnData(1, 3));
    
    // Try to finalize up to txn 6, even though we only wrote up to txn 3.
    try {
      journal.finalizeLogSegment(makeRI(3), 1, 6);
      fail("did not fail to finalize");
    } catch (JournalOutOfSyncException e) {
      GenericTestUtils.assertExceptionContains(
          "but only written up to txid 3", e);
    }
    
    // Check that, even if we re-construct the journal by scanning the
    // disk, we don't allow finalizing incorrectly.
    journal.close();
    journal = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
    
    try {
      journal.finalizeLogSegment(makeRI(4), 1, 6);
      fail("did not fail to finalize");
    } catch (JournalOutOfSyncException e) {
      GenericTestUtils.assertExceptionContains(
          "disk only contains up to txid 3", e);
    }
  }
  
  /**
   * Ensure that finalizing a segment which doesn't exist throws the
   * appropriate exception.
   */
  @Test (timeout = 10000)
  public void testFinalizeMissingSegment() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    try {
      journal.finalizeLogSegment(makeRI(1), 1000, 1001);
      fail("did not fail to finalize");
    } catch (JournalOutOfSyncException e) {
      GenericTestUtils.assertExceptionContains(
          "No log file to finalize at transaction ID 1000", e);
    }
  }

  /**
   * Assume that a client is writing to a journal, but loses its connection
   * in the middle of a segment. Thus, any future journal() calls in that
   * segment may fail, because some txns were missed while the connection was
   * down.
   *
   * Eventually, the connection comes back, and the NN tries to start a new
   * segment at a higher txid. This should abort the old one and succeed.
   */
  @Test (timeout = 10000)
  public void testAbortOldSegmentIfFinalizeIsMissed() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    
    // Start a segment at txid 1, and write a batch of 3 txns.
    journal.startLogSegment(makeRI(1), 1);
    journal.journal(makeRI(2), 1, 1, 3,
        QJMTestUtil.createTxnData(1, 3));

    GenericTestUtils.assertExists(
        journal.getStorage().getInProgressEditLog(1));
    
    // Try to start new segment at txid 6, this should abort old segment and
    // then succeed, allowing us to write txid 6-9.
    journal.startLogSegment(makeRI(3), 6);
    journal.journal(makeRI(4), 6, 6, 3,
        QJMTestUtil.createTxnData(6, 3));

    // The old segment should *not* be finalized.
    GenericTestUtils.assertExists(
        journal.getStorage().getInProgressEditLog(1));
    GenericTestUtils.assertExists(
        journal.getStorage().getInProgressEditLog(6));
  }
  
  /**
   * Test behavior of startLogSegment() when a segment with the
   * same transaction ID already exists.
   */
  @Test (timeout = 10000)
  public void testStartLogSegmentWhenAlreadyExists() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);
    
    // Start a segment at txid 1, and write just 1 transaction. This
    // would normally be the START_LOG_SEGMENT transaction.
    journal.startLogSegment(makeRI(1), 1);
    journal.journal(makeRI(2), 1, 1, 1,
        QJMTestUtil.createTxnData(1, 1));
    
    // Try to start new segment at txid 1, this should succeed, because
    // we are allowed to re-start a segment if we only ever had the
    // START_LOG_SEGMENT transaction logged.
    journal.startLogSegment(makeRI(3), 1);
    journal.journal(makeRI(4), 1, 1, 1,
        QJMTestUtil.createTxnData(1, 1));

    // This time through, write more transactions afterwards, simulating
    // real user transactions.
    journal.journal(makeRI(5), 1, 2, 3,
        QJMTestUtil.createTxnData(2, 3));

    try {
      journal.startLogSegment(makeRI(6), 1);
      fail("Did not fail to start log segment which would overwrite " +
          "an existing one");
    } catch (IllegalStateException ise) {
      GenericTestUtils.assertExceptionContains(
          "seems to contain valid transactions", ise);
    }
    
    journal.finalizeLogSegment(makeRI(7), 1, 4);
    
    // Ensure that we cannot overwrite a finalized segment
    try {
      journal.startLogSegment(makeRI(8), 1);
      fail("Did not fail to start log segment which would overwrite " +
          "an existing one");
    } catch (IllegalStateException ise) {
      GenericTestUtils.assertExceptionContains(
          "have a finalized segment", ise);
    }

  }
  
  private static RequestInfo makeRI(int serial) {
    return new RequestInfo(JID, 1, serial, 0);
  }
  
  @Test (timeout = 10000)
  public void testNamespaceVerification() throws Exception {
    journal.newEpoch(FAKE_NSINFO, 1);

    try {
      journal.newEpoch(FAKE_NSINFO_2, 2);
      fail("Did not fail newEpoch() when namespaces mismatched");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Incompatible namespaceID", ioe);
    }
  }

}