/**
 * This file is part of Graylog.
 *
 * Graylog is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Graylog is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Graylog. If not, see <http://www.gnu.org/licenses/>.
 */
package org.graylog2.shared.journal;

import com.codahale.metrics.MetricRegistry;
import com.github.joschi.jadconfig.util.Size;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.eventbus.EventBus;
import com.google.common.io.Files;
import com.google.common.primitives.Ints;
import kafka.common.KafkaException;
import kafka.log.LogSegment;
import kafka.message.Message;
import kafka.message.MessageSet;
import kafka.utils.FileLock;
import org.graylog2.Configuration;
import org.graylog2.audit.NullAuditEventSender;
import org.graylog2.plugin.InstantMillisProvider;
import org.graylog2.plugin.ServerStatus;
import org.graylog2.plugin.lifecycles.Lifecycle;
import org.joda.time.DateTime;
import org.joda.time.DateTimeUtils;
import org.joda.time.DateTimeZone;
import org.joda.time.Duration;
import org.joda.time.Period;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.mockito.junit.MockitoJUnit;
import org.mockito.junit.MockitoRule;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.EnumSet;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ScheduledThreadPoolExecutor;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.commons.io.filefilter.FileFilterUtils.and;
import static org.apache.commons.io.filefilter.FileFilterUtils.directoryFileFilter;
import static org.apache.commons.io.filefilter.FileFilterUtils.fileFileFilter;
import static org.apache.commons.io.filefilter.FileFilterUtils.nameFileFilter;
import static org.apache.commons.io.filefilter.FileFilterUtils.suffixFileFilter;
import static org.apache.commons.lang.RandomStringUtils.randomAlphanumeric;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeTrue;
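/**
 * Tests for {@link KafkaJournal}: basic write/read behaviour, segment size limits, segment
 * rotation, the cleanup strategies (by size, by age, and by committed offset), journal
 * directory locking, and lifecycle throttling based on journal utilization.
 */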
public class KafkaJournalTest {
    @Rule
    public final MockitoRule mockitoRule = MockitoJUnit.rule();

    @Rule
    public final TemporaryFolder temporaryFolder = new TemporaryFolder();

    private ServerStatus serverStatus;
    private ScheduledThreadPoolExecutor scheduler;
    private File journalDirectory;

    @Before
    public void setUp() throws IOException {
        scheduler = new ScheduledThreadPoolExecutor(1);
        scheduler.prestartCoreThread();
        journalDirectory = temporaryFolder.newFolder();

        final File nodeId = temporaryFolder.newFile("node-id");
        Files.write(UUID.randomUUID().toString(), nodeId, StandardCharsets.UTF_8);

        final Configuration configuration = new Configuration() {
            @Override
            public String getNodeIdFile() {
                return nodeId.getAbsolutePath();
            }
        };

        serverStatus = new ServerStatus(configuration, EnumSet.of(ServerStatus.Capability.MASTER),
                new EventBus("KafkaJournalTest"), NullAuditEventSender::new);
    }

    @After
    public void tearDown() {
        scheduler.shutdown();
    }

    @Test
    public void writeAndRead() throws IOException {
        final Journal journal = new KafkaJournal(journalDirectory,
                scheduler,
                Size.megabytes(100L),
                Duration.standardHours(1),
                Size.megabytes(5L),
                Duration.standardHours(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);

        final byte[] idBytes = "id".getBytes(UTF_8);
        final byte[] messageBytes = "message".getBytes(UTF_8);

        final long position = journal.write(idBytes, messageBytes);

        final List<Journal.JournalReadEntry> messages = journal.read(1);
        final Journal.JournalReadEntry firstMessage = Iterators.getOnlyElement(messages.iterator());

        assertEquals("message", new String(firstMessage.getPayload(), UTF_8));
    }

    @Test
    public void readAtLeastOne() throws Exception {
        final Journal journal = new KafkaJournal(journalDirectory,
                scheduler,
                Size.megabytes(100L),
                Duration.standardHours(1),
                Size.megabytes(5L),
                Duration.standardHours(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);

        final byte[] idBytes = "id".getBytes(UTF_8);
        final byte[] messageBytes = "message1".getBytes(UTF_8);

        final long position = journal.write(idBytes, messageBytes);

        // Trying to read 0 should always read at least 1 entry.
        final List<Journal.JournalReadEntry> messages = journal.read(0);
        final Journal.JournalReadEntry firstMessage = Iterators.getOnlyElement(messages.iterator());

        assertEquals("message1", new String(firstMessage.getPayload(), UTF_8));
    }

    private int createBulkChunks(KafkaJournal journal, Size segmentSize, int bulkCount) {
        // Magic numbers deduced by magic…
        int bulkSize = Ints.saturatedCast(segmentSize.toBytes() / (2L * 16L));
        // perform multiple writes to make multiple segments
        for (int currentBulk = 0; currentBulk < bulkCount; currentBulk++) {
            final List<Journal.Entry> entries = Lists.newArrayListWithExpectedSize(bulkSize);
            long writtenBytes = 0L;
            for (int i = 0; i < bulkSize; i++) {
                final byte[] idBytes = ("id" + i).getBytes(UTF_8);
                final byte[] messageBytes = ("message " + i).getBytes(UTF_8);

                writtenBytes += 3 * (idBytes.length + messageBytes.length);
                if (writtenBytes > segmentSize.toBytes()) {
                    break;
                }
                entries.add(journal.createEntry(idBytes, messageBytes));
            }
            journal.write(entries);
        }

        return bulkSize;
    }

    private int countSegmentsInDir(File messageJournalFile) {
        // let it throw
        return messageJournalFile.list(and(fileFileFilter(), suffixFileFilter(".log"))).length;
    }

    @Test
    public void maxSegmentSize() throws Exception {
        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardHours(1),
                Size.kilobytes(10L),
                Duration.standardDays(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);

        long size = 0L;
        long maxSize = segmentSize.toBytes();
        final List<Journal.Entry> list = Lists.newArrayList();

        while (size <= maxSize) {
            final byte[] idBytes = ("the1-id").getBytes(UTF_8);
            final byte[] messageBytes = ("the1-message").getBytes(UTF_8);

            size += idBytes.length + messageBytes.length;

            list.add(journal.createEntry(idBytes, messageBytes));
        }

        // Make sure all messages have been written
        assertThat(journal.write(list)).isEqualTo(list.size() - 1);
    }
    @Test
    public void maxMessageSize() throws Exception {
        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardHours(1),
                Size.kilobytes(10L),
                Duration.standardDays(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);

        long size = 0L;
        long maxSize = segmentSize.toBytes();
        final List<Journal.Entry> list = Lists.newArrayList();

        final String largeMessage1 = randomAlphanumeric(Ints.saturatedCast(segmentSize.toBytes() * 2));
        list.add(journal.createEntry(randomAlphanumeric(6).getBytes(UTF_8), largeMessage1.getBytes(UTF_8)));

        final byte[] idBytes0 = randomAlphanumeric(6).getBytes(UTF_8);
        // Build a message that has exactly the max segment size
        final String largeMessage2 = randomAlphanumeric(Ints.saturatedCast(
                segmentSize.toBytes() - MessageSet.LogOverhead() - Message.MessageOverhead() - idBytes0.length));
        list.add(journal.createEntry(idBytes0, largeMessage2.getBytes(UTF_8)));

        while (size <= maxSize) {
            final byte[] idBytes = randomAlphanumeric(6).getBytes(UTF_8);
            final byte[] messageBytes = "the-message".getBytes(UTF_8);

            size += idBytes.length + messageBytes.length;

            list.add(journal.createEntry(idBytes, messageBytes));
        }

        // Make sure all messages but the large one have been written
        assertThat(journal.write(list)).isEqualTo(list.size() - 2);
    }

    @Test
    public void segmentRotation() throws Exception {
        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardHours(1),
                Size.kilobytes(10L),
                Duration.standardDays(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);

        createBulkChunks(journal, segmentSize, 3);

        final File[] files = journalDirectory.listFiles();
        assertNotNull(files);
        assertTrue("there should be files in the journal directory", files.length > 0);

        final File[] messageJournalDir = journalDirectory.listFiles(
                (FileFilter) and(directoryFileFilter(), nameFileFilter("messagejournal-0")));
        assertTrue(messageJournalDir.length == 1);

        final File[] logFiles = messageJournalDir[0].listFiles(
                (FileFilter) and(fileFileFilter(), suffixFileFilter(".log")));
        assertEquals("should have three journal segments", 3, logFiles.length);
    }

    @Test
    public void segmentSizeCleanup() throws Exception {
        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardHours(1),
                Size.kilobytes(1L),
                Duration.standardDays(1),
                1_000_000,
                Duration.standardMinutes(1),
                100,
                new MetricRegistry(),
                serverStatus);
        final File messageJournalDir = new File(journalDirectory, "messagejournal-0");
        assertTrue(messageJournalDir.exists());

        // create enough chunks so that we exceed the maximum journal size we configured
        createBulkChunks(journal, segmentSize, 3);

        // make sure everything is on disk
        journal.flushDirtyLogs();

        assertEquals(3, countSegmentsInDir(messageJournalDir));

        final int cleanedLogs = journal.cleanupLogs();
        assertEquals(1, cleanedLogs);

        final int numberOfSegments = countSegmentsInDir(messageJournalDir);
        assertEquals(2, numberOfSegments);
    }
File(journalDirectory, "messagejournal-0"); assertTrue(messageJournalDir.exists()); // we need to fix up the last modified times of the actual files. long lastModifiedTs[] = new long[2]; // create two chunks, 30 seconds apart createBulkChunks(journal, segmentSize, 1); journal.flushDirtyLogs(); lastModifiedTs[0] = clock.getMillis(); clock.tick(Period.seconds(30)); createBulkChunks(journal, segmentSize, 1); journal.flushDirtyLogs(); lastModifiedTs[1] = clock.getMillis(); int i = 0; for (final LogSegment segment : journal.getSegments()) { assertTrue(i < 2); segment.lastModified_$eq(lastModifiedTs[i]); i++; } int cleanedLogs = journal.cleanupLogs(); assertEquals("no segments should've been cleaned", cleanedLogs, 0); assertEquals("two segments segment should remain", countSegmentsInDir(messageJournalDir), 2); // move clock beyond the retention period and clean again clock.tick(Period.seconds(120)); cleanedLogs = journal.cleanupLogs(); assertEquals("two segments should've been cleaned (only one will actually be removed...)", cleanedLogs, 2); assertEquals("one segment should remain", countSegmentsInDir(messageJournalDir), 1); } finally { DateTimeUtils.setCurrentMillisSystem(); } } @Test public void segmentCommittedCleanup() throws Exception { final Size segmentSize = Size.kilobytes(1L); final KafkaJournal journal = new KafkaJournal(journalDirectory, scheduler, segmentSize, Duration.standardHours(1), Size.petabytes(1L), // never clean by size in this test Duration.standardDays(1), 1_000_000, Duration.standardMinutes(1), 100, new MetricRegistry(), serverStatus); final File messageJournalDir = new File(journalDirectory, "messagejournal-0"); assertTrue(messageJournalDir.exists()); final int bulkSize = createBulkChunks(journal, segmentSize, 3); // make sure everything is on disk journal.flushDirtyLogs(); assertEquals(countSegmentsInDir(messageJournalDir), 3); // we haven't committed any offsets, this should not touch anything. final int cleanedLogs = journal.cleanupLogs(); assertEquals(cleanedLogs, 0); final int numberOfSegments = countSegmentsInDir(messageJournalDir); assertEquals(numberOfSegments, 3); // mark first half of first segment committed, should not clean anything journal.markJournalOffsetCommitted(bulkSize / 2); assertEquals("should not touch segments", journal.cleanupLogs(), 0); assertEquals(countSegmentsInDir(messageJournalDir), 3); journal.markJournalOffsetCommitted(bulkSize + 1); assertEquals("first segment should've been purged", journal.cleanupLogs(), 1); assertEquals(countSegmentsInDir(messageJournalDir), 2); journal.markJournalOffsetCommitted(bulkSize * 4); assertEquals("only purge one segment, not the active one", journal.cleanupLogs(), 1); assertEquals(countSegmentsInDir(messageJournalDir), 1); } @Test public void lockedJournalDir() throws Exception { // Grab the lock before starting the KafkaJournal. 
        final File file = new File(journalDirectory, ".lock");
        assumeTrue(file.createNewFile());
        final FileLock fileLock = new FileLock(file);
        assumeTrue(fileLock.tryLock());

        try {
            new KafkaJournal(journalDirectory,
                    scheduler,
                    Size.megabytes(100L),
                    Duration.standardHours(1),
                    Size.megabytes(5L),
                    Duration.standardHours(1),
                    1_000_000,
                    Duration.standardMinutes(1),
                    100,
                    new MetricRegistry(),
                    serverStatus);
            fail("Expected exception");
        } catch (Exception e) {
            assertThat(e)
                    .isExactlyInstanceOf(RuntimeException.class)
                    .hasMessageStartingWith("kafka.common.KafkaException: Failed to acquire lock on file .lock in")
                    .hasCauseExactlyInstanceOf(KafkaException.class);
        }
    }

    @Test
    public void serverStatusThrottledIfJournalUtilizationIsHigherThanThreshold() throws Exception {
        serverStatus.running();

        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardSeconds(1L),
                Size.kilobytes(4L),
                Duration.standardHours(1L),
                1_000_000,
                Duration.standardSeconds(1L),
                90,
                new MetricRegistry(),
                serverStatus);

        createBulkChunks(journal, segmentSize, 4);
        journal.flushDirtyLogs();
        journal.cleanupLogs();

        assertThat(serverStatus.getLifecycle()).isEqualTo(Lifecycle.THROTTLED);
    }

    @Test
    public void serverStatusUnthrottledIfJournalUtilizationIsLowerThanThreshold() throws Exception {
        serverStatus.throttle();

        final Size segmentSize = Size.kilobytes(1L);
        final KafkaJournal journal = new KafkaJournal(journalDirectory,
                scheduler,
                segmentSize,
                Duration.standardSeconds(1L),
                Size.kilobytes(4L),
                Duration.standardHours(1L),
                1_000_000,
                Duration.standardSeconds(1L),
                90,
                new MetricRegistry(),
                serverStatus);

        journal.flushDirtyLogs();
        journal.cleanupLogs();

        assertThat(serverStatus.getLifecycle()).isEqualTo(Lifecycle.RUNNING);
    }
}