/*
* Copyright 2014 CyberVision, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kaaproject.kaa.server.flume;

import com.google.common.collect.Lists;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.flume.Channel;
import org.apache.flume.ChannelSelector;
import org.apache.flume.Context;
import org.apache.flume.Sink;
import org.apache.flume.SinkProcessor;
import org.apache.flume.SinkRunner;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.channel.MemoryChannel;
import org.apache.flume.conf.Configurables;
import org.apache.flume.sink.DefaultSinkProcessor;
import org.apache.flume.source.AvroSource;
import org.apache.flume.source.avro.AvroFlumeEvent;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.kaaproject.kaa.server.common.log.shared.RecordWrapperSchemaGenerator;
import org.kaaproject.kaa.server.common.log.shared.avro.gen.RecordData;
import org.kaaproject.kaa.server.common.log.shared.avro.gen.RecordHeader;
import org.kaaproject.kaa.server.flume.channel.KaaLoadChannelSelector;
import org.kaaproject.kaa.server.flume.sink.hdfs.KaaHdfsSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
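
/**
 * Integration test for {@link KaaHdfsSink}: spins up an in-process MiniDFSCluster,
 * feeds Avro-encoded Kaa log records through an {@link AvroSource} and a
 * {@link MemoryChannel}, and verifies the records the sink writes to HDFS.
 */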
public class TestKaaHdfsSink {
private static final Logger LOG = LoggerFactory.getLogger(TestKaaHdfsSink.class);
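// Flume pipeline components and embedded HDFS fixture shared by all tests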
private static AvroSource source;
private static KaaHdfsSink sink;
private static Channel channel;
private static MiniDFSCluster dfsCluster = null;
private static FileSystem fileSystem = null;
private static String applicationToken = "42342342";
private static byte[] endpointKeyHash = new byte[]{6, 3, 8, 4, 7, 5, 3, 6};
private static int logSchemaVersion = 1;
private static File logSchemasRootDir;
private SinkRunner sinkRunner;
private long flushRecordsCountSmall = 1000;
private long blockSizeSmall = 2097152;
private long flushRecordsCount = flushRecordsCountSmall;
private long blockSize = blockSizeSmall;
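
/**
 * Starts a single-node MiniDFSCluster, clears the /logs directory and writes the
 * TestLogData schema into the local schema root read by the sink.
 */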
@BeforeClass
public static void setUp() throws IOException {
int dataNodes = 1;
int port = 29999;
JobConf conf = new JobConf();
channel = new MemoryChannel();
conf.set("dfs.block.access.token.enable", "false");
conf.set("dfs.permissions", "true");
conf.set("hadoop.security.authentication", "simple");
conf.set("fs.default.name", "hdfs://localhost:29999");
dfsCluster = new MiniDFSCluster(port, conf, dataNodes, true, true, null, null);
fileSystem = dfsCluster.getFileSystem();
fileSystem.delete(new Path("/logs"), true);
source = new AvroSource();
sink = new KaaHdfsSink();
logSchemasRootDir = new File("schemas");
if (logSchemasRootDir.exists()) {
FileUtils.deleteDirectory(logSchemasRootDir);
}
prepareSchema(logSchemasRootDir);
}
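
/**
 * Writes the TestLogData Avro schema under {@code <rootDir>/<applicationToken>/schema_v<version>},
 * matching the layout expected by the sink's local schema source.
 */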
private static void prepareSchema(File rootDir) throws IOException {
File schemaDir = new File(rootDir, "" + applicationToken);
if (!schemaDir.exists()) {
schemaDir.mkdirs();
}
File schemaFile = new File(schemaDir, "schema_v" + logSchemaVersion);
FileUtils.write(schemaFile, TestLogData.getClassSchema().toString());
}
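
/**
 * Shuts down the MiniDFSCluster and removes the temporary build and schema directories.
 */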
@AfterClass
public static void deleteTempDirectory() {
dfsCluster.shutdown();
try {
FileUtils.deleteDirectory(new File("build"));
FileUtils.deleteDirectory(new File("schemas"));
} catch (IOException e) {
LOG.warn("Failed to delete temporary test directories", e);
}
}
@Test
public void testLogDataEvents() throws Exception {
source.setName("testLogDataSource");
sink.setName("testLogDataSink");
Context context = prepareContext();
runTestAndCheckResult(context);
}
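
/**
 * Wires the source, channel selector, channel and sink together, sends the generated
 * records, waits for the sink to drain them and then verifies the HDFS output.
 */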
private void runTestAndCheckResult(Context context) throws IOException {
Configurables.configure(source, context);
Configurables.configure(channel, context);
ChannelSelector cs = new KaaLoadChannelSelector();
cs.setChannels(Lists.newArrayList(channel));
Configurables.configure(cs, context);
source.setChannelProcessor(new ChannelProcessor(cs));
Configurables.configure(sink, context);
sink.setChannel(channel);
sinkRunner = new SinkRunner();
SinkProcessor policy = new DefaultSinkProcessor();
List<Sink> sinks = new ArrayList<>();
sinks.add(sink);
policy.setSinks(sinks);
sinkRunner.setSink(policy);
sinkRunner.start();
source.start();
RecordHeader header = new RecordHeader();
header.setApplicationToken(applicationToken);
header.setEndpointKeyHash(new String(endpointKeyHash));
header.setHeaderVersion(1);
header.setTimestamp(System.currentTimeMillis());
List<TestLogData> testLogs = generateAndSendRecords(header);
LOG.info("Sent records count: " + testLogs.size());
LOG.info("Waiting for sink...");
int maxWaitTime = 5000;
int elapsed = 0;
while (sink.getEventDrainSuccessCount() < testLogs.size() && elapsed < maxWaitTime) {
try {
Thread.sleep(1000);
elapsed += 1000;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
Assert.assertEquals(testLogs.size(), sink.getEventDrainSuccessCount());
source.stop();
sinkRunner.stop();
readAndCheckResultsFromHdfs(header, testLogs);
}
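
/**
 * Serializes {@code count} TestLogData records into a single RecordData envelope and
 * appends it to the AvroSource as one Flume event. Returns the records that were sent.
 */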
private List<TestLogData> generateAndSendRecords(RecordHeader header) throws IOException {
int count = 100;
List<TestLogData> testLogs = new ArrayList<>();
RecordData logData = new RecordData();
logData.setRecordHeader(header);
logData.setApplicationToken(applicationToken);
logData.setSchemaVersion(logSchemaVersion);
List<ByteBuffer> events = new ArrayList<>();
SpecificDatumWriter<TestLogData> avroWriter = new SpecificDatumWriter<>(TestLogData.class);
ByteArrayOutputStream baos;
BinaryEncoder encoder = null;
for (int i = 0; i < count; i++) {
TestLogData testLogData = new TestLogData();
testLogData.setLevel(i % 2 == 0 ? Level.INFO : Level.DEBUG);
testLogData.setTag("TestKaaHdfsSink");
testLogData.setMessage("Test log message # " + i);
baos = new ByteArrayOutputStream();
encoder = EncoderFactory.get().binaryEncoder(baos, encoder);
avroWriter.write(testLogData, encoder);
encoder.flush();
byte[] data = baos.toByteArray();
events.add(ByteBuffer.wrap(data));
testLogs.add(testLogData);
}
logData.setEventRecords(events);
SpecificDatumWriter<RecordData> logDataAvroWriter = new SpecificDatumWriter<>(RecordData.class);
baos = new ByteArrayOutputStream();
encoder = EncoderFactory.get().binaryEncoder(baos, encoder);
logDataAvroWriter.write(logData, encoder);
encoder.flush();
byte[] data = baos.toByteArray();
AvroFlumeEvent eventToSend = new AvroFlumeEvent();
eventToSend.setHeaders(new HashMap<CharSequence, CharSequence>());
eventToSend.setBody(ByteBuffer.wrap(data));
source.append(eventToSend);
return testLogs;
}
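
/**
 * Reads every Avro data file written under {@code /logs/<applicationToken>/<logSchemaVersion>}
 * and asserts that the record headers and log records match what was sent.
 */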
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*");
FileStatus[] statuses = fileSystem.globStatus(logsPath);
List<TestLogData> resultTestLogs = new ArrayList<>();
Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
for (FileStatus status : statuses) {
FileReader<GenericRecord> fileReader = null;
try {
SeekableInput input = new FsInput(status.getPath(), fileSystem.getConf());
DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
fileReader = DataFileReader.openReader(input, datumReader);
for (GenericRecord record : fileReader) {
RecordHeader recordHeader = (RecordHeader) record.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
Assert.assertEquals(header, recordHeader);
TestLogData recordData = (TestLogData) record.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
resultTestLogs.add(recordData);
}
} finally {
IOUtils.closeQuietly(fileReader);
}
}
Assert.assertEquals(testLogs, resultTestLogs);
}
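
/**
 * Builds the Flume {@link Context} shared by the source, channel, channel selector and sink.
 */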
private Context prepareContext() throws IOException {
Context context = new Context();
// Channel parameters
context.put("capacity", "100000000");
context.put("transactionCapacity", "10000000");
context.put("keep-alive", "1");
context.put("port", "31333");
context.put("bind", "localhost");
context.put(ConfigurationConstants.CONFIG_ROOT_HDFS_PATH, fileSystem.makeQualified(new Path("/logs")).toString());
context.put(ConfigurationConstants.CONFIG_HDFS_TXN_EVENT_MAX, "100000");
context.put(ConfigurationConstants.CONFIG_HDFS_THREAD_POOL_SIZE, "20");
context.put(ConfigurationConstants.CONFIG_HDFS_ROLL_TIMER_POOL_SIZE, "1");
context.put(ConfigurationConstants.CONFIG_HDFS_MAX_OPEN_FILES, "5000");
context.put(ConfigurationConstants.CONFIG_HDFS_CALL_TIMEOUT, "10000");
context.put(ConfigurationConstants.CONFIG_HDFS_ROLL_INTERVAL, "86400000"); // milliseconds
context.put(ConfigurationConstants.CONFIG_HDFS_ROLL_SIZE, "0"); // bytes (0 means don't roll by size)
context.put(ConfigurationConstants.CONFIG_HDFS_ROLL_COUNT, "5500000"); // records count
context.put(ConfigurationConstants.CONFIG_HDFS_BATCH_SIZE, "" + flushRecordsCount); // flush records count
context.put(ConfigurationConstants.CONFIG_HDFS_DEFAULT_BLOCK_SIZE, "" + blockSize); // default dfs block size in bytes
context.put(ConfigurationConstants.CONFIG_HDFS_FILE_PREFIX, "data");
context.put(ConfigurationConstants.CONFIG_STATISTICS_INTERVAL, "10");
context.put("serializer.compressionCodec", "null");
context.put("serializer.avro.schema.source", "local");
context.put("serializer.avro.schema.local.root", logSchemasRootDir.getAbsolutePath());
return context;
}
}