/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.msec.sink.es;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.lang.StringUtils;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.apache.flume.formatter.output.PathManager;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.ObjectMapper;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.apache.flume.formatter.output.PathManagerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
import java.util.concurrent.*;
import java.lang.String;
import java.text.SimpleDateFormat;
/**
 * Flume sink that turns every event into a JSON document (all non-empty headers plus a
 * {@code body} field) and hands batches of documents to {@link ESClientThread} workers
 * through a bounded queue.
 *
 * <p>Configuration keys read in {@link #configure(Context)}: {@code hosts} (required,
 * comma separated {@code host[:port]} list), {@code indexName}, {@code indexType},
 * {@code clusterName}, {@code indexRollingTime} (e.g. {@code 1day}, {@code 6hour},
 * {@code 5min}), {@code bulkNum}, {@code batchSize} and {@code maxContentLength}.
 *
 * <p>The pending bulk request is only touched from {@link #process()} and the methods it
 * calls; the timer thread merely raises the volatile {@code bulkTimeout} flag.
 */
public class ESSink extends AbstractSink implements Configurable {
    private static final Logger LOG = LoggerFactory.getLogger(ESSink.class);
    public static final int DEFAULT_PORT = 9300;

    /** Suffix appended to a body that was cut down to {@code maxContentLength}. */
    private static final String TRUNCATION_MARKER = "<..truncated..>";
    /** Java charset name matching {@link JsonEncoding#UTF8}, used to read the JSON bytes back. */
    private static final String JSON_CHARSET = "UTF-8";
    /** Shared mapper; only its JsonFactory is used, so one instance serves every event. */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    //members for elasticsearch config
    private String clusterName = "testCluster";
    private String indexName = "flume";
    private String indexType = "logs";
    private int batchSize;
    private String indexRollingTime = "1day";
    /** Per service: the epoch minute in which the index name was last computed. */
    private Map<String, Integer> lastIndexRollingMinute = new HashMap<String, Integer>();
    /** Per service: the index name computed at that minute. */
    private Map<String, String> lastIndexRollingName = new HashMap<String, String>();
    private String[] serverAddressStrings = null;
    private InetSocketTransportAddress[] serverAddresses;
    private int bulkNum;
    private int totalCount = 0;
    private long timeStart = 0;
    private int maxContentLength;
    private ScheduledExecutorService rollService;
    private PathManager pathController;
    /** Raised by the timer thread every 3 seconds; consumed (and cleared) by process(). */
    private volatile boolean bulkTimeout;
    private static final ExecutorService threadPool = Executors.newCachedThreadPool();
    // Per-sink queue: a static queue would be replaced by every sink that starts, leaving
    // the workers of earlier sinks waiting on a queue nobody writes to any more.
    private BlockingQueue<ESClientThread.ESThreadRequest> workingQueue;
    ESClientThread.ESThreadRequest esThreadRequest = new ESClientThread.ESThreadRequest();

    public ESSink() {
        LOG.info("ESSink constructed...");
        bulkTimeout = false;
        bulkNum = 1;
        timeStart = System.currentTimeMillis();
    }

    /**
     * Reads the sink settings from the Flume context.
     *
     * @param context the sink configuration
     * @throws IllegalStateException if no {@code hosts} value has been supplied
     * @throws IllegalArgumentException if {@code batchSize} or {@code maxContentLength}
     *         is not a positive number
     */
    @Override
    public void configure(Context context) {
        pathController = PathManagerFactory.getInstance("DEFAULT", context);

        String hosts = context.getString("hosts");
        if (StringUtils.isNotBlank(hosts)) {
            serverAddressStrings = StringUtils.deleteWhitespace(hosts).split(",");
        }
        Preconditions.checkState(serverAddressStrings != null
                && serverAddressStrings.length > 0, "Missing Param:" + "hosts");

        String configuredIndexName = context.getString("indexName");
        if (StringUtils.isNotBlank(configuredIndexName)) {
            this.indexName = configuredIndexName;
        }
        String configuredIndexType = context.getString("indexType");
        if (StringUtils.isNotBlank(configuredIndexType)) {
            this.indexType = configuredIndexType;
        }
        String configuredClusterName = context.getString("clusterName");
        if (StringUtils.isNotBlank(configuredClusterName)) {
            this.clusterName = configuredClusterName;
        }
        String configuredRollingTime = context.getString("indexRollingTime");
        if (StringUtils.isNotBlank(configuredRollingTime)) {
            this.indexRollingTime = configuredRollingTime;
        }

        bulkNum = context.getInteger("bulkNum", 1);
        batchSize = context.getInteger("batchSize", 100);
        // checkArgument, not checkNotNull: a boxed boolean is never null, so the old
        // check could not reject anything.
        Preconditions.checkArgument(batchSize > 0, "batchSize must be a positive number!!");
        maxContentLength = context.getInteger("maxContentLength", 1000);
        Preconditions.checkArgument(maxContentLength > 0, "maxContentLength must be a positive number!!");
    }

    /**
     * Starts the worker threads and a timer that raises {@code bulkTimeout} every
     * 3 seconds so that a partially filled bulk request is eventually submitted.
     */
    @Override
    public void start() {
        super.start();
        initESThreadPool();
        rollService = Executors.newScheduledThreadPool(
                1,
                new ThreadFactoryBuilder().setNameFormat("ESSink-Bulk-Timer" + Thread.currentThread().getId() + "-%d").build());
        rollService.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                bulkTimeout = true;
            }
        }, 3, 3, TimeUnit.SECONDS);
        LOG.info("ESSink {} started.", getName());
    }

    /** Stops the sink and its bulk timer. Worker threads in the shared pool are not stopped here. */
    @Override
    public void stop() {
        super.stop();
        // start() may never have run (or may have failed before creating the timer).
        if (rollService != null) {
            rollService.shutdown();
        }
    }

    /**
     * Returns the index name to use for {@code serviceName} right now.
     *
     * <p>The name is {@code msec_<serviceName>} followed by zero-padded month/day
     * (unit {@code day}), month/day/hour (unit {@code hour}) or month/day/hour/minute
     * (unit {@code min}). The unit and count come from {@code indexRollingTime}; a new
     * name is adopted when the current day/hour/minute value is a multiple of the count,
     * or when the service has no name yet. Otherwise the service keeps its previous name.
     * The result is cached per service for the current wall-clock minute.
     *
     * @param serviceName service the event belongs to (may be empty)
     * @return the index name for that service
     */
    String getCurrentIndexName(String serviceName) {
        int nowMinute = (int) (System.currentTimeMillis() / 60000);
        Integer cachedMinute = lastIndexRollingMinute.get(serviceName);
        if (cachedMinute != null && cachedMinute.intValue() == nowMinute) {
            // Already computed during this minute.
            return lastIndexRollingName.get(serviceName);
        }

        // Split "<count><unit>" into its parts; either part may be missing.
        String spec = indexRollingTime.trim();
        int pos = 0;
        while (pos < spec.length() && Character.isDigit(spec.charAt(pos))) {
            ++pos;
        }
        int splitNum = 1;
        if (pos > 0) {
            try {
                splitNum = Integer.parseInt(spec.substring(0, pos));
            } catch (NumberFormatException e) {
                LOG.warn("Rolling count in indexRollingTime '{}' is out of range, using 1", indexRollingTime);
            }
        }
        if (splitNum <= 0) {
            splitNum = 1;
        }
        String splitUnit = spec.substring(pos).trim();
        if (splitUnit.isEmpty()) {
            splitUnit = "day";
        }

        Calendar cal = Calendar.getInstance();
        int month = cal.get(Calendar.MONTH) + 1;
        int dayOfMonth = cal.get(Calendar.DAY_OF_MONTH);
        int hourOfDay = cal.get(Calendar.HOUR_OF_DAY);
        int minute = cal.get(Calendar.MINUTE);
        String prefix = "msec_" + serviceName;

        int rollingValue;
        String candidate;
        if (splitUnit.equalsIgnoreCase("hour")) {
            rollingValue = hourOfDay;
            candidate = prefix + String.format(Locale.ROOT, "%02d%02d%02d", month, dayOfMonth, hourOfDay);
        } else if (splitUnit.equalsIgnoreCase("min")) {
            // Roll on the minute value (the hour was tested here before).
            rollingValue = minute;
            candidate = prefix + String.format(Locale.ROOT, "%02d%02d%02d%02d",
                    month, dayOfMonth, hourOfDay, minute);
        } else {
            if (!splitUnit.equalsIgnoreCase("day")) {
                // An unrecognised unit used to leave the name empty or stale.
                LOG.warn("Unknown unit '{}' in indexRollingTime '{}', rolling by day", splitUnit, indexRollingTime);
            }
            rollingValue = dayOfMonth;
            candidate = prefix + String.format(Locale.ROOT, "%02d%02d", month, dayOfMonth);
        }

        // Fall back to this service's own previous name, never to another service's.
        String previous = lastIndexRollingName.get(serviceName);
        String resolved = (previous == null || rollingValue % splitNum == 0) ? candidate : previous;

        lastIndexRollingMinute.put(serviceName, nowMinute);
        lastIndexRollingName.put(serviceName, resolved);
        return resolved;
    }

    /**
     * Resolves every configured host and submits one {@link ESClientThread} per resolved
     * address to the shared thread pool, all reading from this sink's working queue.
     *
     * @throws FlumeException if none of the configured hosts could be resolved; without a
     *         worker the bounded queue would fill up and block {@link #process()} forever
     */
    private void initESThreadPool() {
        serverAddresses = new InetSocketTransportAddress[serverAddressStrings.length];
        workingQueue = new ArrayBlockingQueue<ESClientThread.ESThreadRequest>(serverAddressStrings.length * 2);
        int startedWorkers = 0;
        for (int i = 0; i < serverAddressStrings.length; i++) {
            String[] hostPort = serverAddressStrings[i].trim().split(":");
            String host = hostPort[0].trim();
            int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim()) : DEFAULT_PORT;
            try {
                serverAddresses[i] = new InetSocketTransportAddress(InetAddress.getByName(host), port);
                threadPool.submit(new ESClientThread(workingQueue, clusterName, serverAddresses[i]));
                startedWorkers++;
            } catch (UnknownHostException e) {
                LOG.error("Cannot resolve Elasticsearch host '" + host + "', skipping it", e);
            }
        }
        if (startedWorkers == 0) {
            throw new FlumeException("None of the configured Elasticsearch hosts could be resolved");
        }
    }

    /**
     * Puts a bulk request on the working queue, blocking while the queue is full, and
     * prints a throughput line to stdout once more than 20000 documents were counted.
     *
     * <p>If the thread is interrupted while waiting, the interrupt flag is restored and
     * the request is not counted (it was not enqueued).
     *
     * @param request the bulk request to hand to the worker threads
     */
    public void submitESRequest(ESClientThread.ESThreadRequest request) {
        try {
            workingQueue.put(request);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            LOG.error("Interrupted while queueing a bulk request; the request was not submitted", e);
            return;
        }

        // NOTE(review): the "- 1" is kept from the original; with bulkNum == 1 it counts
        // nothing. Confirm whether it is intentional.
        totalCount += request.sourceList.size() - 1;
        if (totalCount > 20000) {
            long elapsedMs = System.currentTimeMillis() - timeStart;
            // Guard against a zero divisor and do the multiplication in long arithmetic.
            long qps = totalCount * 1000L / Math.max(1L, elapsedMs);
            SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            System.out.println(df.format(new Date()) + " totalCount: " + totalCount +
                    " cost: " + elapsedMs +
                    " qps: " + qps);
            timeStart = System.currentTimeMillis();
            totalCount = 0;
        }
    }

    /**
     * Serializes one event to JSON and appends it to the pending bulk request, submitting
     * the request once it holds at least {@code bulkNum} documents.
     *
     * <p>An {@code InsTime} header (current time in milliseconds) is added when missing.
     * The index is derived from the {@code ServiceName} header, cut at the first dot and
     * lower-cased. Tabs and newlines in the body are replaced by spaces and a body longer
     * than {@code maxContentLength} is truncated and marked.
     *
     * @param event the event taken from the channel
     * @throws IOException declared for callers; serialization failures are logged and the
     *         event is skipped
     */
    private void doSerialize(Event event) throws IOException {
        Map<String, String> headers = event.getHeaders();
        if (!headers.containsKey("InsTime")) {
            headers.put("InsTime", String.valueOf(System.currentTimeMillis()));
        }

        String serviceName = "";
        String rawServiceName = headers.get("ServiceName");
        if (rawServiceName != null) {
            int dot = rawServiceName.indexOf(".");
            serviceName = dot > 0 ? rawServiceName.substring(0, dot) : rawServiceName;
        }

        // NOTE(review): the body is still decoded with the platform default charset, as
        // before; confirm the producers' encoding before pinning it to UTF-8.
        byte[] body = event.getBody();
        String content = body == null ? "" : new String(body);
        content = content.replace('\t', ' ').replace('\n', ' ');
        if (content.length() > maxContentLength) {
            // Never pass a negative end index when maxContentLength is shorter than the marker.
            int keep = Math.max(0, maxContentLength - TRUNCATION_MARKER.length());
            content = content.substring(0, keep) + TRUNCATION_MARKER;
        }

        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            JsonGenerator jgen = JSON_MAPPER.getJsonFactory().createJsonGenerator(baos, JsonEncoding.UTF8);
            try {
                jgen.writeStartObject();
                for (Map.Entry<String, String> header : headers.entrySet()) {
                    String headerValue = header.getValue();
                    if (headerValue != null && !headerValue.isEmpty()) {
                        jgen.writeStringField(header.getKey(), headerValue);
                    }
                }
                jgen.writeStringField("body", content);
                jgen.writeEndObject();
            } finally {
                // close() also flushes the generator into baos.
                jgen.close();
            }

            String curIndexName = getCurrentIndexName(serviceName.toLowerCase(Locale.ROOT));
            // The generator wrote UTF-8, so the bytes must be read back as UTF-8 and not
            // with the platform default charset.
            esThreadRequest.sourceList.add(baos.toString(JSON_CHARSET));
            esThreadRequest.indexNameList.add(curIndexName);
            esThreadRequest.indexTypeList.add(indexType);
            if (esThreadRequest.sourceList.size() >= bulkNum) {
                submitESRequest(esThreadRequest);
                esThreadRequest = new ESClientThread.ESThreadRequest();
            }
        } catch (IOException e) {
            LOG.error("Failed to serialize event to JSON, event skipped", e);
        }

        if (LOG.isInfoEnabled()) {
            LOG.info("ES sink process: " + content + " " + headers.get("IP") + " " + headers.get("Level") + " " + headers.get("RPCName"));
        }
    }

    /**
     * Takes up to {@code batchSize} events from the channel inside one transaction and
     * serializes each of them. A pending, non-empty bulk request is submitted first when
     * the bulk timer has fired since the previous call.
     *
     * @return {@code BACKOFF} when the channel ran empty, {@code READY} otherwise
     * @throws EventDeliveryException if anything fails inside the transaction; the
     *         transaction is rolled back first
     */
    @Override
    public Status process() throws EventDeliveryException {
        // Flush before a transaction is acquired so that a failure here cannot leave an
        // unclosed transaction behind.
        if (bulkTimeout) {
            bulkTimeout = false;
            if (!esThreadRequest.sourceList.isEmpty()) {
                LOG.info("ES bulk timeout");
                submitESRequest(esThreadRequest);
                esThreadRequest = new ESClientThread.ESThreadRequest();
            }
        }

        Status result = Status.READY;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        try {
            transaction.begin();
            for (int i = 0; i < batchSize; i++) {
                Event event = channel.take();
                if (event == null) {
                    // No events found, request back-off semantics from runner
                    result = Status.BACKOFF;
                    break;
                }
                doSerialize(event);
            }
            transaction.commit();
        } catch (Exception ex) {
            transaction.rollback();
            throw new EventDeliveryException("Failed to process transaction", ex);
        } finally {
            transaction.close();
        }
        return result;
    }
}