package focusedCrawler.tools;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
public class SimpleBulkIndexer {
static final ObjectMapper jsonMapper = new ObjectMapper();
static {
jsonMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
}
private StringBuilder bulkData = new StringBuilder();
private int bulkSize = 0;
private int maxBulkSize;
private int retries = 3;
private String elasticSearchAddress;
private BasicHeader authHeader;
private static CloseableHttpClient httpclient = HttpClients.createDefault();
public SimpleBulkIndexer(String elasticSearchAddress) {
this(elasticSearchAddress, null, 25);
}
public SimpleBulkIndexer(String elasticSearchAddress, int maxBulkSize) {
this(elasticSearchAddress, null, maxBulkSize);
}
public SimpleBulkIndexer(String elasticSearchAddress, String authHeader) {
this(elasticSearchAddress, authHeader, 25);
}
public SimpleBulkIndexer(String elasticSearchAddress,
String userAndPassword,
int maxBulkSize) {
this.elasticSearchAddress = elasticSearchAddress;
this.maxBulkSize = maxBulkSize;
if(userAndPassword != null) {
String headerName = "Authorization";
String headerValue = "Basic " + Base64.encodeBase64String(userAndPassword.getBytes());
System.out.println(headerName);
System.out.println(headerValue);
this.authHeader = new BasicHeader(headerName, headerValue);
}
}
public void addDocument(String indexName, String typeName, Object obj, String id) throws IOException {
String command;
if(id == null) {
command = "{ \"index\" : { \"_index\" : \""+indexName+"\", \"_type\" : \""+typeName+"\"} }";
} else {
command = "{ \"index\" : { \"_index\" : \""+indexName+"\", \"_type\" : \""+typeName+"\", \"_id\":\""+id+"\"} }";
}
final String json;
try {
if(obj instanceof String) {
json = (String) obj;
} else {
json = jsonMapper.writeValueAsString(obj);
}
} catch (JsonProcessingException e) {
throw new RuntimeException("Failed to serialize JSON object. ", e);
}
bulkData.append(command+"\n");
bulkData.append(json+"\n");
bulkSize++;
if(bulkSize >= maxBulkSize) {
flushBulk();
}
}
public void flushBulk() throws IOException {
if(bulkSize > 0) {
executeBulkRequestWithRetries(bulkData.toString(), retries);
bulkSize = 0;
bulkData = new StringBuilder();
}
}
public void bulkIndexDocuments(String indexName,
String typeName,
List<String> sources) throws IOException {
StringBuilder builder = new StringBuilder();
for (String source : sources) {
String command = "{ \"index\" : { \"_index\" : \""+indexName+"\", \"_type\" : \""+typeName+"\"} }";
builder.append(command+"\n");
builder.append(source+"\n");
}
executeBulkRequestWithRetries(builder.toString(), retries);
}
public void bulkIndexDocumentsWithId(String indexName,
String typeName,
Map<String, String> sources) throws IOException {
StringBuilder builder = new StringBuilder();
for (Map.Entry<String, String> source : sources.entrySet()) {
String command = "{ \"index\" : { \"_index\" : \""+indexName+"\", \"_type\" : \""+typeName+"\", \"_id\":\""+source.getKey()+"\"} }";
builder.append(command+"\n");
builder.append(source.getValue()+"\n");
}
executeBulkRequestWithRetries(builder.toString(), retries);
}
private void executeBulkRequestWithRetries(String requestBody, int retries) {
for(int i=0; i < retries; i++) {
try {
executeBulkRequest(requestBody.toString());
break;
} catch(Exception e) {
retries++;
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
throw new RuntimeException("Bulk request retry interruped.", ie);
}
}
}
}
private void executeBulkRequest(final String requestBody) throws IOException {
HttpPost httpPost = new HttpPost(elasticSearchAddress +"/_bulk");
if(authHeader != null) {
httpPost.addHeader(authHeader);
}
httpPost.setEntity(new StringEntity(
requestBody,
ContentType.create(URLEncodedUtils.CONTENT_TYPE, Charset.forName("UTF-8"))
));
CloseableHttpResponse response = httpclient.execute(httpPost);
try {
HttpEntity entity = response.getEntity();
String entityAsText = EntityUtils.toString(entity);
StatusLine statusLine = response.getStatusLine();
System.out.println(statusLine.toString());
if(statusLine.getStatusCode() != 200) {
System.out.println(entityAsText);
}
} finally {
response.close();
}
}
public void close() throws IOException {
flushBulk();
httpclient.close();
}
}