package io.monokkel.core;
import io.monokkel.domain.PageData;
import io.monokkel.exceptions.IndexDocumentException;
import io.monokkel.factories.ClientFactory;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.util.Date;
import static java.lang.String.format;
/**
 * Writes {@link PageData} instances as JSON documents to an Elasticsearch index.
 *
 * <p>The client is obtained from the supplied {@link ClientFactory} when the indexer is
 * constructed and is released by {@link #close()}.
 */
public class Indexer implements Closeable {

    public static final String PAGE = "page";
    public static final String TITLE = "title";
    public static final String URL = "url";
    public static final String DATE = "date";
    public static final String CONTENT = "content";
    private static final String RAW_RESPONSE = "raw_response";
    private static final String TRANSFORMED_CONTENT = "transformed_content";

    // One logger per class is enough; no need to create one per instance.
    private static final Logger log = LoggerFactory.getLogger(Indexer.class);

    /** Factory the client was built from. Left public and non-final for backward compatibility. */
    public ClientFactory clientFactory;

    private final String indexName;
    private final String indexType;
    private final Client client;
    private final boolean shouldIndexRawContent;

    /**
     * Creates an indexer and immediately builds a client through the given factory.
     *
     * @param indexName             name of the index documents are written to
     * @param indexType             document type used for the index requests
     * @param clientFactory         factory used to build the client; must not be null
     * @param shouldIndexRawContent whether the raw response is stored in the
     *                              {@code raw_response} field; {@code null} is treated as
     *                              {@code false}
     */
    public Indexer(final String indexName, final String indexType, final ClientFactory clientFactory, final Boolean shouldIndexRawContent) {
        this.indexName = indexName;
        this.indexType = indexType;
        this.clientFactory = clientFactory;
        // Normalise once here so a null flag cannot cause an unboxing failure on every index call.
        this.shouldIndexRawContent = Boolean.TRUE.equals(shouldIndexRawContent);
        this.client = clientFactory.buildStandardClientConnectedToOneNode();
    }

    /**
     * Sends the content of the parser output to the Elasticsearch index. The title, url, date,
     * extracted content and transformed content are indexed as fields of a {@code page} object;
     * the raw response is added as well when raw-content indexing is enabled.
     *
     * @param pageData a non-null instance of the parser output
     * @throws IndexDocumentException if the document could not be built or the index request
     *                                could not be submitted
     */
    public void indexParserOutput(PageData pageData) throws IndexDocumentException {
        try {
            final Long timeStamp = pageData.getTimestamp();
            final Date date = new Date(timeStamp);

            final XContentBuilder builder = XContentFactory.jsonBuilder()
                    .startObject()
                    .field(PAGE).startObject()
                    .field(TITLE, pageData.getTitle())
                    .field(URL, pageData.getUrl())
                    .field(DATE, date)
                    .field(CONTENT, pageData.getExtractedContent())
                    .field(TRANSFORMED_CONTENT, pageData.getTransformed());

            if (shouldIndexRawContent) {
                builder.field(RAW_RESPONSE, pageData.getResponse());
            }

            // Close both the nested "page" object and the root object explicitly instead of
            // relying on the generator to auto-close whatever is still open.
            builder.endObject()
                    .endObject();

            final String json = builder.string();
            final IndexRequestBuilder indexRequestBuilder = client.prepareIndex(indexName, indexType);
            indexRequestBuilder.setSource(json);
            // NOTE(review): the value returned by execute() is discarded, so this method does not
            // wait for the outcome and later failures are presumably not reported to the caller.
            // Confirm that fire-and-forget is intended.
            indexRequestBuilder.execute();
        } catch (Exception e) {
            log.error("Failed to index", e);
            throw new IndexDocumentException(format("Failed to index %s", pageData.getUrl()), e);
        }
    }

    /**
     * Closes the underlying client, if one was created.
     *
     * @throws IOException declared by {@link Closeable}
     */
    @Override
    public void close() throws IOException {
        if (client != null) {
            client.close();
        }
    }
}