package io.monokkel.core; import io.monokkel.domain.PageData; import io.monokkel.exceptions.IndexDocumentException; import io.monokkel.factories.ClientFactory; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.client.Client; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Closeable; import java.io.IOException; import java.util.Date; import static java.lang.String.format; public class Indexer implements Closeable { public static final String PAGE = "page"; public static final String TITLE = "title"; public static final String URL = "url"; public static final String DATE = "date"; public static final String CONTENT = "content"; private static final String RAW_RESPONSE = "raw_response"; private static final String TRANSFORMED_CONTENT = "transformed_content"; private Logger log = LoggerFactory.getLogger(Indexer.class); public ClientFactory clientFactory; private final String indexName; private final String indexType; private final Client client; private final Boolean shouldIndexRawContent; public Indexer(final String indexName, final String indexType, final ClientFactory clientFactory, final Boolean shouldIndexRawContent) { this.indexName = indexName; this.indexType = indexType; this.clientFactory = clientFactory; this.shouldIndexRawContent = shouldIndexRawContent; this.client = clientFactory.buildStandardClientConnectedToOneNode(); } /** * * Send the content of the parser output to the to the elastic search index. The title, url, date and html * content is index as fields * * @param pageData An not null instance of the parserOutput */ @SuppressWarnings("unchecked") public void indexParserOutput(PageData pageData) throws IndexDocumentException { XContentBuilder builder; try { final Long timeStamp = pageData.getTimestamp(); Date date = new Date(timeStamp); final XContentBuilder allNonRawContent = XContentFactory.jsonBuilder() .startObject() .field(PAGE).startObject().field(TITLE, pageData.getTitle()).field(URL, pageData.getUrl()) .field(DATE, date) .field(CONTENT, pageData.getExtractedContent()).field(TRANSFORMED_CONTENT, pageData.getTransformed()); builder = shouldIndexRawContent ? allNonRawContent. field(RAW_RESPONSE, pageData.getResponse()) .endObject() : allNonRawContent.endObject(); final String json = builder.string(); final IndexRequestBuilder indexRequestBuilder = client.prepareIndex(indexName, indexType); indexRequestBuilder.setSource(json); indexRequestBuilder.execute(); } catch (Exception e) { log.error("Failed to index", e); throw new IndexDocumentException(format("Failed to index %s", pageData.getUrl()),e); } } @Override public void close() throws IOException { if(client != null) { client.close(); } } }