package focusedCrawler.target.repository;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import focusedCrawler.target.model.Page;
import focusedCrawler.target.model.TargetModelElasticSearch;
import focusedCrawler.target.repository.elasticsearch.ElasticSearchClientFactory;
import focusedCrawler.target.repository.elasticsearch.ElasticSearchConfig;
/**
 * {@link TargetRepository} implementation that writes pages as JSON documents
 * into an Elasticsearch index.
 *
 * <p>On construction the repository obtains a client from
 * {@link ElasticSearchClientFactory} and, if the target index does not exist
 * yet, creates it with a fixed mapping for the configured document type.
 */
public class ElasticSearchTargetRepository implements TargetRepository {

    /**
     * Mapping registered for the document type when the index is created by
     * this repository. It is not applied to an index that already exists.
     */
    private static final String TARGET_MAPPING = ""
        + "{"
        + " \"properties\": {"
        + " \"domain\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"words\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"wordsMeta\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"retrieved\": {\"type\": \"date\",\"format\": \"dateOptionalTime\"},"
        + " \"text\": {\"type\": \"string\"},"
        + " \"title\": {\"type\": \"string\"},"
        + " \"url\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"topPrivateDomain\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"isRelevant\": {\"type\": \"string\",\"index\": \"not_analyzed\"},"
        + " \"relevance\": {\"type\": \"double\"}"
        + " }"
        + "}";

    /** Shared JSON serializer with timestamp-style date output disabled. */
    private static final ObjectMapper mapper =
        new ObjectMapper().disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);

    private final Client client;
    private final String typeName;
    private final String indexName;

    /**
     * Creates the repository and makes sure the target index exists.
     *
     * @param config    settings handed to {@link ElasticSearchClientFactory} to obtain the client
     * @param indexName name of the index documents are written to
     * @param typeName  document type used for the mapping and for every indexed page
     */
    public ElasticSearchTargetRepository(ElasticSearchConfig config,
                                         String indexName,
                                         String typeName) {
        this.client = ElasticSearchClientFactory.createClient(config);
        this.indexName = indexName;
        this.typeName = typeName;
        this.createIndexMapping(indexName);
    }

    /**
     * Creates the given index with {@link #TARGET_MAPPING} for the configured
     * type, unless an index with that name is already present.
     *
     * @param indexName name of the index to check and, if missing, create
     */
    private void createIndexMapping(String indexName) {
        boolean alreadyPresent = client.admin().indices()
            .prepareExists(indexName)
            .execute()
            .actionGet()
            .isExists();
        if (alreadyPresent) {
            // Existing indices are left untouched, including their mappings.
            return;
        }
        client.admin().indices()
            .prepareCreate(indexName)
            .addMapping(typeName, TARGET_MAPPING)
            .execute()
            .actionGet();
    }

    /**
     * Indexes the page, using the string form of its URL as the document id,
     * and blocks until Elasticsearch responds.
     *
     * @param page the page to store
     * @return the {@code isCreated()} flag of the index response
     *         (NOTE(review): presumably {@code false} when a document with the
     *         same URL was overwritten; confirm against the client version in use)
     */
    @Override
    public boolean insert(Page page) {
        TargetModelElasticSearch document = new TargetModelElasticSearch(page);
        String id = page.getURL().toString();
        IndexResponse result = client
            .prepareIndex(indexName, typeName, id)
            .setSource(serializeAsJson(document))
            .execute()
            .actionGet();
        return result.isCreated();
    }

    /**
     * Serializes the given model to a JSON string with the shared mapper.
     *
     * @param model object to serialize
     * @return the JSON representation of {@code model}
     * @throws RuntimeException wrapping the {@link JsonProcessingException} if serialization fails
     */
    private String serializeAsJson(Object model) {
        try {
            return mapper.writeValueAsString(model);
        } catch (JsonProcessingException e) {
            throw new RuntimeException("Failed to serialize TargetModel to JSON.", e);
        }
    }

    /** Closes the underlying Elasticsearch client. */
    @Override
    public void close() {
        client.close();
    }
}