/*
* JBoss, Home of Professional Open Source
* Copyright 2013 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
*/
package org.searchisko.api.reindexer;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.SearchHit;
import org.searchisko.api.ContentObjectFields;
import org.searchisko.api.rest.exception.PreprocessorInvalidDataException;
import org.searchisko.api.service.ProviderService;
import org.searchisko.api.service.ProviderService.ProviderContentTypeInfo;
import org.searchisko.api.service.SearchClientService;
/**
* Base for tasks used to renormalize document in ES search indices. All documents for specified filters (implemented in
* {@link #addFilters(SearchRequestBuilder)}) are loaded from all ES indices with Searchisko content, all preprocessors
* are applied to content, and then it is stored back to the ES index.
*
* @author Vlastimil Elias (velias at redhat dot com)
*/
public abstract class RenormalizeTaskBase extends ReindexingTaskBase {
protected Set<String> indexNames = new HashSet<String>();
protected Set<String> indexTypes = new HashSet<String>();
/**
* @param providerService
* @param searchClientService
*/
public RenormalizeTaskBase(ProviderService providerService, SearchClientService searchClientService) {
super(providerService, searchClientService);
}
/**
* Constructor for unit tests.
*/
protected RenormalizeTaskBase() {
}
@Override
protected boolean validateTaskConfiguration() throws Exception {
loadIndexNamesAndTypesForWholeContent();
return !indexNames.isEmpty();
}
@Override
protected SearchRequestBuilder prepareSearchRequest(Client client) {
SearchRequestBuilder srb = client.prepareSearch(getIndexNamesAsArray()).setTypes(getIndexTypesAsArray())
.addField("_source");
addFilters(srb);
return srb;
}
/**
* Add filters to select content to be reindexed. Called from {@link #prepareSearchRequest(Client)}.
*
* @param srb to add filters into
*/
protected abstract void addFilters(SearchRequestBuilder srb);
@Override
protected void performHitProcessing(Client client, BulkRequestBuilder brb, SearchHit hit) {
Map<String, Object> content = hit.getSource();
String id = hit.getId();
String sysContentType = (String) content.get(ContentObjectFields.SYS_CONTENT_TYPE);
ProviderContentTypeInfo typeDef = providerService.findContentType(sysContentType);
if (typeDef == null) {
writeTaskLog("No type definition found for document id=" + id + " so is skipped");
} else {
try {
// Run preprocessors to normalize mapped fields
providerService.runPreprocessors(sysContentType, ProviderService.extractPreprocessors(typeDef, sysContentType),
content);
} catch (PreprocessorInvalidDataException e) {
writeTaskLog("ERROR: Data error from preprocessors execution so document " + id + " is skipped: "
+ e.getMessage());
return;
}
// put content back into search subsystem
brb.add(client.prepareIndex(ProviderService.extractIndexName(typeDef, sysContentType),
ProviderService.extractIndexType(typeDef, sysContentType), id).setSource(content));
}
}
@Override
protected void performPostReindexingProcessing(Client client) {
if (indexNames.size() > 0)
client.admin().indices().prepareFlush(getIndexNamesAsArray()).execute().actionGet();
client.admin().indices().prepareRefresh(getIndexNamesAsArray()).execute().actionGet();
}
/**
* Fill {@link #indexNames} and {@link #indexTypes} fields with all indices and types for all configured Searchisko
* content types.
*
* @see ProviderService#getAll()
*/
protected void loadIndexNamesAndTypesForWholeContent() {
List<Map<String, Object>> providers = providerService.getAll();
for (Map<String, Object> providerDef : providers) {
Map<String, Map<String, Object>> allTypes = ProviderService.extractAllContentTypes(providerDef);
if (allTypes != null) {
for (String sysContentType : allTypes.keySet()) {
Map<String, Object> typeDef = allTypes.get(sysContentType);
indexNames.add(ProviderService.extractIndexName(typeDef, sysContentType));
indexTypes.add(ProviderService.extractIndexType(typeDef, sysContentType));
}
}
}
}
/**
* @return {@link #indexNames} as string array to be passed into ES API methods
*/
protected String[] getIndexNamesAsArray() {
return indexNames.toArray(new String[indexNames.size()]);
}
/**
* @return {@link #indexTypes} as string array to be passed into ES API methods
*/
protected String[] getIndexTypesAsArray() {
return indexTypes.toArray(new String[indexTypes.size()]);
}
}