/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.elasticsearch.processor;

import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsProcessor;
import org.apache.streams.elasticsearch.ElasticsearchClientManager;
import org.apache.streams.elasticsearch.ElasticsearchConfiguration;
import org.apache.streams.elasticsearch.ElasticsearchWriterConfiguration;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.pojo.extensions.ExtensionUtil;
import org.apache.streams.pojo.json.Activity;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.percolate.PercolateRequestBuilder;
import org.elasticsearch.action.percolate.PercolateResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Queue;
import java.util.Set;

/**
 * Runs each incoming document through the Elasticsearch percolator and
 * attaches the ids of every stored percolator query ("tag") that matches
 * to the document, as an Activity extension named {@link #TAGS_EXTENSION}.
 *
 * <p>During {@link #prepare(Object)} the tag queries declared in the writer
 * configuration are (re)registered under the {@code .percolator} type of the
 * configured index; {@link #process(StreamsDatum)} then percolates documents
 * against those queries.
 */
public class PercolateTagProcessor implements StreamsProcessor {

  public static final String STREAMS_ID = "PercolateTagProcessor";

  private static final Logger LOGGER = LoggerFactory.getLogger(PercolateTagProcessor.class);

  // Field percolator queries run against when no field is given explicitly.
  private static final String DEFAULT_PERCOLATE_FIELD = "_all";

  private ObjectMapper mapper;

  protected Queue<StreamsDatum> inQueue;
  protected Queue<StreamsDatum> outQueue;

  public static final String TAGS_EXTENSION = "tags";

  private ElasticsearchWriterConfiguration config;
  private ElasticsearchClientManager manager;
  private BulkRequestBuilder bulkBuilder;
  protected String usePercolateField;

  public PercolateTagProcessor(ElasticsearchWriterConfiguration config) {
    this(config, DEFAULT_PERCOLATE_FIELD);
  }

  public PercolateTagProcessor(ElasticsearchWriterConfiguration config, String defaultPercolateField) {
    this.config = config;
    this.usePercolateField = defaultPercolateField;
  }

  public ElasticsearchClientManager getManager() {
    return manager;
  }

  public void setManager(ElasticsearchClientManager manager) {
    this.manager = manager;
  }

  public ElasticsearchConfiguration getConfig() {
    return config;
  }

  public void setConfig(ElasticsearchWriterConfiguration config) {
    this.config = config;
  }

  public Queue<StreamsDatum> getProcessorOutputQueue() {
    return outQueue;
  }

  @Override
  public String getId() {
    return STREAMS_ID;
  }

  /**
   * Percolates the datum's document against the registered tag queries and
   * appends the matching tag ids to the resulting Activity.
   *
   * @param entry datum whose document is a JSON String or an ObjectNode
   * @return a single-element list containing the tagged datum, or an empty
   *         list when the document is unreadable or percolation fails
   *         (previously {@code null} was returned; an empty list is safe for
   *         callers that iterate and for callers that null-check)
   */
  @Override
  public List<StreamsDatum> process(StreamsDatum entry) {

    List<StreamsDatum> result = new ArrayList<>();

    String json;
    ObjectNode node;

    // Normalize the document to both forms we need: a JSON string for the
    // percolate request body and an ObjectNode for conversion to Activity.
    if (entry.getDocument() instanceof String) {
      json = (String) entry.getDocument();
      try {
        node = (ObjectNode) mapper.readTree(json);
      } catch (IOException ex) {
        // was ex.printStackTrace(); route through the class logger instead
        LOGGER.warn("Invalid json document: {}", json, ex);
        return result;
      }
    } else if (entry.getDocument() instanceof ObjectNode) {
      node = (ObjectNode) entry.getDocument();
      try {
        json = mapper.writeValueAsString(node);
      } catch (JsonProcessingException ex) {
        // original log call had no {} placeholder, so the datum was dropped
        LOGGER.warn("Invalid datum: {}", node, ex);
        return result;
      }
    } else {
      LOGGER.warn("Incompatible document type: {}", entry.getDocument().getClass());
      return result;
    }

    // Percolate request body: { "doc": <document> }
    StringBuilder percolateRequestJson = new StringBuilder();
    percolateRequestJson.append("{ \"doc\": ");
    percolateRequestJson.append(json);
    percolateRequestJson.append("}");

    PercolateRequestBuilder request;
    PercolateResponse response;

    try {
      LOGGER.trace("Percolate request json: {}", percolateRequestJson);
      request = manager.client()
          .preparePercolate()
          .setIndices(config.getIndex())
          .setDocumentType(config.getType())
          .setSource(percolateRequestJson.toString());
      LOGGER.trace("Percolate request: {}", mapper.writeValueAsString(request.request()));
      response = request.execute().actionGet();
      LOGGER.trace("Percolate response: {} matches", response.getMatches().length);
    } catch (Exception ex) {
      LOGGER.warn("Percolate exception: {}", ex.getMessage(), ex);
      return result;
    }

    // Collect the id of every percolator query that matched this document.
    ArrayNode tagArray = JsonNodeFactory.instance.arrayNode();
    for (PercolateResponse.Match match : response) {
      tagArray.add(match.getId().string());
    }

    LOGGER.trace("Percolate matches: {}", tagArray);

    Activity activity = mapper.convertValue(node, Activity.class);

    appendMatches(tagArray, activity);

    entry.setDocument(activity);

    result.add(entry);

    return result;
  }

  /**
   * Attaches the matched tag ids to the activity as the "tags" extension.
   *
   * @param tagArray ids of matching percolator queries
   * @param activity activity to decorate
   */
  protected void appendMatches(ArrayNode tagArray, Activity activity) {
    ExtensionUtil.getInstance().addExtension(activity, TAGS_EXTENSION, tagArray);
  }

  /**
   * Registers the configured tag queries as percolator documents in the
   * target index, optionally replacing any existing ones first.
   *
   * @param configuration unused; configuration is taken from the constructor
   */
  @Override
  public void prepare(Object configuration) {
    mapper = StreamsJacksonMapper.getInstance();
    Objects.requireNonNull(config);
    manager = ElasticsearchClientManager.getInstance(config);
    if (config.getTags() != null && config.getTags().getAdditionalProperties().size() > 0) {
      // initial write tags to index
      createIndexIfMissing(config.getIndex());
      if (config.getReplaceTags()) {
        deleteOldQueries(config.getIndex());
      }
      // BUGFIX: the bulk builder must exist before addPercolateRule() runs.
      // It was previously created AFTER the loop below, so the first
      // addPercolateRule() call dereferenced a null bulkBuilder — and even
      // otherwise the fresh builder would have discarded every queued rule.
      bulkBuilder = manager.client().prepareBulk();
      for (String tag : config.getTags().getAdditionalProperties().keySet()) {
        String query = (String) config.getTags().getAdditionalProperties().get(tag);
        PercolateQueryBuilder queryBuilder = new PercolateQueryBuilder(tag, query, this.usePercolateField);
        addPercolateRule(queryBuilder, config.getIndex());
      }
      if (writePercolateRules()) {
        LOGGER.info("wrote " + bulkBuilder.numberOfActions() + " tags to " + config.getIndex() + " _percolator");
      } else {
        LOGGER.error("FAILED writing " + bulkBuilder.numberOfActions() + " tags to " + config.getIndex() + " _percolator");
      }
    }
  }

  @Override
  public void cleanUp() {
    if (config.getCleanupTags()) {
      deleteOldQueries(config.getIndex());
    }
    // NOTE(review): this closes the client obtained from the shared
    // ElasticsearchClientManager — confirm no other component still uses it.
    manager.client().close();
  }

  public int numOfPercolateRules() {
    return this.bulkBuilder.numberOfActions();
  }

  /**
   * Creates the index when it does not already exist, relying on server-side
   * templates for mappings/settings.
   *
   * @param indexName indexName
   * @throws RuntimeException when index creation is not acknowledged
   */
  public void createIndexIfMissing(String indexName) {
    if (!this.manager.client()
        .admin()
        .indices()
        .exists(new IndicesExistsRequest(indexName))
        .actionGet()
        .isExists()) {
      // It does not exist... So we are going to need to create the index.
      // we are going to assume that the 'templates' that we have loaded into
      // elasticsearch are sufficient to ensure the index is being created properly.
      CreateIndexResponse response = this.manager.client().admin().indices().create(new CreateIndexRequest(indexName)).actionGet();
      if (response.isAcknowledged()) {
        LOGGER.info("Index {} did not exist. The index was automatically created from the stored ElasticSearch Templates.", indexName);
      } else {
        LOGGER.error("Index {} did not exist. While attempting to create the index from stored ElasticSearch Templates we were unable to get an acknowledgement.", indexName);
        LOGGER.error("Error Message: {}", response.toString());
        throw new RuntimeException("Unable to create index " + indexName);
      }
    }
  }

  /**
   * Queues a percolator query document (type ".percolator") for bulk indexing.
   *
   * @param builder the query to register
   * @param index   target index
   */
  public void addPercolateRule(PercolateQueryBuilder builder, String index) {
    this.bulkBuilder.add(manager.client().prepareIndex(index, ".percolator", builder.getId())
        .setSource(builder.getSource()));
  }

  /**
   * Flushes the queued percolator queries to Elasticsearch.
   *
   * @return true if all rules were added; false indicates one or more rules failed
   * @throws RuntimeException when no rules have been queued
   */
  public boolean writePercolateRules() {
    // BUGFIX: was "< 0", which numberOfActions() can never satisfy, so the
    // empty-rules guard was dead code.
    if (this.numOfPercolateRules() == 0) {
      throw new RuntimeException("No Rules Have been added!");
    }
    BulkResponse response = this.bulkBuilder.execute().actionGet();
    for (BulkItemResponse r : response.getItems()) {
      if (r.isFailed()) {
        LOGGER.error(r.getId() + "\t" + r.getFailureMessage());
      }
    }
    return !response.hasFailures();
  }

  /**
   * Attempt to removeOldTags.
   *
   * @param ids   ids of percolator queries to delete
   * @param index index
   * @return true if all of the old tags were removed; false indicates one or
   *         more tags were not removed (or the id set was empty)
   */
  public boolean removeOldTags(Set<String> ids, String index) {
    if (ids.size() == 0) {
      return false;
    }
    BulkRequestBuilder bulk = manager.client().prepareBulk();
    for (String id : ids) {
      // BUGFIX: arguments were swapped — prepareDelete(index, type, id) — and
      // the type must be ".percolator", matching addPercolateRule() and
      // deleteOldQueries(); previously "_percolator" was passed as the index.
      bulk.add(manager.client().prepareDelete(index, ".percolator", id));
    }
    return !bulk.execute().actionGet().hasFailures();
  }

  /**
   * Lists ids of percolator queries currently stored in the index.
   *
   * @param index index
   * @return ids of all registered tag queries (up to 1000)
   */
  public Set<String> getActivePercolateTags(String index) {
    Set<String> tags = new HashSet<>();
    SearchRequestBuilder searchBuilder = manager.client().prepareSearch("*").setIndices(index).setTypes(".percolator").setSize(1000);
    SearchResponse response = searchBuilder.setQuery(QueryBuilders.matchAllQuery()).execute().actionGet();
    SearchHits hits = response.getHits();
    for (SearchHit hit : hits.getHits()) {
      tags.add(hit.id());
    }
    return tags;
  }

  /**
   * Deletes every percolator query currently registered on the index.
   *
   * @param index index
   * @return true when all deletions succeeded; false when there were no tags
   *         to delete or the bulk request reported failures
   */
  public boolean deleteOldQueries(String index) {
    Set<String> tags = getActivePercolateTags(index);
    if (tags.size() == 0) {
      LOGGER.warn("No active tags were found in _percolator for index : {}", index);
      return false;
    }
    LOGGER.info("Deleting {} tags.", tags.size());
    BulkRequestBuilder bulk = manager.client().prepareBulk();
    for (String tag : tags) {
      bulk.add(manager.client().prepareDelete().setType(".percolator").setIndex(index).setId(tag));
    }
    BulkResponse response = bulk.execute().actionGet();
    return !response.hasFailures();
  }

  /**
   * Wraps a named query-string query in the JSON shape expected by the
   * percolator ({ "query": ... }).
   */
  public static class PercolateQueryBuilder {
    private QueryStringQueryBuilder queryBuilder;
    private String id;

    /**
     * PercolateQueryBuilder constructor.
     *
     * @param id                    tag id the query is stored under
     * @param query                 query_string syntax query
     * @param defaultPercolateField field the query runs against by default
     */
    public PercolateQueryBuilder(String id, String query, String defaultPercolateField) {
      this.id = id;
      this.queryBuilder = new QueryStringQueryBuilder(query);
      this.queryBuilder.defaultField(defaultPercolateField);
    }

    public String getId() {
      return this.id;
    }

    public String getSource() {
      return "{ \n\"query\" : " + this.queryBuilder.toString() + "\n}";
    }
  }

  public enum FilterLevel {
    MUST, SHOULD, MUST_NOT
  }
}