/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.nifi.processors.solr;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnRemoved;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StopWatch;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;

/**
 * Source processor that runs a configured query against Solr and emits each page of
 * results as a FlowFile containing the documents serialized as XML.
 *
 * <p>The timestamp of the last successful run is kept in {@link #lastEndDatedRef} and
 * persisted to a properties file named {@code conf/.getSolr-<processor id>}. Once a
 * timestamp has been recorded, subsequent runs add a filter query on the configured
 * date field so that only documents in the range {@code (lastEndDate, now]} are returned.</p>
 */
@Tags({"Apache", "Solr", "Get", "Pull"})
@InputRequirement(Requirement.INPUT_FORBIDDEN)
@CapabilityDescription("Queries Solr and outputs the results as a FlowFile")
public class GetSolr extends SolrProcessor {

    public static final PropertyDescriptor SOLR_QUERY = new PropertyDescriptor
            .Builder().name("Solr Query")
            .description("A query to execute against Solr")
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    public static final PropertyDescriptor RETURN_FIELDS = new PropertyDescriptor
            .Builder().name("Return Fields")
            .description("Comma-separated list of fields names to return")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    public static final PropertyDescriptor SORT_CLAUSE = new PropertyDescriptor
            .Builder().name("Sort Clause")
            .description("A Solr sort clause, ex: field1 asc, field2 desc")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    public static final PropertyDescriptor DATE_FIELD = new PropertyDescriptor
            .Builder().name("Date Field")
            .description("The name of a date field in Solr used to filter results")
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    // A batch size of zero or less can never make progress while paging, so only
    // strictly positive values are accepted.
    public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor
            .Builder().name("Batch Size")
            .description("Number of rows per Solr query")
            .required(true)
            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .defaultValue("100")
            .build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder()
            .name("success")
            .description("The results of querying Solr")
            .build();

    /** Path prefix of the per-processor cache file; the processor identifier is appended. */
    static final String FILE_PREFIX = "conf/.getSolr-";
    /** Key under which the last end date is stored in the cache file. */
    static final String LAST_END_DATE = "LastEndDate";
    static final String LAST_END_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
    /** Sentinel meaning "no successful run recorded yet" (epoch + 1 ms, formatted in GMT). */
    static final String UNINITIALIZED_LAST_END_DATE_VALUE;

    static {
        SimpleDateFormat sdf = new SimpleDateFormat(LAST_END_DATE_PATTERN, Locale.US);
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
        UNINITIALIZED_LAST_END_DATE_VALUE = sdf.format(new Date(1L));
    }

    final AtomicReference<String> lastEndDatedRef = new AtomicReference<>(UNINITIALIZED_LAST_END_DATE_VALUE);

    private Set<Relationship> relationships;
    private List<PropertyDescriptor> descriptors;
    /** Serializes reads and writes of the last-end-date cache file. */
    private final Lock fileLock = new ReentrantLock();

    /**
     * Builds the immutable property descriptor list and relationship set exposed by this processor.
     *
     * @param context the initialization context, forwarded to the superclass
     */
    @Override
    protected void init(final ProcessorInitializationContext context) {
        super.init(context);

        final List<PropertyDescriptor> supportedDescriptors = new ArrayList<>();
        supportedDescriptors.add(SOLR_TYPE);
        supportedDescriptors.add(SOLR_LOCATION);
        supportedDescriptors.add(COLLECTION);
        supportedDescriptors.add(SOLR_QUERY);
        supportedDescriptors.add(RETURN_FIELDS);
        supportedDescriptors.add(SORT_CLAUSE);
        supportedDescriptors.add(DATE_FIELD);
        supportedDescriptors.add(BATCH_SIZE);
        supportedDescriptors.add(JAAS_CLIENT_APP_NAME);
        supportedDescriptors.add(BASIC_USERNAME);
        supportedDescriptors.add(BASIC_PASSWORD);
        supportedDescriptors.add(SSL_CONTEXT_SERVICE);
        supportedDescriptors.add(SOLR_SOCKET_TIMEOUT);
        supportedDescriptors.add(SOLR_CONNECTION_TIMEOUT);
        supportedDescriptors.add(SOLR_MAX_CONNECTIONS);
        supportedDescriptors.add(SOLR_MAX_CONNECTIONS_PER_HOST);
        supportedDescriptors.add(ZK_CLIENT_TIMEOUT);
        supportedDescriptors.add(ZK_CONNECTION_TIMEOUT);
        this.descriptors = Collections.unmodifiableList(supportedDescriptors);

        final Set<Relationship> supportedRelationships = new HashSet<>();
        supportedRelationships.add(REL_SUCCESS);
        this.relationships = Collections.unmodifiableSet(supportedRelationships);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return this.relationships;
    }

    @Override
    public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return this.descriptors;
    }

    /**
     * Resets the in-memory last end date to the uninitialized sentinel whenever any property changes.
     *
     * <p>NOTE(review): only the in-memory value is reset here; {@link #onTrigger} re-reads the
     * cache file on every run, so a value already persisted on disk appears to take precedence
     * again on the next trigger - confirm this is intended.</p>
     */
    @Override
    public void onPropertyModified(PropertyDescriptor descriptor, String oldValue, String newValue) {
        lastEndDatedRef.set(UNINITIALIZED_LAST_END_DATE_VALUE);
    }

    /** Persists the current last end date when the processor is stopped. */
    @OnStopped
    public void onStopped() {
        writeLastEndDate();
    }

    /** Deletes this processor's cache file, if present, when the processor is removed. */
    @OnRemoved
    public void onRemoved() {
        final File lastEndDateCache = new File(FILE_PREFIX + getIdentifier());
        if (lastEndDateCache.exists() && !lastEndDateCache.delete()) {
            getLogger().warn("Failed to delete LastEndDate cache file " + lastEndDateCache.getAbsolutePath());
        }
    }

    /**
     * Executes the configured Solr query and transfers each page of results to {@link #REL_SUCCESS}.
     *
     * <p>When a previous end date is known, the query is restricted to documents whose date field
     * falls in {@code (lastEndDate, now]} and all pages are fetched; otherwise only the first page
     * is emitted. On success the current time becomes the new last end date and is persisted.</p>
     *
     * @param context provides the configured property values
     * @param session used to create, write and transfer FlowFiles
     * @throws ProcessException if the Sort Clause is malformed or the Solr request fails
     */
    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        final ComponentLog logger = getLogger();
        readLastEndDate();

        // SimpleDateFormat is not thread-safe, so a fresh instance is created per invocation
        final SimpleDateFormat sdf = new SimpleDateFormat(LAST_END_DATE_PATTERN, Locale.US);
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
        final String currDate = sdf.format(new Date());

        final String lastEndDate = lastEndDatedRef.get();
        final boolean initialized = !UNINITIALIZED_LAST_END_DATE_VALUE.equals(lastEndDate);

        final String query = context.getProperty(SOLR_QUERY).getValue();
        final SolrQuery solrQuery = new SolrQuery(query);
        solrQuery.setRows(context.getProperty(BATCH_SIZE).asInteger());

        // if initialized then apply a filter to restrict results from the last end time til now
        if (initialized) {
            StringBuilder filterQuery = new StringBuilder();
            filterQuery.append(context.getProperty(DATE_FIELD).getValue())
                    .append(":{").append(lastEndDate).append(" TO ")
                    .append(currDate).append("]");
            solrQuery.addFilterQuery(filterQuery.toString());
            logger.info("Applying filter query {}", new Object[]{filterQuery.toString()});
        }

        applyReturnFields(solrQuery, context.getProperty(RETURN_FIELDS).getValue());
        applySortClause(solrQuery, context.getProperty(SORT_CLAUSE).getValue());

        try {
            // run the initial query and send out the first page of results
            final StopWatch stopWatch = new StopWatch(true);
            QueryResponse response = executeQuery(solrQuery);
            stopWatch.stop();

            long duration = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            final SolrDocumentList documentList = response.getResults();
            // guard the log statement too: the result list may be null
            final long totalResults = documentList == null ? 0L : documentList.getNumFound();
            logger.info("Retrieved {} results from Solr for {} in {} ms",
                    new Object[] {totalResults, query, duration});

            if (totalResults > 0) {
                final String transitUri = buildTransitUri(context);
                transferResults(session, response, transitUri, duration);

                // if initialized then page through the results and send out each page
                if (initialized) {
                    int endRow = documentList.size();

                    while (endRow < totalResults) {
                        solrQuery.setStart(endRow);

                        stopWatch.start();
                        response = executeQuery(solrQuery);
                        stopWatch.stop();

                        duration = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                        logger.info("Retrieved results for {} in {} ms", new Object[]{query, duration});

                        final SolrDocumentList page = response.getResults();
                        if (page == null || page.isEmpty()) {
                            // The result set shrank while paging; without this guard endRow
                            // would never advance and the loop would not terminate.
                            break;
                        }

                        transferResults(session, response, transitUri, duration);
                        endRow += page.size();
                    }
                }
            }

            lastEndDatedRef.set(currDate);
            writeLastEndDate();
        } catch (SolrServerException | IOException e) {
            context.yield();
            session.rollback();
            logger.error("Failed to execute query {} due to {}", new Object[]{query, e}, e);
            throw new ProcessException(e);
        } catch (final Throwable t) {
            context.yield();
            session.rollback();
            logger.error("Failed to execute query {} due to {}", new Object[]{query, t}, t);
            throw t;
        }
    }

    /**
     * Adds each non-blank entry of a comma-separated field list to the query's returned fields.
     */
    private static void applyReturnFields(final SolrQuery solrQuery, final String returnFields) {
        if (returnFields == null || returnFields.trim().isEmpty()) {
            return;
        }
        for (final String returnField : returnFields.trim().split(",")) {
            final String field = returnField.trim();
            if (!field.isEmpty()) {
                solrQuery.addField(field);
            }
        }
    }

    /**
     * Parses a sort clause of the form {@code field1 asc, field2 desc} and adds each sort to the query.
     * The direction is matched case-insensitively and any run of whitespace may separate the parts.
     *
     * @throws ProcessException if an entry has no direction or the direction is not asc/desc
     */
    private static void applySortClause(final SolrQuery solrQuery, final String fullSortClause) {
        if (fullSortClause == null || fullSortClause.trim().isEmpty()) {
            return;
        }
        for (final String sortClause : fullSortClause.split(",")) {
            final String trimmedClause = sortClause.trim();
            if (trimmedClause.isEmpty()) {
                continue;
            }

            final String[] sortParts = trimmedClause.split("\\s+");
            if (sortParts.length < 2) {
                throw new ProcessException("Invalid Sort Clause '" + trimmedClause
                        + "': expected '<field> <asc|desc>'");
            }

            final SolrQuery.ORDER order;
            try {
                // the ORDER constants are lower-case, so normalize user input before lookup
                order = SolrQuery.ORDER.valueOf(sortParts[1].toLowerCase(Locale.ROOT));
            } catch (final IllegalArgumentException e) {
                throw new ProcessException("Invalid sort order '" + sortParts[1] + "' in Sort Clause '"
                        + trimmedClause + "': expected asc or desc", e);
            }
            solrQuery.addSort(sortParts[0], order);
        }
    }

    /**
     * Sends the query to Solr, attaching basic-auth credentials when enabled so that every
     * page request (not only the first) is authenticated.
     */
    private QueryResponse executeQuery(final SolrQuery solrQuery) throws SolrServerException, IOException {
        final QueryRequest req = new QueryRequest(solrQuery);
        if (isBasicAuthEnabled()) {
            req.setBasicAuthCredentials(getUsername(), getPassword());
        }
        return req.process(getSolrClient());
    }

    /** Builds the provenance transit URI: {@code solr://<location>[/<collection>]}. */
    private String buildTransitUri(final ProcessContext context) {
        final StringBuilder transitUri = new StringBuilder("solr://");
        transitUri.append(getSolrLocation());
        if (SOLR_TYPE_CLOUD.equals(context.getProperty(SOLR_TYPE).getValue())) {
            transitUri.append("/").append(context.getProperty(COLLECTION).getValue());
        }
        return transitUri.toString();
    }

    /**
     * Writes one page of results to a new FlowFile, marks it as XML, transfers it to
     * {@link #REL_SUCCESS} and records a provenance RECEIVE event.
     */
    private void transferResults(final ProcessSession session, final QueryResponse response,
                                 final String transitUri, final long duration) {
        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, new QueryResponseOutputStreamCallback(response));
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/xml");
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().receive(flowFile, transitUri, duration);
    }

    /**
     * Loads the last end date from the cache file into {@link #lastEndDatedRef}.
     * The in-memory value is left untouched when the file is absent, unreadable,
     * or does not contain a non-empty {@link #LAST_END_DATE} entry.
     */
    private void readLastEndDate() {
        fileLock.lock();
        try {
            final File lastEndDateCache = new File(FILE_PREFIX + getIdentifier());
            if (!lastEndDateCache.exists()) {
                // nothing has been persisted yet, e.g. first run
                return;
            }

            try (FileInputStream fis = new FileInputStream(lastEndDateCache)) {
                final Properties props = new Properties();
                props.load(fis);

                final String persisted = props.getProperty(LAST_END_DATE);
                // never store null/empty: it would end up in the filter query and
                // Properties.setProperty rejects null when writing the value back
                if (persisted != null && !persisted.isEmpty()) {
                    lastEndDatedRef.set(persisted);
                }
            } catch (final IOException e) {
                // best effort: keep the in-memory value but make the failure visible
                getLogger().warn("Failed to read LastEndDate due to " + e, e);
            }
        } finally {
            fileLock.unlock();
        }
    }

    /** Persists the current value of {@link #lastEndDatedRef} to the cache file; failures are logged. */
    private void writeLastEndDate() {
        fileLock.lock();
        try {
            final File lastEndDateCache = new File(FILE_PREFIX + getIdentifier());
            try (FileOutputStream fos = new FileOutputStream(lastEndDateCache)) {
                final Properties props = new Properties();
                props.setProperty(LAST_END_DATE, lastEndDatedRef.get());
                props.store(fos, "GetSolr LastEndDate value");
            } catch (final IOException e) {
                getLogger().error("Failed to persist LastEndDate due to " + e, e);
            }
        } finally {
            fileLock.unlock();
        }
    }

    /**
     * Writes each SolrDocument in XML format to the OutputStream.
     */
    private static final class QueryResponseOutputStreamCallback implements OutputStreamCallback {

        private final QueryResponse response;

        public QueryResponseOutputStreamCallback(QueryResponse response) {
            this.response = response;
        }

        @Override
        public void process(OutputStream out) throws IOException {
            for (SolrDocument doc : response.getResults()) {
                String xml = ClientUtils.toXML(toSolrInputDocument(doc));
                IOUtils.write(xml, out, StandardCharsets.UTF_8);
            }
        }

        /** Copies every field name/value pair of the given document into a new SolrInputDocument. */
        public SolrInputDocument toSolrInputDocument(SolrDocument d) {
            SolrInputDocument doc = new SolrInputDocument();
            for (String name : d.getFieldNames()) {
                doc.addField(name, d.getFieldValue(name));
            }
            return doc;
        }
    }
}