/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.provenance.index.lucene;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.nifi.provenance.ProgressiveResult;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.apache.nifi.provenance.authorization.EventAuthorizer;
import org.apache.nifi.provenance.authorization.EventTransformer;
import org.apache.nifi.provenance.index.EventIndexSearcher;
import org.apache.nifi.provenance.index.SearchFailedException;
import org.apache.nifi.provenance.lucene.IndexManager;
import org.apache.nifi.provenance.store.EventStore;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link Runnable} that executes a Lucene {@link Query} against a single index directory, retrieves the
 * matching Provenance events from the {@link EventStore} (applying the configured {@link EventAuthorizer}
 * and transforming unauthorized events), and publishes them to a shared {@link ProgressiveResult}. The task
 * short-circuits whenever the result has already reached the maximum number of hits or has otherwise finished.
 */
public class QueryTask implements Runnable {
    private static final Logger logger = LoggerFactory.getLogger(QueryTask.class);
    private static final Set<String> LUCENE_FIELDS_TO_LOAD = Collections.singleton(SearchableFields.Identifier.getSearchableFieldName());

    private final Query query;
    private final ProgressiveResult queryResult;
    private final int maxResults;
    private final IndexManager indexManager;
    private final File indexDir;
    private final EventStore eventStore;
    private final EventAuthorizer authorizer;
    private final EventTransformer transformer;

    public QueryTask(final Query query, final ProgressiveResult result, final int maxResults, final IndexManager indexManager,
        final File indexDir, final EventStore eventStore, final EventAuthorizer authorizer,
        final EventTransformer unauthorizedTransformer) {
        this.query = query;
        this.queryResult = result;
        this.maxResults = maxResults;
        this.indexManager = indexManager;
        this.indexDir = indexDir;
        this.eventStore = eventStore;
        this.authorizer = authorizer;
        this.transformer = unauthorizedTransformer;
    }

    @Override
    public void run() {
        if (queryResult.getTotalHitCount() >= maxResults) {
            logger.debug("Will not query lucene index {} because maximum results have already been obtained", indexDir);
            queryResult.update(Collections.emptyList(), 0L);
            return;
        }

        if (queryResult.isFinished()) {
            logger.debug("Will not query lucene index {} because the query is already finished", indexDir);
            return;
        }

        final long borrowStart = System.nanoTime();
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(indexDir);
        } catch (final FileNotFoundException fnfe) {
            // We do not consider this an error because it may well just be the case that the event index has aged off and
            // been deleted or that we've just created the index and haven't yet committed the writer. So instead, we just
            // update the result to indicate that this index search is complete with no results.
            queryResult.update(Collections.emptyList(), 0); // nothing has been indexed yet, or the data has already aged off

            logger.info("Attempted to search Provenance Index {} but could not find the directory or the directory did not contain a valid Lucene index. "
                + "This usually indicates that either the index was just created and hasn't fully been initialized, or that the index was recently aged off.", indexDir);
            return;
        } catch (final IOException ioe) {
            queryResult.setError("Failed to query index " + indexDir + "; see logs for more details");
            logger.error("Failed to query index " + indexDir, ioe);
            return;
        }

        try {
            final long borrowMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - borrowStart);
            logger.debug("Borrowing index searcher for {} took {} ms", indexDir, borrowMillis);
            final long startNanos = System.nanoTime();

            // If the maximum number of results has already been retrieved, do not bother querying Lucene
            if (queryResult.getTotalHitCount() >= maxResults) {
                logger.debug("Will not query lucene index {} because maximum results have already been obtained", indexDir);
                queryResult.update(Collections.emptyList(), 0L);
                return;
            }

            if (queryResult.isFinished()) {
                logger.debug("Will not query lucene index {} because the query is already finished", indexDir);
                return;
            }

            // Query Lucene
            final IndexReader indexReader = searcher.getIndexSearcher().getIndexReader();

            final TopDocs topDocs;
            try {
                topDocs = searcher.getIndexSearcher().search(query, maxResults);
            } catch (final Exception e) {
                logger.error("Failed to query Lucene for index " + indexDir, e);
                queryResult.setError("Failed to query Lucene for index " + indexDir + " due to " + e);
                return;
            } finally {
                final long ms = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
                logger.debug("Querying Lucene for index {} took {} ms", indexDir, ms);
            }

            // If the maximum number of results has already been retrieved, do not bother reading the documents
            if (queryResult.getTotalHitCount() >= maxResults) {
                logger.debug("Will not read events from store for {} because maximum results have already been obtained", indexDir);
                queryResult.update(Collections.emptyList(), 0L);
                return;
            }

            if (queryResult.isFinished()) {
                logger.debug("Will not read events from store for {} because the query has already finished", indexDir);
                return;
            }

            final Tuple<List<ProvenanceEventRecord>, Integer> eventsAndTotalHits = readDocuments(topDocs, indexReader);

            if (eventsAndTotalHits == null) {
                queryResult.update(Collections.emptyList(), 0L);
                logger.info("Will not update query results for queried index {} for query {} because the maximum number of results have been reached already",
                    indexDir, query);
            } else {
                queryResult.update(eventsAndTotalHits.getKey(), eventsAndTotalHits.getValue());

                final long searchNanos = System.nanoTime() - startNanos;
                final long millis = TimeUnit.NANOSECONDS.toMillis(searchNanos);
                logger.info("Successfully queried index {} for query {}; retrieved {} events with a total of {} hits in {} millis",
                    indexDir, query, eventsAndTotalHits.getKey().size(), eventsAndTotalHits.getValue(), millis);
            }
        } catch (final Exception e) {
            logger.error("Failed to query events against index " + indexDir, e);
            queryResult.setError("Failed to complete query due to " + e);
        } finally {
            indexManager.returnIndexSearcher(searcher);
        }
    }

    private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs, final IndexReader indexReader) {
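        // Descriptive note on the flow below: resolve each matching Lucene document to the event ID stored in its
        // Identifier field, fetch the corresponding full event records from the Event Store (applying authorization
        // and transforming unauthorized events), and return the events along with Lucene's total hit count.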
        // If no TopDocs were supplied, just provide a Tuple that has no records and a hit count of 0.
        if (topDocs == null || topDocs.totalHits == 0) {
            return new Tuple<>(Collections.<ProvenanceEventRecord> emptyList(), 0);
        }

        final long start = System.nanoTime();

        final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs)
            .mapToInt(scoreDoc -> scoreDoc.doc)
            .mapToObj(docId -> {
                try {
                    return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
                } catch (final Exception e) {
                    throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
                }
            })
            .map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue())
            .collect(Collectors.toList());

        final long endConvert = System.nanoTime();
        final long ms = TimeUnit.NANOSECONDS.toMillis(endConvert - start);
        logger.debug("Converting documents took {} ms", ms);

        List<ProvenanceEventRecord> events;
        try {
            events = eventStore.getEvents(eventIds, authorizer, transformer);
        } catch (IOException e) {
            throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
        }

        final long fetchEventMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - endConvert);
        logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)", eventIds.size(), fetchEventMillis, events.size());

        final int totalHits = topDocs.totalHits;
        return new Tuple<>(events, totalHits);
    }
}