/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.provenance.lucene;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.apache.nifi.provenance.StandardProvenanceEventRecord;
import org.apache.nifi.provenance.authorization.EventAuthorizer;
import org.apache.nifi.provenance.serialization.RecordReader;
import org.apache.nifi.provenance.serialization.RecordReaders;
import org.apache.nifi.provenance.toc.TocReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DocsReader {

    private final Logger logger = LoggerFactory.getLogger(DocsReader.class);

    public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader,
            final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults,
            final int maxAttributeChars) throws IOException {

        if (retrievalCount.get() >= maxResults) {
            return Collections.emptySet();
        }

        final long start = System.nanoTime();
        final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        final int numDocs = Math.min(scoreDocs.length, maxResults);
        final List<Document> docs = new ArrayList<>(numDocs);

        for (int i = numDocs - 1; i >= 0; i--) {
            final int docId = scoreDocs[i].doc;
            final Document d = indexReader.document(docId);
            docs.add(d);
        }

        final long readDocuments = System.nanoTime() - start;
        logger.debug("Reading {} Lucene Documents took {} millis", docs.size(), TimeUnit.NANOSECONDS.toMillis(readDocuments));

        return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
    }

    private long getByteOffset(final Document d, final RecordReader reader) {
        final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
        if (blockField != null) {
            final int blockIndex = blockField.numericValue().intValue();
            final TocReader tocReader = reader.getTocReader();
            return tocReader.getBlockOffset(blockIndex);
        }

        return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
    }

    private ProvenanceEventRecord getRecord(final Document d, final RecordReader reader) throws IOException {
        final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
        // Position the reader at the block that contains the event (when a block index is available)
        // or at the event's byte offset, then scan forward until the event with the matching id is found.
        if (blockField == null) {
            reader.skipTo(getByteOffset(d, reader));
        } else {
            reader.skipToBlock(blockField.numericValue().intValue());
        }

        StandardProvenanceEventRecord record;
        while ((record = reader.nextRecord()) != null) {
            final IndexableField idField = d.getField(SearchableFields.Identifier.getSearchableFieldName());
            if (idField == null || idField.numericValue().longValue() == record.getEventId()) {
                break;
            }
        }

        if (record == null) {
            logger.warn("Failed to read Provenance Event for '" + d + "'. The event file may be missing or corrupted");
        }

        return record;
    }

    public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles,
            final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {

        if (retrievalCount.get() >= maxResults) {
            return Collections.emptySet();
        }

        final long start = System.nanoTime();
        final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();

        // Group the Documents by the Provenance Event file that contains them so that each event file
        // needs to be opened only once.
        final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);

        int eventsReadThisFile = 0;
        int logFileCount = 0;

        for (final String storageFileName : byStorageNameDocGroups.keySet()) {
            final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
            if (provenanceEventFile == null) {
                logger.warn("Could not find Provenance Log File with basename {} in the Provenance Repository; "
                    + "assuming file has expired and continuing without it", storageFileName);
                continue;
            }

            try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
                // Count each event file that is successfully opened so that the timing log below reports a real count.
                logFileCount++;

                final Iterator<Document> docIter = byStorageNameDocGroups.get(storageFileName).iterator();
                while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                    final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                    if (event != null && authorizer.isAuthorized(event)) {
                        matchingRecords.add(event);
                        eventsReadThisFile++;
                    }
                }
            } catch (final Exception e) {
                logger.warn("Failed to read Provenance Events. The event file '"
                    + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
            }
        }

        logger.debug("Read {} records from previous file", eventsReadThisFile);

        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(), logFileCount);

        return matchingRecords;
    }
}