/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.provenance;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.nifi.authorization.Authorizer;
import org.apache.nifi.authorization.RequestAction;
import org.apache.nifi.authorization.resource.Authorizable;
import org.apache.nifi.authorization.user.NiFiUser;
import org.apache.nifi.events.EventReporter;
import org.apache.nifi.provenance.authorization.EventAuthorizer;
import org.apache.nifi.provenance.authorization.UserEventAuthorizer;
import org.apache.nifi.provenance.index.EventIndex;
import org.apache.nifi.provenance.index.lucene.LuceneEventIndex;
import org.apache.nifi.provenance.lineage.ComputeLineageSubmission;
import org.apache.nifi.provenance.lucene.IndexManager;
import org.apache.nifi.provenance.lucene.SimpleIndexManager;
import org.apache.nifi.provenance.search.Query;
import org.apache.nifi.provenance.search.QuerySubmission;
import org.apache.nifi.provenance.search.SearchableField;
import org.apache.nifi.provenance.serialization.RecordReaders;
import org.apache.nifi.provenance.serialization.StorageSummary;
import org.apache.nifi.provenance.store.EventFileManager;
import org.apache.nifi.provenance.store.EventStore;
import org.apache.nifi.provenance.store.PartitionedWriteAheadEventStore;
import org.apache.nifi.provenance.store.RecordReaderFactory;
import org.apache.nifi.provenance.store.RecordWriterFactory;
import org.apache.nifi.provenance.store.StorageResult;
import org.apache.nifi.provenance.toc.StandardTocWriter;
import org.apache.nifi.provenance.toc.TocUtil;
import org.apache.nifi.provenance.toc.TocWriter;
import org.apache.nifi.provenance.util.CloseableUtil;
import org.apache.nifi.reporting.Severity;
import org.apache.nifi.util.NiFiProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* A Provenance Repository that is made up of two distinct concepts: An {@link EventStore Event Store} that is responsible
* for storing and accessing the events (this repository makes use of an Event Store that uses a backing Write-Ahead Log, hence the name
* WriteAheadProvenanceRepository) and an {@link EventIndex Event Index} that is responsible for indexing and searching those
* events.
* </p>
*
* <p>
* When a Provenance Event is added to the repository, it is first stored in the Event Store. The Event Store reports the location (namely, the
* Event Identifier) that it used to store the event. The stored event is then given to the Event Index along with its storage location. The index
* is then responsible for indexing the event in real-time. Once this has completed, the method returns.
* </p>
*
* <p>
* The Event Index that is used by this implementation currently is the {@link LuceneEventIndex}, which is powered by Apache Lucene. This index provides
* very high throughput. However, this high throughput is gained by avoiding continual 'commits' of the Index Writer. As a result, on restart, this Repository
* may take a minute or two to re-index some of the Provenance Events, as some of the Events may have been added to the index without committing the Index Writer.
* Given the substantial performance improvement gained by committing the Index Writer only periodically, this trade-off is generally well accepted.
* </p>
*
* <p>
* This Repository supports the notion of 'partitions'. The repository can be configured to store data to one or more partitions. Each partition is typically
* stored on a separate physical partition on disk. As a result, this allows striping of data across multiple partitions in order to achieve linear scalability
* across disks for far greater performance.
* </p>
*/
public class WriteAheadProvenanceRepository implements ProvenanceRepository {
    private static final Logger logger = LoggerFactory.getLogger(WriteAheadProvenanceRepository.class);

    /** Block size (32 * 1024; presumably bytes - confirm) passed to every record writer created by the default writer factory. */
    static final int BLOCK_SIZE = 1024 * 32;

    /** Category used when reporting repository problems through the {@link EventReporter}. */
    public static final String EVENT_CATEGORY = "Provenance Repository";

    /** Repository configuration; {@code null} only when the no-arg (Service Loader) constructor was used. */
    private final RepositoryConfiguration config;

    // The fields below are effectively final: they are assigned exactly once, in init(...),
    // and are null until the repository has been initialized.
    private EventStore eventStore;
    private EventIndex eventIndex;
    private EventReporter eventReporter;
    private Authorizer authorizer;
    private ProvenanceAuthorizableFactory resourceFactory;

    /**
     * This constructor exists solely for the use of the Java Service Loader mechanism and should not be used.
     * It leaves the configuration {@code null}.
     */
    public WriteAheadProvenanceRepository() {
        config = null;
    }

    /**
     * Creates a repository whose configuration is derived from the given NiFi properties.
     *
     * @param nifiProperties the properties handed to {@link RepositoryConfiguration#create}
     */
    public WriteAheadProvenanceRepository(final NiFiProperties nifiProperties) {
        this(RepositoryConfiguration.create(nifiProperties));
    }

    /**
     * Creates a repository that uses the given configuration. The repository is not usable until
     * it has been initialized.
     *
     * @param config the configuration to use for the Event Store and Event Index
     */
    public WriteAheadProvenanceRepository(final RepositoryConfiguration config) {
        this.config = config;
    }

    /**
     * Builds the default record writer and record reader factories and then delegates to
     * {@link #init(RecordWriterFactory, RecordReaderFactory, EventReporter, Authorizer, ProvenanceAuthorizableFactory)}.
     *
     * @param eventReporter used to report problems encountered by the repository
     * @param authorizer the authorizer used when checking access to events
     * @param resourceFactory factory used to create the authorizables for the components that emitted events
     * @param idLookup identifier lookup handed to each record writer that is created
     * @throws IOException if the Event Store or Event Index cannot be initialized
     */
    @Override
    public synchronized void initialize(final EventReporter eventReporter, final Authorizer authorizer, final ProvenanceAuthorizableFactory resourceFactory,
        final IdentifierLookup idLookup) throws IOException {

        // Writers optionally maintain a Table-of-Contents file alongside the event file.
        final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
            final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
            return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, BLOCK_SIZE, idLookup);
        };

        // NOTE(review): this EventFileManager is a different instance from the one that init(...) creates and
        // passes to the Event Store, so the read lock taken below is not taken on the manager the store holds.
        // Confirm whether the two are intended to be the same instance.
        final EventFileManager fileManager = new EventFileManager();

        // The read lock is held only while the reader is being created; it is released before the reader is returned.
        final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> {
            fileManager.obtainReadLock(file);
            try {
                return RecordReaders.newRecordReader(file, logs, maxChars);
            } finally {
                fileManager.releaseReadLock(file);
            }
        };

        init(recordWriterFactory, recordReaderFactory, eventReporter, authorizer, resourceFactory);
    }

    /**
     * Creates and initializes the Event Store and Event Index using the supplied factories, then asks the
     * Event Store to re-index its latest events. A failure during that re-indexing step is logged and
     * otherwise ignored, so it does not prevent the repository from starting.
     *
     * @param recordWriterFactory factory used by the Event Store to create record writers
     * @param recordReaderFactory factory used by the Event Store to create record readers
     * @param eventReporter used to report problems encountered by the repository
     * @param authorizer the authorizer used when checking access to events
     * @param resourceFactory factory used to create the authorizables for the components that emitted events
     * @throws IOException if the Event Store or Event Index cannot be initialized
     */
    synchronized void init(RecordWriterFactory recordWriterFactory, RecordReaderFactory recordReaderFactory,
                           final EventReporter eventReporter, final Authorizer authorizer,
                           final ProvenanceAuthorizableFactory resourceFactory) throws IOException {
        final EventFileManager fileManager = new EventFileManager();

        eventStore = new PartitionedWriteAheadEventStore(config, recordWriterFactory, recordReaderFactory, eventReporter, fileManager);

        final IndexManager indexManager = new SimpleIndexManager(config);
        eventIndex = new LuceneEventIndex(config, indexManager, eventReporter);

        this.eventReporter = eventReporter;
        this.authorizer = authorizer;
        this.resourceFactory = resourceFactory;

        // The store is initialized first because the index is initialized against it.
        eventStore.initialize();
        eventIndex.initialize(eventStore);

        try {
            eventStore.reindexLatestEvents(eventIndex);
        } catch (final Exception e) {
            // Deliberately best-effort: a re-index failure must not keep the repository from coming up.
            logger.error("Failed to re-index some of the Provenance Events. It is possible that some of the latest "
                + "events will not be available from the Provenance Repository when a query is issued.", e);
        }
    }

    /**
     * @return a new, empty builder for provenance event records
     */
    @Override
    public ProvenanceEventBuilder eventBuilder() {
        return new StandardProvenanceEventRecord.Builder();
    }

    /**
     * Registers a single event by delegating to {@link #registerEvents(Iterable)}.
     *
     * @param event the event to store and index
     */
    @Override
    public void registerEvent(final ProvenanceEventRecord event) {
        registerEvents(Collections.singleton(event));
    }

    /**
     * Writes the given events to the Event Store and then hands the stored events, together with their
     * storage locations, to the Event Index. If the store throws an {@link IOException}, the failure is
     * logged and reported through the {@link EventReporter}, and nothing is indexed; the exception is not
     * propagated to the caller.
     *
     * @param events the events to store and index
     */
    @Override
    public void registerEvents(final Iterable<ProvenanceEventRecord> events) {
        final StorageResult storageResult;

        try {
            storageResult = eventStore.addEvents(events);
        } catch (final IOException e) {
            logger.error("Failed to write events to the Event Store", e);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to write Provenance Events to the repository. See logs for more details.");
            return;
        }

        // Only touch the index when something was actually stored.
        final Map<ProvenanceEventRecord, StorageSummary> locationMap = storageResult.getStorageLocations();
        if (!locationMap.isEmpty()) {
            eventIndex.addEvents(locationMap);
        }
    }

    /**
     * Retrieves events from the Event Store without performing any authorization.
     *
     * @param firstRecordId the event id passed to the Event Store as the starting point
     * @param maxRecords the maximum number of records passed to the Event Store
     * @return the events returned by the Event Store
     * @throws IOException if the Event Store fails to read the events
     */
    @Override
    public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords) throws IOException {
        return eventStore.getEvents(firstRecordId, maxRecords);
    }

    /**
     * Retrieves a single event from the Event Store without performing any authorization.
     *
     * @param id the id of the event
     * @return the event, or {@code null} if the Event Store has no event with that id
     * @throws IOException if the Event Store fails to read the event
     */
    @Override
    public ProvenanceEventRecord getEvent(final long id) throws IOException {
        return eventStore.getEvent(id).orElse(null);
    }

    /**
     * @return the maximum event id as reported by the Event Store
     */
    @Override
    public Long getMaxEventId() {
        return eventStore.getMaxEventId();
    }

    /**
     * Closes the Event Store and the Event Index via {@link CloseableUtil#closeQuietly}.
     */
    @Override
    public void close() {
        CloseableUtil.closeQuietly(eventStore, eventIndex);
    }

    /**
     * Retrieves a single event and checks that the given user may read it.
     *
     * @param id the id of the event
     * @param user the user on whose behalf the event is requested
     * @return the event, or {@code null} if no event with that id exists (no authorization check is made in that case)
     * @throws IOException if the Event Store fails to read the event
     */
    @Override
    public ProvenanceEventRecord getEvent(final long id, final NiFiUser user) throws IOException {
        final ProvenanceEventRecord event = getEvent(id);
        if (event == null) {
            return null;
        }

        authorize(event, user);
        return event;
    }

    /**
     * Requests READ authorization for the data of the component that emitted the given event. Does nothing
     * when no authorizer has been configured. A denial is presumably signalled by an exception thrown from
     * {@code Authorizable.authorize} - confirm against that API.
     *
     * @param event the event whose component data is being accessed
     * @param user the user requesting access
     */
    private void authorize(final ProvenanceEventRecord event, final NiFiUser user) {
        if (authorizer == null) {
            return;
        }

        // Remote-port events are authorized against a different authorizable than local components.
        final Authorizable eventAuthorizable;
        if (event.isRemotePortType()) {
            eventAuthorizable = resourceFactory.createRemoteDataAuthorizable(event.getComponentId());
        } else {
            eventAuthorizable = resourceFactory.createLocalDataAuthorizable(event.getComponentId());
        }

        eventAuthorizable.authorize(authorizer, RequestAction.READ, user, event.getAttributes());
    }

    /**
     * Retrieves events from the Event Store and passes them through an {@link EventAuthorizer} for the
     * given user, which filters out the events the user is not authorized for.
     *
     * @param firstRecordId the event id passed to the Event Store as the starting point
     * @param maxRecords the maximum number of records requested from the Event Store (applied before filtering)
     * @param user the user on whose behalf the events are requested
     * @return the result of filtering the retrieved events for the given user
     * @throws IOException if the Event Store fails to read the events
     */
    @Override
    public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
        final List<ProvenanceEventRecord> events = getEvents(firstRecordId, maxRecords);
        return createEventAuthorizer(user).filterUnauthorizedEvents(events);
    }

    /**
     * Creates an {@link EventAuthorizer} bound to the given user and to this repository's authorizer and
     * resource factory.
     */
    private EventAuthorizer createEventAuthorizer(final NiFiUser user) {
        return new UserEventAuthorizer(authorizer, resourceFactory, user);
    }

    /**
     * @return this repository
     */
    @Override
    public ProvenanceEventRepository getProvenanceEventRepository() {
        return this;
    }

    /**
     * Submits a query to the Event Index on behalf of the given user.
     *
     * @param query the query to run
     * @param user the user submitting the query; may be {@code null}, in which case a {@code null}
     *             identity is passed to the index
     * @return the submission returned by the Event Index
     */
    @Override
    public QuerySubmission submitQuery(final Query query, final NiFiUser user) {
        // Guard against a null user so that looking up the identity cannot throw a NullPointerException;
        // every other user-accepting method in this class forwards the user without dereferencing it.
        return eventIndex.submitQuery(query, createEventAuthorizer(user), user == null ? null : user.getIdentity());
    }

    /**
     * Looks up a previously submitted query in the Event Index.
     *
     * @param queryIdentifier the identifier of the query submission
     * @param user the user requesting the submission
     * @return the submission returned by the Event Index
     */
    @Override
    public QuerySubmission retrieveQuerySubmission(final String queryIdentifier, final NiFiUser user) {
        return eventIndex.retrieveQuerySubmission(queryIdentifier, user);
    }

    /**
     * Submits a lineage computation for the given FlowFile UUID to the Event Index.
     *
     * @param flowFileUuid the UUID of the FlowFile whose lineage is requested
     * @param user the user submitting the request
     * @return the submission returned by the Event Index
     */
    @Override
    public ComputeLineageSubmission submitLineageComputation(final String flowFileUuid, final NiFiUser user) {
        return eventIndex.submitLineageComputation(flowFileUuid, user, createEventAuthorizer(user));
    }

    /**
     * Submits a lineage computation for the given event id to the Event Index.
     *
     * @param eventId the id of the event whose lineage is requested
     * @param user the user submitting the request
     * @return the submission returned by the Event Index
     */
    @Override
    public ComputeLineageSubmission submitLineageComputation(final long eventId, final NiFiUser user) {
        return eventIndex.submitLineageComputation(eventId, user, createEventAuthorizer(user));
    }

    /**
     * Looks up a previously submitted lineage computation in the Event Index.
     *
     * @param lineageIdentifier the identifier of the lineage submission
     * @param user the user requesting the submission
     * @return the submission returned by the Event Index
     */
    @Override
    public ComputeLineageSubmission retrieveLineageSubmission(final String lineageIdentifier, final NiFiUser user) {
        return eventIndex.retrieveLineageSubmission(lineageIdentifier, user);
    }

    /**
     * Submits an "expand parents" lineage request for the given event id to the Event Index.
     *
     * @param eventId the id of the event to expand
     * @param user the user submitting the request
     * @return the submission returned by the Event Index
     */
    @Override
    public ComputeLineageSubmission submitExpandParents(final long eventId, final NiFiUser user) {
        return eventIndex.submitExpandParents(eventId, user, createEventAuthorizer(user));
    }

    /**
     * Submits an "expand children" lineage request for the given event id to the Event Index.
     *
     * @param eventId the id of the event to expand
     * @param user the user submitting the request
     * @return the submission returned by the Event Index
     */
    @Override
    public ComputeLineageSubmission submitExpandChildren(final long eventId, final NiFiUser user) {
        return eventIndex.submitExpandChildren(eventId, user, createEventAuthorizer(user));
    }

    /**
     * @return an unmodifiable view of the searchable fields from the repository configuration
     */
    @Override
    public List<SearchableField> getSearchableFields() {
        return Collections.unmodifiableList(config.getSearchableFields());
    }

    /**
     * @return an unmodifiable view of the searchable attributes from the repository configuration
     */
    @Override
    public List<SearchableField> getSearchableAttributes() {
        return Collections.unmodifiableList(config.getSearchableAttributes());
    }

    /**
     * @return the configuration this repository was constructed with (may be {@code null} for the no-arg constructor)
     */
    RepositoryConfiguration getConfig() {
        return this.config;
    }
}