/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.provenance;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Pattern;
import org.apache.nifi.provenance.serialization.CompressableRecordReader;
import org.apache.nifi.provenance.toc.TocReader;
import org.apache.nifi.stream.io.StreamUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class StandardRecordReader extends CompressableRecordReader {
// Highest serialization version that nextRecord() accepts; a larger version is rejected
// with an IllegalArgumentException. NOTE(review): the name is misspelled ("VERISON") but it
// is public, so renaming it could break code outside this file.
public static final int SERIALIZATION_VERISON = 9;
// Name associated with this serialization format. NOTE(review): it is not referenced anywhere
// in this class; presumably it is written to / matched against a file header elsewhere - confirm.
public static final String SERIALIZATION_NAME = "org.apache.nifi.provenance.PersistentProvenanceRepository";
private static final Logger logger = LoggerFactory.getLogger(StandardRecordReader.class);
// Textual UUID layout (8-4-4-4-12 hexadecimal digits, either case); readUUID() uses it to
// validate UUIDs that were serialized as strings.
private static final Pattern UUID_PATTERN = Pattern.compile("[a-fA-F0-9]{8}\\-([a-fA-F0-9]{4}\\-){3}[a-fA-F0-9]{12}");
/**
* Creates a reader without a table-of-contents reader. Delegates to the four-argument
* constructor, passing {@code null} as the {@link TocReader}.
*
* @param in the stream to read events from
* @param filename the name of the file being read; recorded as the storage location of each event
* @param maxAttributeChars forwarded to the superclass; presumably the limit that
*        {@code getMaxAttributeLength()} reports when attribute values are truncated - confirm
* @throws IOException if the delegated constructor fails
*/
public StandardRecordReader(final InputStream in, final String filename, final int maxAttributeChars) throws IOException {
this(in, filename, null, maxAttributeChars);
}
/**
* Creates a reader by handing all arguments to the {@link CompressableRecordReader}
* constructor and then emitting a trace-level log message naming the file.
*
* @param in the stream to read events from
* @param filename the name of the file being read; recorded as the storage location of each event
* @param tocReader the table-of-contents reader to pass to the superclass; the three-argument
*        constructor of this class passes {@code null} here
* @param maxAttributeChars forwarded to the superclass; presumably the limit that
*        {@code getMaxAttributeLength()} reports when attribute values are truncated - confirm
* @throws IOException if the superclass constructor fails
*/
public StandardRecordReader(final InputStream in, final String filename, final TocReader tocReader, final int maxAttributeChars) throws IOException {
super(in, filename, tocReader, maxAttributeChars);
logger.trace("Creating RecordReader for {}", filename);
}
/**
 * Deserializes a single event that was written with a serialization version below 6.
 * <p>
 * Fields are consumed from the stream strictly in the order in which they appear below;
 * several of them exist only in some versions (event UUID: version 4 only; event duration and
 * source-system FlowFile identifier: version 4 and later; child UUIDs: version 3 and later).
 * Values that the current event model no longer carries are read and discarded so that the
 * stream stays aligned.
 *
 * @param dis the stream positioned at the start of an event
 * @param serializationVersion the version the event was written with
 * @return the event, with its storage location set to this reader's file name and the number
 *         of bytes consumed before the event was read
 * @throws IOException if reading from the stream fails
 */
private StandardProvenanceEventRecord readPreVersion6Record(final DataInputStream dis, final int serializationVersion) throws IOException {
    final long recordOffset = getBytesConsumed();
    final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder();
    final boolean version4OrLater = serializationVersion > 3;

    final long id = dis.readLong();

    if (serializationVersion == 4) {
        // Version 4 stored a per-event UUID so that events could be referenced uniquely across a
        // cluster. Version 5 dropped it again in favor of deriving a unique id from the event id
        // and the cluster node identifier, so the value is consumed here and ignored.
        dis.readUTF();
    }

    recordBuilder.setEventType(ProvenanceEventType.valueOf(dis.readUTF()));
    recordBuilder.setEventTime(dis.readLong());

    if (version4OrLater) {
        // The event duration was introduced in version 4.
        recordBuilder.setEventDuration(dis.readLong());
    }

    dis.readLong(); // formerly the persisted FlowFile id; consumed and ignored
    final long contentSize = dis.readLong();

    recordBuilder.setComponentId(readNullableString(dis));
    recordBuilder.setComponentType(readNullableString(dis));
    recordBuilder.setFlowFileUUID(readNullableString(dis));

    for (int remaining = dis.readInt(); remaining > 0; remaining--) {
        recordBuilder.addParentUuid(dis.readUTF());
    }

    if (serializationVersion > 2) {
        // Child UUIDs were introduced in version 3.
        for (int remaining = dis.readInt(); remaining > 0; remaining--) {
            recordBuilder.addChildUuid(dis.readUTF());
        }
    }

    final String sourceUri = readNullableString(dis);
    if (version4OrLater) {
        // The source-system FlowFile identifier was introduced in version 4.
        recordBuilder.setSourceSystemFlowFileIdentifier(readNullableString(dis));
    }
    final String destinationUri = readNullableString(dis);

    // The source URI takes precedence; the destination URI is only used when no source is present.
    final String transitUri = (sourceUri != null) ? sourceUri : destinationUri;
    if (transitUri != null) {
        recordBuilder.setTransitUri(transitUri);
    }

    readNullableString(dis); // Content-Type, no longer used
    recordBuilder.setAlternateIdentifierUri(readNullableString(dis));

    final Map<String, String> attributes = readAttributes(dis, false);

    // These old records carry neither an entry date nor a lineage start date, so the current
    // time and -1 are stored instead; all attributes are recorded as updates over an empty
    // set of previous attributes.
    recordBuilder.setFlowFileEntryDate(System.currentTimeMillis());
    recordBuilder.setLineageStartDate(-1L);
    recordBuilder.setAttributes(Collections.<String, String>emptyMap(), attributes);
    recordBuilder.setCurrentContentClaim(null, null, null, null, contentSize);
    recordBuilder.setStorageLocation(getFilename(), recordOffset);

    final StandardProvenanceEventRecord event = recordBuilder.build();
    event.setEventId(id);
    return event;
}
/**
 * Deserializes the next provenance event from the given stream.
 * <p>
 * Events written with a version below 6 are delegated to
 * {@link #readPreVersion6Record(DataInputStream, int)}. For later versions the fields are
 * consumed strictly in the order shown below, with version-dependent differences:
 * <ul>
 * <li>before version 9, a list of lineage identifiers is present and is skipped;</li>
 * <li>before version 7, the file size follows the lineage start date and a single attribute
 *     map is stored; from version 7 on, previous and updated attributes, the content claims
 *     and the source queue identifier are stored instead;</li>
 * <li>UUIDs are decoded according to the version by {@code readUUID}.</li>
 * </ul>
 *
 * @param dis the stream positioned at the start of an event
 * @param serializationVersion the version the event was written with
 * @return the event, with its storage location set to this reader's file name and the number
 *         of bytes consumed before the event was read
 * @throws IOException if reading from the stream fails or a string-encoded UUID is malformed
 * @throws IllegalArgumentException if {@code serializationVersion} is greater than
 *         {@link #SERIALIZATION_VERISON}
 */
@Override
public StandardProvenanceEventRecord nextRecord(final DataInputStream dis, final int serializationVersion) throws IOException {
    if (serializationVersion > SERIALIZATION_VERISON) {
        throw new IllegalArgumentException("Unable to deserialize record because the version is "
            + serializationVersion + " and supported versions are 1-" + SERIALIZATION_VERISON);
    }

    // Schema changed drastically in version 6, so older records are handled by a dedicated method.
    if (serializationVersion < 6) {
        return readPreVersion6Record(dis, serializationVersion);
    }

    final long startOffset = getBytesConsumed();
    final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();

    final long eventId = dis.readLong();
    final String eventTypeName = dis.readUTF();
    final ProvenanceEventType eventType = ProvenanceEventType.valueOf(eventTypeName);
    builder.setEventType(eventType);
    builder.setEventTime(dis.readLong());

    // Kept as a primitive: the value is always present, so boxing it would only add overhead.
    final long flowFileEntryDate = dis.readLong();
    builder.setEventDuration(dis.readLong());

    if (serializationVersion < 9) {
        // Lineage identifiers are still present in the stream before version 9; they are
        // consumed to keep the stream aligned and then discarded.
        final int numLineageIdentifiers = dis.readInt();
        for (int i = 0; i < numLineageIdentifiers; i++) {
            readUUID(dis, serializationVersion);
        }
    }

    final long lineageStartDate = dis.readLong();

    if (serializationVersion < 7) {
        // File size moved in version 7 to be stored with the content claims.
        final long fileSize = dis.readLong();
        builder.setCurrentContentClaim(null, null, null, null, fileSize);
    }

    builder.setComponentId(readNullableString(dis));
    builder.setComponentType(readNullableString(dis));

    final String uuid = readUUID(dis, serializationVersion);
    builder.setFlowFileUUID(uuid);
    builder.setDetails(readNullableString(dis));

    // Read in the FlowFile attributes.
    if (serializationVersion >= 7) {
        final Map<String, String> previousAttrs = readAttributes(dis, false);
        final Map<String, String> attrUpdates = readAttributes(dis, true);
        builder.setAttributes(previousAttrs, attrUpdates);

        // The claim fields are read inline; Java evaluates method arguments left to right, so
        // the reads happen in the order in which the arguments are written.
        final boolean hasContentClaim = dis.readBoolean();
        if (hasContentClaim) {
            builder.setCurrentContentClaim(dis.readUTF(), dis.readUTF(), dis.readUTF(), dis.readLong(), dis.readLong());
        } else {
            builder.setCurrentContentClaim(null, null, null, null, 0L);
        }

        final boolean hasPreviousClaim = dis.readBoolean();
        if (hasPreviousClaim) {
            builder.setPreviousContentClaim(dis.readUTF(), dis.readUTF(), dis.readUTF(), dis.readLong(), dis.readLong());
        }

        builder.setSourceQueueIdentifier(readNullableString(dis));
    } else {
        final Map<String, String> attrs = readAttributes(dis, false);
        builder.setAttributes(Collections.<String, String>emptyMap(), attrs);
    }

    // Read the fields that are specific to the event type. Event types that are not listed
    // carry no additional fields.
    switch (eventType) {
        case FORK:
        case JOIN:
        case CLONE:
        case REPLAY: {
            final int numParents = dis.readInt();
            for (int i = 0; i < numParents; i++) {
                builder.addParentUuid(readUUID(dis, serializationVersion));
            }

            final int numChildren = dis.readInt();
            for (int i = 0; i < numChildren; i++) {
                builder.addChildUuid(readUUID(dis, serializationVersion));
            }
            break;
        }
        case RECEIVE:
            builder.setTransitUri(readNullableString(dis));
            builder.setSourceSystemFlowFileIdentifier(readNullableString(dis));
            break;
        case FETCH:
        case SEND:
            builder.setTransitUri(readNullableString(dis));
            break;
        case ADDINFO:
            builder.setAlternateIdentifierUri(readNullableString(dis));
            break;
        case ROUTE:
            builder.setRelationship(readNullableString(dis));
            break;
        default:
            break;
    }

    builder.setFlowFileEntryDate(flowFileEntryDate);
    builder.setLineageStartDate(lineageStartDate);
    builder.setStorageLocation(getFilename(), startOffset);

    final StandardProvenanceEventRecord record = builder.build();
    record.setEventId(eventId);
    return record;
}
/**
 * Reads an attribute map that was written as an int count followed by that many key/value
 * pairs. Keys and values use the length-prefixed encoding handled by {@code readLongString}.
 * Values longer than {@code getMaxAttributeLength()} characters are cut down to that length.
 *
 * @param dis the stream positioned at the attribute count
 * @param valueNullable whether each value is preceded by a presence flag; if so, an absent
 *        value is stored in the map as {@code null}
 * @return a new mutable map of the attributes read; empty if the count is zero or negative
 * @throws IOException if reading from the stream fails
 */
private Map<String, String> readAttributes(final DataInputStream dis, final boolean valueNullable) throws IOException {
    final int attributeCount = dis.readInt();
    final Map<String, String> attributes = new HashMap<>();

    for (int index = 0; index < attributeCount; index++) {
        final String name = readLongString(dis);

        String value;
        if (valueNullable) {
            value = readLongNullableString(dis);
        } else {
            value = readLongString(dis);
        }

        // Only non-null values that exceed the configured maximum are shortened.
        if (value != null && value.length() > getMaxAttributeLength()) {
            value = value.substring(0, getMaxAttributeLength());
        }

        attributes.put(name, value);
    }

    return attributes;
}
/**
 * Reads a UUID in the encoding that matches the given serialization version.
 * <p>
 * Before version 8, UUIDs were serialized as two longs in order to write less data. Version 8
 * switched to writing the string form because calling {@code UUID.fromString} is extremely
 * expensive and, since the data is generally compressed, the space savings were minimal.
 *
 * @param in the stream positioned at the UUID
 * @param serializationVersion the version the data was written with
 * @return the UUID in its textual form
 * @throws IOException if reading from the stream fails, or if a string-encoded value does not
 *         match the expected UUID layout
 */
private String readUUID(final DataInputStream in, final int serializationVersion) throws IOException {
    if (serializationVersion >= 8) {
        // String encoding: validate the text instead of parsing it into a UUID object.
        final String candidate = in.readUTF();
        if (UUID_PATTERN.matcher(candidate).matches()) {
            return candidate;
        }
        throw new IOException("Failed to parse Provenance Event Record: expected a UUID but got: " + candidate);
    }

    // Binary encoding: most significant bits first, then least significant bits.
    final long mostSignificantBits = in.readLong();
    final long leastSignificantBits = in.readLong();
    return new UUID(mostSignificantBits, leastSignificantBits).toString();
}
/**
 * Reads an optional string: a boolean presence flag followed, when the flag is true, by a
 * string in the format understood by {@link DataInputStream#readUTF()}.
 *
 * @param in the stream positioned at the presence flag
 * @return the string, or {@code null} if the flag indicates that no value was written
 * @throws IOException if reading from the stream fails
 */
private String readNullableString(final DataInputStream in) throws IOException {
    return in.readBoolean() ? in.readUTF() : null;
}
/**
 * Reads an optional string: a boolean presence flag followed, when the flag is true, by a
 * string in the length-prefixed encoding handled by {@code readLongString}.
 *
 * @param in the stream positioned at the presence flag
 * @return the string, or {@code null} if the flag indicates that no value was written
 * @throws IOException if reading from the stream fails
 */
private String readLongNullableString(final DataInputStream in) throws IOException {
    if (!in.readBoolean()) {
        return null;
    }
    return readLongString(in);
}
/**
 * Reads a string that was written as an int byte count followed by that many bytes, which
 * are decoded as UTF-8.
 * <p>
 * A negative byte count can only come from corrupt or misaligned data. It is reported as an
 * {@link IOException}, like the other parse failures in this class, rather than surfacing
 * as an unchecked {@code NegativeArraySizeException} from the array allocation.
 *
 * @param in the stream positioned at the byte count
 * @return the decoded string; empty if the byte count is zero
 * @throws IOException if the byte count is negative or reading from the stream fails
 */
private String readLongString(final DataInputStream in) throws IOException {
    final int length = in.readInt();
    if (length < 0) {
        throw new IOException("Failed to parse Provenance Event Record: expected a non-negative string length but got: " + length);
    }

    final byte[] strBytes = new byte[length];
    StreamUtils.fillBuffer(in, strBytes);
    return new String(strBytes, StandardCharsets.UTF_8);
}
}