/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.provenance;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UTFDataFormatException;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.nifi.provenance.serialization.CompressableRecordWriter;
import org.apache.nifi.provenance.serialization.RecordWriter;
import org.apache.nifi.provenance.toc.TocWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @deprecated Deprecated in favor of SchemaRecordWriter
 */
@Deprecated
public class StandardRecordWriter extends CompressableRecordWriter implements RecordWriter {
    public static final int MAX_ALLOWED_UTF_LENGTH = 65_535;

    private static final Logger logger = LoggerFactory.getLogger(StandardRecordWriter.class);
    public static final int SERIALIZATION_VERISON = 9;
    public static final String SERIALIZATION_NAME = "org.apache.nifi.provenance.PersistentProvenanceRepository";

    private final File file;

    public StandardRecordWriter(final File file, final AtomicLong idGenerator, final TocWriter writer, final boolean compressed,
                                final int uncompressedBlockSize) throws IOException {
        super(file, idGenerator, writer, compressed, uncompressedBlockSize);
        logger.trace("Creating Record Writer for {}", file.getName());

        this.file = file;
    }

    public StandardRecordWriter(final OutputStream out, final String storageLocation, final AtomicLong idGenerator, final TocWriter tocWriter,
                                final boolean compressed, final int uncompressedBlockSize) throws IOException {
        super(out, storageLocation, idGenerator, tocWriter, compressed, uncompressedBlockSize);
        this.file = null;
    }

    @Override
    protected String getSerializationName() {
        return SERIALIZATION_NAME;
    }

    @Override
    protected int getSerializationVersion() {
        return SERIALIZATION_VERISON;
    }

    @Override
    protected void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
    }

    @Override
    protected void writeRecord(final ProvenanceEventRecord record, final long recordIdentifier, final DataOutputStream out) throws IOException {
        final ProvenanceEventType recordType = record.getEventType();

        out.writeLong(recordIdentifier);
        writeUTFLimited(out, record.getEventType().name(), "EventType");
        out.writeLong(record.getEventTime());
        out.writeLong(record.getFlowFileEntryDate());
        out.writeLong(record.getEventDuration());
        out.writeLong(record.getLineageStartDate());

        writeNullableString(out, record.getComponentId(), "ComponentId");
        writeNullableString(out, record.getComponentType(), "ComponentType");
        writeUUID(out, record.getFlowFileUuid());
        writeNullableString(out, record.getDetails(), "Details");

        // Write FlowFile attributes
        final Map<String, String> attrs = record.getPreviousAttributes();
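        // Attribute maps are written as a count followed by each key/value pair. Keys and values use
        // writeLongString/writeLongNullableString (a 4-byte length prefix plus raw UTF-8 bytes) rather
        // than writeUTF, so attribute values larger than writeUTF's 64 KB limit can be stored.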
        out.writeInt(attrs.size());
        for (final Map.Entry<String, String> entry : attrs.entrySet()) {
            writeLongString(out, entry.getKey());
            writeLongString(out, entry.getValue());
        }

        final Map<String, String> attrUpdates = record.getUpdatedAttributes();
        out.writeInt(attrUpdates.size());
        for (final Map.Entry<String, String> entry : attrUpdates.entrySet()) {
            writeLongString(out, entry.getKey());
            writeLongNullableString(out, entry.getValue());
        }

        // If Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'.
        if (record.getContentClaimSection() != null && record.getContentClaimContainer() != null && record.getContentClaimIdentifier() != null) {
            out.writeBoolean(true);
            writeUTFLimited(out, record.getContentClaimContainer(), "ContentClaimContainer");
            writeUTFLimited(out, record.getContentClaimSection(), "ContentClaimSection");
            writeUTFLimited(out, record.getContentClaimIdentifier(), "ContentClaimIdentifier");
            if (record.getContentClaimOffset() == null) {
                out.writeLong(0L);
            } else {
                out.writeLong(record.getContentClaimOffset());
            }
            out.writeLong(record.getFileSize());
        } else {
            out.writeBoolean(false);
        }

        // If Previous Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'.
        if (record.getPreviousContentClaimSection() != null && record.getPreviousContentClaimContainer() != null && record.getPreviousContentClaimIdentifier() != null) {
            out.writeBoolean(true);
            writeUTFLimited(out, record.getPreviousContentClaimContainer(), "PreviousContentClaimContainer");
            writeUTFLimited(out, record.getPreviousContentClaimSection(), "PreviousContentClaimSection");
            writeUTFLimited(out, record.getPreviousContentClaimIdentifier(), "PreviousContentClaimIdentifier");
            if (record.getPreviousContentClaimOffset() == null) {
                out.writeLong(0L);
            } else {
                out.writeLong(record.getPreviousContentClaimOffset());
            }
            if (record.getPreviousFileSize() == null) {
                out.writeLong(0L);
            } else {
                out.writeLong(record.getPreviousFileSize());
            }
        } else {
            out.writeBoolean(false);
        }

        // Write out the identifier of the source queue.
        writeNullableString(out, record.getSourceQueueIdentifier(), "SourceQueueIdentifier");

        // Write type-specific info
        if (recordType == ProvenanceEventType.FORK || recordType == ProvenanceEventType.JOIN
            || recordType == ProvenanceEventType.CLONE || recordType == ProvenanceEventType.REPLAY) {
            writeUUIDs(out, record.getParentUuids());
            writeUUIDs(out, record.getChildUuids());
        } else if (recordType == ProvenanceEventType.RECEIVE) {
            writeNullableString(out, record.getTransitUri(), "TransitUri");
            writeNullableString(out, record.getSourceSystemFlowFileIdentifier(), "SourceSystemFlowFileIdentifier");
        } else if (recordType == ProvenanceEventType.FETCH) {
            writeNullableString(out, record.getTransitUri(), "TransitUri");
        } else if (recordType == ProvenanceEventType.SEND) {
            writeNullableString(out, record.getTransitUri(), "TransitUri");
        } else if (recordType == ProvenanceEventType.ADDINFO) {
            writeNullableString(out, record.getAlternateIdentifierUri(), "AlternateIdentifierUri");
        } else if (recordType == ProvenanceEventType.ROUTE) {
            writeNullableString(out, record.getRelationship(), "Relationship");
        }
    }

    protected void writeUUID(final DataOutputStream out, final String uuid) throws IOException {
        writeUTFLimited(out, uuid, "UUID");
    }

    protected void writeUUIDs(final DataOutputStream out, final Collection<String> list) throws IOException {
        if (list == null) {
            out.writeInt(0);
        } else {
            out.writeInt(list.size());
            for (final String value : list) {
                writeUUID(out, value);
            }
        }
    }

    protected void writeNullableString(final DataOutputStream out, final String toWrite, String fieldName) throws IOException {
        if (toWrite == null) {
            out.writeBoolean(false);
        } else {
            out.writeBoolean(true);
            writeUTFLimited(out, toWrite, fieldName);
        }
    }

    private void writeLongNullableString(final DataOutputStream out, final String toWrite) throws IOException {
        if (toWrite == null) {
            out.writeBoolean(false);
        } else {
            out.writeBoolean(true);
            writeLongString(out, toWrite);
        }
    }

    private void writeLongString(final DataOutputStream out, final String value) throws IOException {
        final byte[] bytes = value.getBytes("UTF-8");
        out.writeInt(bytes.length);
        out.write(bytes);
    }

    private void writeUTFLimited(final DataOutputStream out, final String utfString, final String fieldName) throws IOException {
        try {
            out.writeUTF(utfString);
        } catch (UTFDataFormatException e) {
            final String truncated = utfString.substring(0, getCharsInUTF8Limit(utfString, MAX_ALLOWED_UTF_LENGTH));
            logger.warn("Truncating repository record value for field '{}'! Attempted to write {} chars that encode to a UTF8 byte length greater than "
                + "supported maximum ({}), truncating to {} chars.",
                (fieldName == null) ? "" : fieldName, utfString.length(), MAX_ALLOWED_UTF_LENGTH, truncated.length());
            if (logger.isDebugEnabled()) {
                logger.warn("String value was:\n{}", truncated);
            }
            out.writeUTF(truncated);
        }
    }

    static int getCharsInUTF8Limit(final String str, final int utf8Limit) {
        // Calculate how much of String fits within UTF8 byte limit based on RFC3629.
        //
        // Java String values use char[] for storage, so character values >0xFFFF that
        // map to 4 byte UTF8 representations are not considered.

        final int charsInOriginal = str.length();
        int bytesInUTF8 = 0;

        for (int i = 0; i < charsInOriginal; i++) {
            final int curr = str.charAt(i);
            if (curr < 0x0080) {
                bytesInUTF8++;
            } else if (curr < 0x0800) {
                bytesInUTF8 += 2;
            } else {
                bytesInUTF8 += 3;
            }
            if (bytesInUTF8 > utf8Limit) {
                return i;
            }
        }
        return charsInOriginal;
    }

    @Override
    public String toString() {
        return "StandardRecordWriter[file=" + file + "]";
    }
}