/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.writer; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import org.apache.commons.lang3.ArrayUtils; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.primitives.Longs; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.State; /** * An implementation of {@link DataWriter} that writes bytes directly to HDFS. * * This class accepts two new configuration parameters: * <ul> * <li>{@link ConfigurationKeys#SIMPLE_WRITER_PREPEND_SIZE} is a boolean configuration option. If true, for each record, * it will write out a big endian long representing the record size and then write the record. i.e. the file format * will be the following: * r := >long<>record< * file := empty | r file * <li>{@link ConfigurationKeys#SIMPLE_WRITER_DELIMITER} accepts a byte value. If specified, this byte will be used * as a separator between records. If unspecified, no delimiter will be used between records. * </ul> * @author akshay@nerdwallet.com */ public class SimpleDataWriter extends FsDataWriter<byte[]> { private final Optional<Byte> recordDelimiter; // optional byte to place between each record write private final boolean prependSize; private int recordsWritten; private int bytesWritten; private final OutputStream stagingFileOutputStream; public SimpleDataWriter(SimpleDataWriterBuilder builder, State properties) throws IOException { super(builder, properties); String delim; if ((delim = properties.getProp(ConfigurationKeys.SIMPLE_WRITER_DELIMITER, null)) == null || delim.length() == 0) { this.recordDelimiter = Optional.absent(); } else { this.recordDelimiter = Optional.of(delim.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING)[0]); } this.prependSize = properties.getPropAsBoolean(ConfigurationKeys.SIMPLE_WRITER_PREPEND_SIZE, false); this.recordsWritten = 0; this.bytesWritten = 0; this.stagingFileOutputStream = createStagingFileOutputStream(); setStagingFileGroup(); } /** * Write a source record to the staging file * * @param record data record to write * @throws java.io.IOException if there is anything wrong writing the record */ @Override public void write(byte[] record) throws IOException { Preconditions.checkNotNull(record); byte[] toWrite = record; if (this.recordDelimiter.isPresent()) { toWrite = Arrays.copyOf(record, record.length + 1); toWrite[toWrite.length - 1] = this.recordDelimiter.get(); } if (this.prependSize) { long recordSize = toWrite.length; ByteBuffer buf = ByteBuffer.allocate(Longs.BYTES); buf.putLong(recordSize); toWrite = ArrayUtils.addAll(buf.array(), toWrite); } this.stagingFileOutputStream.write(toWrite); this.bytesWritten += toWrite.length; this.recordsWritten++; } /** * Get the number of records written. * * @return number of records written */ @Override public long recordsWritten() { return this.recordsWritten; } /** * Get the number of bytes written. * * @return number of bytes written */ @Override public long bytesWritten() throws IOException { return this.bytesWritten; } @Override public boolean isSpeculativeAttemptSafe() { return this.writerAttemptIdOptional.isPresent() && this.getClass() == SimpleDataWriter.class; } }