/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.runners.dataflow.util; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.base.MoreObjects; import com.google.common.io.ByteStreams; import com.google.common.primitives.UnsignedBytes; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Arrays; import java.util.Comparator; import javax.annotation.concurrent.NotThreadSafe; import org.apache.beam.sdk.coders.AtomicCoder; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.util.VarInt; /** * An elastic-sized byte array which allows you to manipulate it as a stream, or access * it directly. This allows for a quick succession of moving bytes from an {@link InputStream} * to this wrapper to be used as an {@link OutputStream} and vice versa. This wrapper * also provides random access to bytes stored within. This wrapper allows users to finely * control the number of byte copies that occur. * * <p>Anything stored within the in-memory buffer from offset {@link #size()} is considered * temporary unused storage. */ @NotThreadSafe public class RandomAccessData { /** * A {@link Coder} which encodes the valid parts of this stream. * This follows the same encoding scheme as {@link ByteArrayCoder}. * This coder is deterministic and consistent with equals. * * <p>This coder does not support encoding positive infinity. */ public static class RandomAccessDataCoder extends AtomicCoder<RandomAccessData> { private static final RandomAccessDataCoder INSTANCE = new RandomAccessDataCoder(); public static RandomAccessDataCoder of() { return INSTANCE; } @Override public void encode(RandomAccessData value, OutputStream outStream) throws CoderException, IOException { encode(value, outStream, Coder.Context.NESTED); } @Override public void encode(RandomAccessData value, OutputStream outStream, Coder.Context context) throws CoderException, IOException { if (value == POSITIVE_INFINITY) { throw new CoderException("Positive infinity can not be encoded."); } if (!context.isWholeStream) { VarInt.encode(value.size, outStream); } value.writeTo(outStream, 0, value.size); } @Override public RandomAccessData decode(InputStream inStream) throws CoderException, IOException { return decode(inStream, Coder.Context.NESTED); } @Override public RandomAccessData decode(InputStream inStream, Coder.Context context) throws CoderException, IOException { RandomAccessData rval = new RandomAccessData(); if (!context.isWholeStream) { int length = VarInt.decodeInt(inStream); rval.readFrom(inStream, 0, length); } else { ByteStreams.copy(inStream, rval.asOutputStream()); } return rval; } @Override public void verifyDeterministic() {} @Override public boolean consistentWithEquals() { return true; } @Override public boolean isRegisterByteSizeObserverCheap(RandomAccessData value) { return true; } @Override protected long getEncodedElementByteSize(RandomAccessData value) throws Exception { if (value == null) { throw new CoderException("cannot encode a null in memory stream"); } return VarInt.getLength(value.size) + value.size; } } public static final UnsignedLexicographicalComparator UNSIGNED_LEXICOGRAPHICAL_COMPARATOR = new UnsignedLexicographicalComparator(); /** * A {@link Comparator} that compares two byte arrays lexicographically. It compares * values as a list of unsigned bytes. The first pair of values that follow any common prefix, * or when one array is a prefix of the other, treats the shorter array as the lesser. * For example, {@code [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02] < POSITIVE INFINITY}. * * <p>Note that a token type of positive infinity is supported and is greater than * all other {@link RandomAccessData}. */ public static final class UnsignedLexicographicalComparator implements Comparator<RandomAccessData> { // Do not instantiate private UnsignedLexicographicalComparator() { } @Override public int compare(RandomAccessData o1, RandomAccessData o2) { return compare(o1, o2, 0 /* start from the beginning */); } /** * Compare the two sets of bytes starting at the given offset. */ public int compare(RandomAccessData o1, RandomAccessData o2, int startOffset) { if (o1 == o2) { return 0; } if (o1 == POSITIVE_INFINITY) { return 1; } if (o2 == POSITIVE_INFINITY) { return -1; } int minBytesLen = Math.min(o1.size, o2.size); for (int i = startOffset; i < minBytesLen; i++) { // unsigned comparison int b1 = o1.buffer[i] & 0xFF; int b2 = o2.buffer[i] & 0xFF; if (b1 == b2) { continue; } // Return the stream with the smaller byte as the smaller value. return b1 - b2; } // If one is a prefix of the other, return the shorter one as the smaller one. // If both lengths are equal, then both streams are equal. return o1.size - o2.size; } /** * Compute the length of the common prefix of the two provided sets of bytes. */ public int commonPrefixLength(RandomAccessData o1, RandomAccessData o2) { int minBytesLen = Math.min(o1.size, o2.size); for (int i = 0; i < minBytesLen; i++) { // unsigned comparison int b1 = o1.buffer[i] & 0xFF; int b2 = o2.buffer[i] & 0xFF; if (b1 != b2) { return i; } } return minBytesLen; } } /** A token type representing positive infinity. */ static final RandomAccessData POSITIVE_INFINITY = new RandomAccessData(0); /** * Returns a RandomAccessData that is the smallest value of same length which * is strictly greater than this. Note that if this is empty or is all 0xFF then * a token value of positive infinity is returned. * * <p>The {@link UnsignedLexicographicalComparator} supports comparing {@link RandomAccessData} * with support for positive infinitiy. */ public RandomAccessData increment() throws IOException { RandomAccessData copy = copy(); for (int i = copy.size - 1; i >= 0; --i) { if (copy.buffer[i] != UnsignedBytes.MAX_VALUE) { copy.buffer[i] = UnsignedBytes.checkedCast(UnsignedBytes.toInt(copy.buffer[i]) + 1); return copy; } } return POSITIVE_INFINITY; } private static final int DEFAULT_INITIAL_BUFFER_SIZE = 128; /** Constructs a RandomAccessData with a default buffer size. */ public RandomAccessData() { this(DEFAULT_INITIAL_BUFFER_SIZE); } /** Constructs a RandomAccessData with the initial buffer. */ public RandomAccessData(byte[] initialBuffer) { checkNotNull(initialBuffer); this.buffer = initialBuffer; this.size = initialBuffer.length; } /** Constructs a RandomAccessData with the given buffer size. */ public RandomAccessData(int initialBufferSize) { checkArgument(initialBufferSize >= 0, "Expected initial buffer size to be greater than zero."); this.buffer = new byte[initialBufferSize]; } private byte[] buffer; private int size; /** Returns the backing array. */ public byte[] array() { return buffer; } /** Returns the number of bytes in the backing array that are valid. */ public int size() { return size; } /** Resets the end of the stream to the specified position. */ public void resetTo(int position) { ensureCapacity(position); size = position; } private final OutputStream outputStream = new OutputStream() { @Override public void write(int b) throws IOException { ensureCapacity(size + 1); buffer[size] = (byte) b; size += 1; } @Override public void write(byte[] b, int offset, int length) throws IOException { ensureCapacity(size + length); System.arraycopy(b, offset, buffer, size, length); size += length; } }; /** * Returns an output stream which writes to the backing buffer from the current position. * Note that the internal buffer will grow as required to accomodate all data written. */ public OutputStream asOutputStream() { return outputStream; } /** * Returns an {@link InputStream} wrapper which supplies the portion of this backing byte buffer * starting at {@code offset} and up to {@code length} bytes. Note that the returned * {@link InputStream} is only a wrapper and any modifications to the underlying * {@link RandomAccessData} will be visible by the {@link InputStream}. */ public InputStream asInputStream(final int offset, final int length) { return new ByteArrayInputStream(buffer, offset, length); } /** * Writes {@code length} bytes starting at {@code offset} from the backing data store to the * specified output stream. */ public void writeTo(OutputStream out, int offset, int length) throws IOException { out.write(buffer, offset, length); } /** * Reads {@code length} bytes from the specified input stream writing them into the backing * data store starting at {@code offset}. * * <p>Note that the in memory stream will be grown to ensure there is enough capacity. */ public void readFrom(InputStream inStream, int offset, int length) throws IOException { ensureCapacity(offset + length); ByteStreams.readFully(inStream, buffer, offset, length); size = offset + length; } /** Returns a copy of this RandomAccessData. */ public RandomAccessData copy() throws IOException { RandomAccessData copy = new RandomAccessData(size); writeTo(copy.asOutputStream(), 0, size); return copy; } @Override public boolean equals(Object other) { if (other == this) { return true; } if (!(other instanceof RandomAccessData)) { return false; } return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR.compare(this, (RandomAccessData) other) == 0; } @Override public int hashCode() { int result = 1; for (int i = 0; i < size; ++i) { result = 31 * result + buffer[i]; } return result; } @Override public String toString() { return MoreObjects.toStringHelper(this) .add("buffer", Arrays.copyOf(buffer, size)) .add("size", size) .toString(); } private void ensureCapacity(int minCapacity) { // If we have enough space, don't grow the buffer. if (minCapacity <= buffer.length) { return; } // Try to double the size of the buffer, if thats not enough, just use the new capacity. // Note that we use Math.min(long, long) to not cause overflow on the multiplication. int newCapacity = (int) Math.min(Integer.MAX_VALUE - 8, buffer.length * 2L); if (newCapacity < minCapacity) { newCapacity = minCapacity; } buffer = Arrays.copyOf(buffer, newCapacity); } }