/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph.types.valuearray;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.graph.utils.Murmur3_32;
import org.apache.flink.types.IntValue;
import org.apache.flink.types.StringValue;
import org.apache.flink.util.Preconditions;
import java.io.IOException;
import java.nio.CharBuffer;
import java.util.Arrays;
import java.util.Iterator;
/**
* An array of {@link StringValue}.
* <p>
* Strings are serialized to a byte array. Concatenating arrays is as simple
* and fast as extending and copying byte arrays. Strings are serialized when
* individually added to {@code StringValueArray}.
* <p>
* For each string added to the array the length is first serialized using a
* variable length integer. Then the string characters are serialized using a
* variable length encoding where the lower 128 ASCII/UFT-8 characters are
* encoded in a single byte. This ensures that common characters are serialized
* in only two bytes.
*/
public class StringValueArray
implements ValueArray<StringValue> {
protected static final int DEFAULT_CAPACITY_IN_BYTES = 4096;
// see note in ArrayList, HashTable, ...
private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
protected static final int HIGH_BIT = 0x1 << 7;
private boolean isBounded;
// the initial length of a bounded array, which is allowed to expand to
// store one additional element beyond this initial length
private int boundedLength;
private byte[] data;
// number of StringValue elements currently stored
private int length;
// the number of bytes currently stored
private int position;
// state for the bookmark used by mark() and reset()
private transient int markLength;
private transient int markPosition;
// hasher used to generate the normalized key
private Murmur3_32 hash = new Murmur3_32(0x19264330);
// hash result stored as normalized key
private IntValue hashValue = new IntValue();
/**
* Initializes an expandable array with default capacity.
*/
public StringValueArray() {
isBounded = false;
initialize(DEFAULT_CAPACITY_IN_BYTES);
}
/**
* Initializes a fixed-size array with the provided number of bytes.
*
* @param bytes number of bytes of the encapsulated array
*/
public StringValueArray(int bytes) {
isBounded = true;
boundedLength = bytes;
initialize(bytes);
}
/**
* Initializes the array with the provided number of bytes.
*
* @param bytes initial size of the encapsulated array in bytes
*/
private void initialize(int bytes) {
Preconditions.checkArgument(bytes > 0, "Requested array with zero capacity");
Preconditions.checkArgument(bytes <= MAX_ARRAY_SIZE, "Requested capacity exceeds limit of " + MAX_ARRAY_SIZE);
data = new byte[bytes];
}
// --------------------------------------------------------------------------------------------
/**
* If the size of the array is insufficient to hold the given capacity then
* copy the array into a new, larger array.
*
* @param minCapacity minimum required number of elements
*/
private void ensureCapacity(int minCapacity) {
long currentCapacity = data.length;
if (minCapacity <= currentCapacity) {
return;
}
// increase capacity by at least ~50%
long expandedCapacity = Math.max(minCapacity, currentCapacity + (currentCapacity >> 1));
int newCapacity = (int) Math.min(MAX_ARRAY_SIZE, expandedCapacity);
if (newCapacity < minCapacity) {
// throw exception as unbounded arrays are not expected to fill
throw new RuntimeException("Requested array size " + minCapacity + " exceeds limit of " + MAX_ARRAY_SIZE);
}
data = Arrays.copyOf(data, newCapacity);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("[");
String separator = "";
for (StringValue sv : this) {
sb
.append(sv.getValue())
.append(separator);
separator = ",";
}
sb.append("]");
return sb.toString();
}
// --------------------------------------------------------------------------------------------
// Iterable
// --------------------------------------------------------------------------------------------
private final ReadIterator iterator = new ReadIterator();
@Override
public Iterator<StringValue> iterator() {
iterator.reset();
return iterator;
}
private class ReadIterator
implements Iterator<StringValue> {
private static final int DEFAULT_SIZE = 64;
private StringValue value = new StringValue(CharBuffer.allocate(DEFAULT_SIZE));
private int size = DEFAULT_SIZE;
private int pos;
@Override
public boolean hasNext() {
return pos < position;
}
@Override
public StringValue next() {
// read length
int len = data[pos++] & 0xFF;
if (len >= HIGH_BIT) {
int shift = 7;
int curr;
len = len & 0x7F;
while ((curr = data[pos++] & 0xFF) >= HIGH_BIT) {
len |= (curr & 0x7F) << shift;
shift += 7;
}
len |= curr << shift;
}
// ensure capacity
if (len > size) {
while (size < len) {
size *= 2;
}
value = new StringValue(CharBuffer.allocate(size));
}
// read string characters
final char[] valueData = value.getCharArray();
for (int i = 0; i < len; i++) {
int c = data[pos++] & 0xFF;
if (c >= HIGH_BIT) {
int shift = 7;
int curr;
c = c & 0x7F;
while ((curr = data[pos++] & 0xFF) >= HIGH_BIT) {
c |= (curr & 0x7F) << shift;
shift += 7;
}
c |= curr << shift;
}
valueData[i] = (char) c;
}
return value;
}
@Override
public void remove() {
throw new UnsupportedOperationException("remove");
}
public void reset() {
pos = 0;
}
}
// --------------------------------------------------------------------------------------------
// IOReadableWritable
// --------------------------------------------------------------------------------------------
@Override
public void write(DataOutputView out) throws IOException {
out.writeInt(length);
out.writeInt(position);
out.write(data, 0, position);
}
@Override
public void read(DataInputView in) throws IOException {
length = in.readInt();
position = in.readInt();
markLength = 0;
markPosition = 0;
ensureCapacity(position);
in.read(data, 0, position);
}
// --------------------------------------------------------------------------------------------
// NormalizableKey
// --------------------------------------------------------------------------------------------
@Override
public int getMaxNormalizedKeyLen() {
return hashValue.getMaxNormalizedKeyLen();
}
@Override
public void copyNormalizedKey(MemorySegment target, int offset, int len) {
hash.reset();
hash.hash(position);
for (int i = 0 ; i < position ; i++) {
hash.hash(data[i]);
}
hashValue.setValue(hash.hash());
hashValue.copyNormalizedKey(target, offset, len);
}
// --------------------------------------------------------------------------------------------
// Comparable
// --------------------------------------------------------------------------------------------
@Override
public int compareTo(ValueArray<StringValue> o) {
StringValueArray other = (StringValueArray) o;
// sorts first on number of data in the array, then comparison between
// the first non-equal element in the arrays
int cmp = Integer.compare(position, other.position);
if (cmp != 0) {
return cmp;
}
for (int i = 0 ; i < position ; i++) {
cmp = Byte.compare(data[i], other.data[i]);
if (cmp != 0) {
return cmp;
}
}
return 0;
}
// --------------------------------------------------------------------------------------------
// Key
// --------------------------------------------------------------------------------------------
@Override
public int hashCode() {
int hash = 1;
for (int i = 0 ; i < position ; i++) {
hash = 31 * hash + data[i];
}
return hash;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof StringValueArray) {
StringValueArray other = (StringValueArray) obj;
if (length != other.length) {
return false;
}
if (position != other.position) {
return false;
}
for (int i = 0 ; i < position ; i++) {
if (data[i] != other.data[i]) {
return false;
}
}
return true;
}
return false;
}
// --------------------------------------------------------------------------------------------
// ResettableValue
// --------------------------------------------------------------------------------------------
@Override
public void setValue(ValueArray<StringValue> value) {
value.copyTo(this);
}
// --------------------------------------------------------------------------------------------
// CopyableValue
// --------------------------------------------------------------------------------------------
@Override
public int getBinaryLength() {
return -1;
}
@Override
public void copyTo(ValueArray<StringValue> target) {
StringValueArray other = (StringValueArray) target;
other.length = length;
other.position = position;
other.markLength = markLength;
other.markPosition = markPosition;
other.ensureCapacity(position);
System.arraycopy(data, 0, other.data, 0, position);
}
@Override
public ValueArray<StringValue> copy() {
ValueArray<StringValue> copy = new StringValueArray();
this.copyTo(copy);
return copy;
}
@Override
public void copy(DataInputView source, DataOutputView target) throws IOException {
copyInternal(source, target);
}
protected static void copyInternal(DataInputView source, DataOutputView target) throws IOException {
int length = source.readInt();
target.writeInt(length);
int position = source.readInt();
target.writeInt(position);
target.write(source, position);
}
// --------------------------------------------------------------------------------------------
// ValueArray
// --------------------------------------------------------------------------------------------
@Override
public int size() {
return length;
}
@Override
public boolean isFull() {
if (isBounded) {
return position >= boundedLength;
} else {
return position == MAX_ARRAY_SIZE;
}
}
@Override
public boolean add(StringValue value) {
if (isBounded && position >= boundedLength) {
return false;
}
// up to five bytes storing length
if (position + 5 > data.length) {
ensureCapacity(position + 5);
}
// update local variable until serialization succeeds
int newPosition = position;
// write the length, variable-length encoded
int len = value.length();
while (len >= HIGH_BIT) {
data[newPosition++] = (byte) (len | HIGH_BIT);
len >>>= 7;
}
data[newPosition++] = (byte) len;
// write the char data, variable-length encoded
final char[] valueData = value.getCharArray();
int remainingCapacity = data.length - newPosition;
len = value.length();
for (int i = 0; i < len; i++) {
// up to three bytes storing length
if (remainingCapacity < 3) {
ensureCapacity(remainingCapacity + 3);
remainingCapacity = data.length - newPosition;
}
int c = valueData[i];
while (c >= HIGH_BIT) {
data[newPosition++] = (byte) (c | HIGH_BIT);
remainingCapacity--;
c >>>= 7;
}
data[newPosition++] = (byte) c;
remainingCapacity--;
}
length++;
position = newPosition;
return true;
}
@Override
public boolean addAll(ValueArray<StringValue> other) {
StringValueArray source = (StringValueArray) other;
int sourceSize = source.position;
int newPosition = position + sourceSize;
if (newPosition > data.length) {
if (isBounded) {
return false;
} else {
ensureCapacity(newPosition);
}
}
System.arraycopy(source.data, 0, data, position, sourceSize);
length += source.length;
position = newPosition;
return true;
}
@Override
public void clear() {
length = 0;
position = 0;
}
@Override
public void mark() {
markLength = length;
markPosition = position;
}
@Override
public void reset() {
length = markLength;
position = markPosition;
}
}