/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.recordJobs.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import eu.stratosphere.types.Value;
/**
 * A tuple of byte-encoded attributes that are stored back to back in a single byte array.
 * <p>
 * Every attribute is followed by exactly one terminating byte. The offsets array holds, for column {@code i},
 * the position of its first byte at index {@code i}; the entry at index {@code numCols} is the position right
 * after the terminator of the last column, i.e. the number of bytes in use.
 * <p>
 * The arrays handed to the constructor and the array returned by {@link #getBytes()} are not copied, so they
 * are shared with the caller.
 */
public class Tuple implements Value {

    private static final long serialVersionUID = 1L;

    /** Terminator byte written by the {@code addAttribute} methods. */
    private static final byte DELIMITER = '|';

    /** Minimum size of a freshly allocated byte buffer. */
    private static final int INITIAL_BYTES_CAPACITY = 256;

    /** Size of a freshly allocated offsets array. */
    private static final int INITIAL_OFFSETS_CAPACITY = 5;

    private byte[] bytes;

    private int[] offsets;

    private int numCols;

    /**
     * Instantiates an empty tuple.
     */
    public Tuple() {
        numCols = 0;
    }

    /**
     * Creates a new tuple with a given set of attributes. The arrays are used as they are, they are not copied.
     *
     * @param bytes
     *        The bytes array. Attributes are separated by a single character. The last attribute
     *        is also terminated with a single character.
     * @param offsets
     *        The offsets of the columns in the byte array. The last entry gives the offset of the terminating
     *        character + 1 (if the byte array exactly holds all attributes and delimiting characters this is
     *        the length of the array).
     * @param cols
     *        The number of columns.
     */
    public Tuple(byte[] bytes, int[] offsets, int cols) {
        this.bytes = bytes;
        this.offsets = offsets;
        this.numCols = cols;
    }

    // ------------------------------------------------------------------------
    //  Accessors
    // ------------------------------------------------------------------------

    /**
     * Returns the number of attributes / columns of the tuple.
     *
     * @return The number of columns of the tuple.
     */
    public int getNumberOfColumns() {
        return numCols;
    }

    /**
     * Returns the internal byte array of the tuple. The array is not copied and may be larger than the
     * number of bytes that are actually in use.
     *
     * @return The internal byte array of the tuple, or {@code null} if none has been allocated yet.
     */
    public byte[] getBytes() {
        return bytes;
    }

    /**
     * Returns the length of the column with the specified index, not counting its terminating byte.
     * Column indices start at 0.
     *
     * @param colNumber Index of the column. Indices start at 0.
     * @return The length of the specified column, or -1 if the tuple has no such column.
     */
    public int getColumnLength(int colNumber) {
        if (offsets == null || colNumber < 0 || colNumber >= numCols) {
            return -1;
        }
        // column i needs the entries i and i + 1; guards against an offsets array that is too short
        if (colNumber + 1 >= offsets.length) {
            return -1;
        }
        return offsets[colNumber + 1] - offsets[colNumber] - 1;
    }

    // ------------------------------------------------------------------------
    //  Modification
    // ------------------------------------------------------------------------

    /**
     * Appends all columns of the specified tuple to this tuple. Nothing happens if the other tuple has no
     * byte array or no offsets array.
     *
     * @param other The tuple whose columns are appended to this tuple.
     */
    public void concatenate(Tuple other) {
        if (other.bytes == null || other.offsets == null) {
            return;
        }

        if (bytes == null || offsets == null) {
            // nothing of our own yet: take private copies of the other tuple's arrays
            bytes = other.bytes.clone();
            offsets = other.offsets.clone();
            numCols = other.numCols;
            return;
        }

        final int otherCols = other.numCols;
        final int len = offsets[numCols];
        final int otherLen = other.offsets[otherCols];
        final int totalLen = len + otherLen;

        // make sure our byte array can take the other tuple's content
        if (bytes.length < totalLen) {
            byte[] grown = new byte[totalLen];
            System.arraycopy(bytes, 0, grown, 0, len);
            bytes = grown;
        }
        System.arraycopy(other.bytes, 0, bytes, len, otherLen);

        // make sure our offsets array can take the other tuple's offsets
        final int neededOffsets = numCols + otherCols + 1;
        if (offsets.length < neededOffsets) {
            int[] grown = new int[neededOffsets];
            System.arraycopy(offsets, 0, grown, 0, numCols + 1);
            offsets = grown;
        }

        // the other tuple's offsets are shifted by the number of bytes we already held
        for (int i = 1; i <= otherCols; i++) {
            offsets[numCols + i] = other.offsets[i] + len;
        }
        numCols += otherCols;
    }

    /**
     * Performs a projection on the tuple.
     * The int parameter is interpreted as a bitmap on the columns.
     * I.e. a bitmap value of 1 projects to the first column, 2 to the second, 3 to the first two columns, and so on.
     * The retained columns are copied into a new byte array that exactly fits them.
     *
     * @param bitmap the projection bitmap.
     */
    public void project(int bitmap) {
        if (bytes == null || offsets == null) {
            return;
        }

        int[] lengths = new int[numCols];
        int totalLength = 0;
        int kept = 0;

        // go through the bitmap and collect start and length (terminator included) of the retained columns
        for (int i = 0; bitmap != 0 && i < numCols; i++, bitmap >>>= 1) {
            if ((bitmap & 0x1) != 0) {
                int len = offsets[i + 1] - offsets[i];
                lengths[kept] = len;
                totalLength += len;
                // kept <= i, so this never overwrites an entry that a later iteration still reads
                offsets[kept] = offsets[i];
                kept++;
            }
        }

        // copy the retained columns to the beginning of a new array and rebase their offsets
        byte[] projected = new byte[totalLength];
        int writePos = 0;
        for (int i = 0; i < kept; i++) {
            System.arraycopy(bytes, offsets[i], projected, writePos, lengths[i]);
            offsets[i] = writePos;
            writePos += lengths[i];
        }

        bytes = projected;
        numCols = kept;
        offsets[numCols] = projected.length;
    }

    /**
     * Compares a String attribute of this tuple with a String attribute of another tuple.
     * The strings are compared lexicographic. Bytes are compared as unsigned values, which matches the order
     * of the Strings returned by {@link #getStringValueAt(int)}.
     *
     * @param other The other tuple.
     * @param thisColumn The index of this tuple's String attribute.
     * @param otherColumn The index of the other tuple's String attribute.
     * @return 1 if this tuple's attribute is greater, 0 if both attributes have the same value,
     *         -1 if this tuple's attribute is smaller.
     * @throws IndexOutOfBoundsException Thrown if one of the column indices is invalid (smaller than 0 or bigger
     *         than the attribute count).
     */
    public int compareStringAttribute(Tuple other, int thisColumn, int otherColumn) {
        checkColumn(thisColumn, numCols);
        checkColumn(otherColumn, other.numCols);

        final int len = getColumnLength(thisColumn);
        final int otherLen = other.getColumnLength(otherColumn);
        final int common = Math.min(len, otherLen);

        final int startPos = offsets[thisColumn];
        final int otherStartPos = other.offsets[otherColumn];

        for (int i = 0; i < common; i++) {
            // mask to 0..255, a plain byte comparison would order values >= 0x80 before ASCII
            int mine = bytes[startPos + i] & 0xff;
            int theirs = other.bytes[otherStartPos + i] & 0xff;
            if (mine != theirs) {
                return mine < theirs ? -1 : 1;
            }
        }

        // one attribute is a prefix of the other: the shorter one is smaller
        if (len == otherLen) {
            return 0;
        }
        return len < otherLen ? -1 : 1;
    }

    /**
     * Compares an Integer attribute of this tuple with an Integer attribute of another tuple.
     * The comparison works on the textual representation: first on the sign (a leading '-'), then on the
     * number of characters, then character by character. It therefore only orders values correctly that are
     * written without leading zeros and without a '+' sign.
     *
     * @param other The other tuple.
     * @param thisColumn The index of this tuple's int attribute.
     * @param otherColumn The index of the other tuple's int attribute.
     * @return 1 if this tuple's attribute is greater, 0 if both attributes have the same value,
     *         -1 if this tuple's attribute is smaller.
     * @throws IndexOutOfBoundsException Thrown if one of the column indices is invalid (smaller than 0 or bigger
     *         than the attribute count).
     */
    public int compareIntAttribute(Tuple other, int thisColumn, int otherColumn) {
        // validate first, the column lengths are only meaningful for valid indices
        checkColumn(thisColumn, numCols);
        checkColumn(otherColumn, other.numCols);

        final int len = getColumnLength(thisColumn);
        final int otherLen = other.getColumnLength(otherColumn);

        final int myStartPos = offsets[thisColumn];
        final int compStartPos = other.offsets[otherColumn];

        final int thisSign = bytes[myStartPos] == '-' ? -1 : 1;
        final int otherSign = other.bytes[compStartPos] == '-' ? -1 : 1;

        // different signs: the negative value is the smaller one
        if (thisSign != otherSign) {
            return thisSign;
        }

        // same sign: more characters mean a larger magnitude
        if (len != otherLen) {
            return (len < otherLen ? -1 : 1) * thisSign;
        }

        // same sign and length: the first differing character decides
        for (int i = 0; i < len; i++) {
            byte mine = bytes[myStartPos + i];
            byte theirs = other.bytes[compStartPos + i];
            if (mine != theirs) {
                return (mine < theirs ? -1 : 1) * thisSign;
            }
        }
        return 0;
    }

    /**
     * Returns the String value of the attribute with the specified index.
     * Every byte of the attribute becomes one character with the byte's unsigned value.
     *
     * @param column The index of the attribute whose String value is returned.
     * @return The String value of the specified attribute.
     * @throws IndexOutOfBoundsException Thrown if the index of the column is invalid (smaller than 0 or bigger
     *         than the attribute count).
     */
    public String getStringValueAt(int column) throws IndexOutOfBoundsException {
        checkColumn(column, numCols);

        final int off = offsets[column];
        final int len = getColumnLength(column);

        char[] chars = new char[len];
        for (int i = 0; i < len; i++) {
            chars[i] = (char) (bytes[off + i] & 0xff);
        }
        return new String(chars);
    }

    /**
     * Returns the Long value of the attribute with the specified index.
     * The value must be represented in the decimal system. An attribute without any digits yields 0.
     * Overflow is not detected.
     *
     * @param column The index of the attribute whose value is returned as long.
     * @return The long value of the specified attribute.
     * @throws IndexOutOfBoundsException Thrown if the index of the column is invalid (smaller than 0 or bigger
     *         than the attribute count).
     * @throws NumberFormatException Thrown if the attribute is not a valid long value
     *         (contains any other character than digits or '-'.)
     */
    public long getLongValueAt(int column) throws IndexOutOfBoundsException, NumberFormatException {
        checkColumn(column, numCols);

        int off = offsets[column];
        int len = getColumnLength(column);

        // for an empty attribute this inspects the terminator, which is harmless unless it is '-'
        boolean isNegative = false;
        if (bytes[off] == '-') {
            isNegative = true;
            off++;
            len--;
        }

        long value = 0;
        for (int i = off; i < off + len; i++) {
            byte digit = bytes[i];
            if (digit < '0' || digit > '9') {
                throw new NumberFormatException();
            }
            value = value * 10 + (digit - '0');
        }
        return isNegative ? -value : value;
    }

    /**
     * Returns an attribute which is specified by an index as byte array.
     *
     * @param column The index of the attribute which is returned as byte array.
     * @return A copy of the attribute's bytes, without the terminating byte.
     * @throws IndexOutOfBoundsException Thrown if the index of the column is invalid (smaller than 0 or bigger
     *         than the attribute count).
     */
    public byte[] getByteArrayValueAt(int column) throws IndexOutOfBoundsException {
        checkColumn(column, numCols);

        final int len = getColumnLength(column);
        byte[] buffer = new byte[len];
        System.arraycopy(bytes, offsets[column], buffer, 0, len);
        return buffer;
    }

    /**
     * Sets the size of the internal byte array of the tuple to the minimum capacity.
     * If the minimum capacity is not larger than the current size of the tuple's byte array,
     * nothing is done. Otherwise a new byte array is allocated and the content of the old one copied.
     * On a tuple without a byte array, an array of the requested size is allocated.
     *
     * @param minCapacity The new size of the internal byte array.
     */
    public void reserveSpace(int minCapacity) {
        if (bytes == null) {
            if (minCapacity > 0) {
                bytes = new byte[minCapacity];
            }
            return;
        }
        if (bytes.length < minCapacity) {
            byte[] grown = new byte[minCapacity];
            System.arraycopy(bytes, 0, grown, 0, usedBytes());
            bytes = grown;
        }
    }

    /**
     * Reduces the size of the internal byte and offset arrays to the currently used size.
     * Nothing is done for a tuple that has no arrays yet.
     */
    public void compact() {
        if (bytes == null || offsets == null) {
            return;
        }

        final int len = usedBytes();
        if (bytes.length > len) {
            byte[] trimmed = new byte[len];
            System.arraycopy(bytes, 0, trimmed, 0, len);
            bytes = trimmed;
        }

        if (offsets.length > numCols + 1) {
            int[] trimmed = new int[numCols + 1];
            System.arraycopy(offsets, 0, trimmed, 0, numCols + 1);
            offsets = trimmed;
        }
    }

    /**
     * Appends an attribute at the end of the tuple. The attribute is terminated with a '|' byte.
     *
     * @param attValue The attribute to append.
     */
    public void addAttribute(byte[] attValue) {
        final int valueLength = attValue.length;
        final int start = prepareAppend(valueLength + 1);

        System.arraycopy(attValue, 0, bytes, start, valueLength);
        bytes[start + valueLength] = DELIMITER;
        commitAppend(start + valueLength + 1);
    }

    /**
     * Appends an attribute at the end of the tuple. The attribute is terminated with a '|' byte.
     * Only the low 8 bits of every character are stored, so characters above 0xff are not preserved.
     *
     * @param attValue The attribute to append.
     */
    public void addAttribute(String attValue) {
        final int valueLength = attValue.length();
        final int start = prepareAppend(valueLength + 1);

        for (int i = 0; i < valueLength; i++) {
            bytes[start + i] = (byte) (attValue.charAt(i) & 0xff);
        }
        bytes[start + valueLength] = DELIMITER;
        commitAppend(start + valueLength + 1);
    }

    /**
     * Appends an attribute by copying it from another tuple. The attribute is copied together with the
     * terminating byte it has in the other tuple.
     *
     * @param other The other tuple to copy from.
     * @param column The index of the attribute to copy within the other tuple.
     * @throws IndexOutOfBoundsException Thrown if the other tuple has no column with the given index.
     */
    public void addAttributeFromKVRecord(Tuple other, int column) {
        // without this check an invalid index could append a column that has no terminator
        checkColumn(column, other.numCols);

        final int len = other.getColumnLength(column) + 1;
        final int start = prepareAppend(len);

        System.arraycopy(other.bytes, other.offsets[column], bytes, start, len);
        commitAppend(start + len);
    }

    /**
     * Replaces the content of this tuple with a section of the given byte array and determines the columns
     * by looking for the delimiter. Every occurrence of the delimiter terminates one column. Bytes after the
     * last delimiter are copied but do not belong to any column.
     *
     * @param bytes The array to copy the content from.
     * @param offset The position of the first byte to copy.
     * @param len The number of bytes to copy.
     * @param delimiter The character that terminates an attribute.
     */
    public void setContents(byte[] bytes, int offset, int len, char delimiter)
    {
        // make space
        if (this.bytes == null || this.bytes.length < len) {
            this.bytes = new byte[len];
        }

        // copy the binary data
        System.arraycopy(bytes, offset, this.bytes, 0, len);

        // allocate the offsets array
        if (this.offsets == null || this.offsets.length == 0) {
            this.offsets = new int[4];
        }
        this.offsets[0] = 0;

        // scan our own copy, so the result is also right if the source was this tuple's buffer
        int cols = 0;
        for (int pos = 0; pos < len; pos++) {
            if (this.bytes[pos] == delimiter) {
                cols++;
                if (this.offsets.length <= cols) {
                    int[] grown = new int[this.offsets.length * 2];
                    System.arraycopy(this.offsets, 0, grown, 0, this.offsets.length);
                    this.offsets = grown;
                }
                // the next column starts right after the delimiter
                this.offsets[cols] = pos + 1;
            }
        }
        this.numCols = cols;
    }

    // ------------------------------------------------------------------------
    //  Serialization
    // ------------------------------------------------------------------------

    /**
     * Reads the tuple in the format produced by {@link #write(DataOutput)}. The fields of the tuple are only
     * changed after everything was read successfully. A byte count that is not positive yields an empty tuple.
     *
     * @param in The input to read from.
     * @throws IOException Thrown if reading fails or if the column information is invalid.
     */
    @Override
    public void read(DataInput in) throws IOException {
        // read the bytes
        final int numBytes = in.readInt();
        if (numBytes <= 0) {
            numCols = 0;
            return;
        }

        byte[] readBytes = new byte[numBytes];
        in.readFully(readBytes);

        // read the offsets; the first (always 0) and the last (the byte count) are not part of the data
        final int innerOffsets = in.readInt();
        if (innerOffsets < 0) {
            throw new IOException("Invalid number of column offsets: " + innerOffsets);
        }
        final int cols = innerOffsets + 1;
        int[] readOffsets = new int[cols + 1];
        for (int i = 1; i < cols; i++) {
            readOffsets[i] = in.readInt();
        }
        // set last offset
        readOffsets[cols] = numBytes;

        bytes = readBytes;
        offsets = readOffsets;
        numCols = cols;
    }

    /**
     * Writes the number of used bytes and, if there are any, the bytes themselves, the number of columns
     * minus one, and the start offsets of all columns but the first.
     *
     * @param out The output to write to.
     * @throws IOException Thrown if writing fails.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // write the bytes
        final int numBytes = (numCols > 0 ? offsets[numCols] : 0);
        out.writeInt(numBytes);
        if (numBytes <= 0) {
            return;
        }
        out.write(bytes, 0, numBytes);

        // write the offsets, exclude first and last
        out.writeInt(numCols - 1);
        for (int i = 1; i < numCols; i++) {
            out.writeInt(offsets[i]);
        }
    }

    /**
     * Returns all attributes, each followed by a '|' character.
     *
     * @return The textual representation of the tuple.
     */
    @Override
    public String toString() {
        StringBuilder bld = new StringBuilder();
        for (int i = 0; i < numCols; i++) {
            final int len = getColumnLength(i);
            for (int k = 0; k < len; k++) {
                bld.append((char) (bytes[offsets[i] + k] & 0xff));
            }
            bld.append('|');
        }
        return bld.toString();
    }

    // ------------------------------------------------------------------------
    //  Internal helpers
    // ------------------------------------------------------------------------

    /** Throws an IndexOutOfBoundsException unless 0 <= column < columnCount. */
    private static void checkColumn(int column, int columnCount) {
        if (column < 0 || column >= columnCount) {
            throw new IndexOutOfBoundsException("Column " + column + " is not in [0, " + columnCount + ")");
        }
    }

    /** Returns the number of bytes occupied by the columns of this tuple. */
    private int usedBytes() {
        return (numCols > 0 && offsets != null) ? offsets[numCols] : 0;
    }

    /**
     * Makes sure both arrays can take one more column of the given size (terminator included) and returns
     * the position at which the column has to be written.
     */
    private int prepareAppend(int needed) {
        if (numCols == 0) {
            // empty tuple: keep arrays that are large enough (e.g. from reserveSpace), allocate otherwise
            if (offsets == null || offsets.length < 2) {
                offsets = new int[INITIAL_OFFSETS_CAPACITY];
            }
            if (bytes == null || bytes.length < needed) {
                bytes = new byte[Math.max(INITIAL_BYTES_CAPACITY, needed)];
            }
            offsets[0] = 0;
            return 0;
        }

        final int end = offsets[numCols];

        // increase offset array, if necessary
        if (numCols + 1 >= offsets.length) {
            int[] grown = new int[Math.max(offsets.length * 2, numCols + 2)];
            System.arraycopy(offsets, 0, grown, 0, numCols + 1);
            offsets = grown;
        }

        // increase byte buffer, if necessary; doubling keeps repeated appends cheap
        if (bytes.length < end + needed) {
            byte[] grown = new byte[Math.max(bytes.length * 2, end + needed)];
            System.arraycopy(bytes, 0, grown, 0, end);
            bytes = grown;
        }
        return end;
    }

    /** Registers the column that was just written; newEnd is the position right after its terminator. */
    private void commitAppend(int newEnd) {
        numCols++;
        offsets[numCols] = newEnd;
    }
}