/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.bytes.impl;
import org.lilyproject.bytes.api.DataInput;
/**
* Implementation of {@link DataInput} which reads primitive values from an underlying byte[].
* The byte[] should have been created and encoded by the related {@link DataOutputImpl}.
*
* <p>The position within the underlying byte[] is maintained so that each read
* call will return the next value in the byte[].
*
* <p>This implementation (especially #readUTF()) is based on (and some pieces are copied from) the work
* done by Lucene in the methods <code>UTF16toUTF8</code> and <code>UTF8toUTF16</code>
* in <code>org.apache.lucene.util.UnicodeUtil.java</code> (revision 1030754),
* and combined with the work done by ElasticSearch in
* <code>org.elasticsearch.common.io.stream.BytesStreamInput.java</code>,
* <code>org.elasticsearch.common.io.stream.BytesStreamOutput.java</code>,
* <code>org.elasticsearch.common.io.stream.StreamInput.java</code>,
* <code>org.elasticsearch.common.io.stream.StreamOutput.java</code>.
*
* <p>See also <a href="http://en.wikipedia.org/wiki/UTF-16/UCS-2">http://en.wikipedia.org/wiki/UTF-16/UCS-2</a>
*/
public class DataInputImpl implements DataInput {
    /** First code unit of the UTF-16 low (trailing) surrogate range. */
    public static final int UNI_SUR_LOW_START = 0xDC00;

    /** Largest code point that fits in a single UTF-16 code unit. */
    private static final int UNI_MAX_BMP = 0x0000FFFF;

    /** Number of code point bits carried by the low surrogate. */
    private static final int HALF_SHIFT = 10;

    /** Mask selecting the bits of a code point that go into the low surrogate. */
    private static final int HALF_MASK = 0x3FF;

    /**
     * Start of the high surrogate range (0xD800) minus (0x10000 >> HALF_SHIFT), so that adding
     * (codePoint >> HALF_SHIFT) directly yields the high surrogate.
     */
    private static final int UNI_SUR_HIGH_OFFSET = 0xD7C0;

    /** Initial capacity of the reusable character buffer. */
    private static final int INITIAL_CHAR_BUFFER_SIZE = 80;

    /** The underlying byte[]. It is shared with the caller, never copied or modified by this class. */
    private final byte[] source;

    /**
     * Absolute position in the underlying byte[] to start reading.
     */
    private final int startPosition;

    /**
     * Position of the next value to be read, relative to the start position.
     */
    private int pos;

    /**
     * Relative size of the data input (i.e. not the size of the underlying byte[]).
     */
    private int size;

    // Character array filled while decoding a string.
    // The same array is reused for each read to avoid allocating a new one each time;
    // it is replaced by a larger one when needed.
    private char[] chararr = new char[INITIAL_CHAR_BUFFER_SIZE];

    /**
     * Constructor for the {@link DataInput}.
     * The source should have been created using {@link DataOutputImpl}.
     *
     * @param source the underlying byte[] from which the data will be read.
     */
    public DataInputImpl(byte[] source) {
        this(source, 0, source.length);
    }

    /**
     * Constructor for the {@link DataInput} based on a part of a byte[] (from startPosition to
     * startPosition + size).
     * The source should have been created using {@link DataOutputImpl}.
     *
     * @param source        the underlying byte[] from which the data will be read.
     * @param startPosition start position in the source byte[]
     * @param size          size of the relevant part of the source[] array to consider
     */
    public DataInputImpl(byte[] source, int startPosition, int size) {
        this.source = source;
        this.startPosition = startPosition;
        this.size = size;
        this.pos = 0;
    }

    /**
     * Constructor for the {@link DataInput} based on an existing DataInputImpl.
     * Its source (the underlying byte[]) is the same as for the given dataInput.
     *
     * @param source        the data input whose underlying byte[] is shared
     * @param startPosition position within the source, relative to the startPosition of the given dataInput
     * @param size          the size of the DataInput
     */
    public DataInputImpl(DataInputImpl source, int startPosition, int size) {
        this(source.source, source.startPosition + startPosition, size);
    }

    /**
     * Reads the byte at the current position and advances the position by one.
     */
    @Override
    public byte readByte() {
        return source[startPosition + pos++];
    }

    /**
     * Copies the next {@code length} bytes into a new array and advances the position by {@code length}.
     *
     * @param length number of bytes to read
     * @return a newly allocated array holding the bytes read
     */
    @Override
    public byte[] readBytes(int length) {
        byte[] result = new byte[length];
        System.arraycopy(source, startPosition + pos, result, 0, length);
        pos += length;
        return result;
    }

    /**
     * Reads a string stored as a 4-byte length (see {@link #readInt()}) followed by that many bytes of
     * UTF-8 data. This is the counterpart of {@link DataOutputImpl#writeUTF(String)}.
     *
     * @return the decoded string, or null when the stored length is -1
     */
    @Override
    public String readUTF() {
        return readUTF(readInt());
    }

    /**
     * Reads a string stored as a variable-length encoded length (see {@link #readVInt()}) followed by
     * that many bytes of UTF-8 data.
     *
     * @return the decoded string
     */
    @Override
    public String readVUTF() {
        return readUTF(readVInt());
    }

    /**
     * Decodes {@code utflen} bytes of UTF-8 data starting at the current position into a string and
     * advances the position by {@code utflen}.
     *
     * <p>The bytes are not validated: continuation bytes are only masked, never checked, so the result
     * for malformed input is unspecified.
     *
     * @param utflen number of encoded bytes to decode, or -1 to denote a null string
     * @return the decoded string, null when {@code utflen} is -1, the empty string when it is 0
     * @throws IllegalArgumentException       if {@code utflen} is negative and not -1
     * @throws ArrayIndexOutOfBoundsException if fewer than {@code utflen} bytes remain in the underlying byte[]
     */
    @Override
    public String readUTF(int utflen) {
        if (utflen == -1) {
            return null;
        }
        if (utflen < 0) {
            // Without this check the loops below are skipped and the position would silently move backwards.
            throw new IllegalArgumentException("Invalid UTF length: " + utflen);
        }
        if (utflen == 0) {
            return "";
        }

        int count = startPosition + pos;
        int available = source.length - count;
        if (utflen > available) {
            // Fail before allocating a buffer sized by a (possibly corrupt) length.
            throw new ArrayIndexOutOfBoundsException(
                    "UTF length " + utflen + " exceeds the " + available + " byte(s) available");
        }
        int endPos = count + utflen;

        // Resize the chararr if it is not large enough. Well-formed input never yields more chars than
        // bytes; the doubling only provides headroom for later reads and is skipped when it would overflow.
        if (chararr.length < utflen) {
            int newLength = utflen <= Integer.MAX_VALUE / 2 ? utflen * 2 : utflen;
            chararr = new char[newLength];
        }

        int chararrCount = 0; // Position within the char array
        int b; // byte read
        int ch; // character read

        // Start with a loop expecting each character to be encoded by one byte.
        // This will most likely be the case for most strings.
        while (count < endPos) {
            b = source[count] & 0xff;
            if (b >= 0xc0) {
                break; // First multi-byte character encountered: continue with the general loop below
            }
            count++;
            assert b < 0x80;
            chararrCount = putChar(chararrCount, b);
        }

        // Decode characters which can be encoded by multiple bytes
        while (count < endPos) {
            b = source[count++] & 0xff;
            if (b < 0xc0) {
                assert b < 0x80;
                ch = b;
            } else if (b < 0xe0) {
                // two-byte sequence
                ch = ((b & 0x1f) << 6) + (source[count++] & 0x3f);
            } else if (b < 0xf0) {
                // three-byte sequence
                ch = ((b & 0xf) << 12) + ((source[count++] & 0x3f) << 6) + (source[count++] & 0x3f);
            } else {
                // four-byte sequence
                assert b < 0xf8;
                ch = ((b & 0x7) << 18) + ((source[count++] & 0x3f) << 12) + ((source[count++] & 0x3f) << 6)
                        + (source[count++] & 0x3f);
            }
            chararrCount = putChar(chararrCount, ch);
        }

        pos += utflen;
        // The number of chars produced may be less than utflen
        return new String(chararr, 0, chararrCount);
    }

    /**
     * Appends the code point {@code ch} to the character buffer, as one char or as a surrogate pair.
     *
     * @param chararrCount index in the buffer at which to write
     * @param ch           the code point to store
     * @return the index following the last char written
     */
    private int putChar(int chararrCount, int ch) {
        if (ch <= UNI_MAX_BMP) {
            // target is a character <= 0xFFFF
            chararr[chararrCount++] = (char) ch;
        } else {
            // target is a character in range 0xFFFF - 0x10FFFF: store it as a surrogate pair
            chararr[chararrCount++] = (char) ((ch >> HALF_SHIFT) + UNI_SUR_HIGH_OFFSET);
            chararr[chararrCount++] = (char) ((ch & HALF_MASK) + UNI_SUR_LOW_START);
        }
        return chararrCount;
    }

    /**
     * Reads one byte and interprets any non-zero value as true.
     */
    @Override
    public boolean readBoolean() {
        return readByte() != 0;
    }

    /**
     * Reads eight bytes (see {@link #readLong()}) and interprets them as the bit pattern of a double.
     */
    @Override
    public double readDouble() {
        return Double.longBitsToDouble(readLong());
    }

    /**
     * Reads four bytes as an int, most significant byte first.
     */
    @Override
    public int readInt() {
        return ((readByte() & 0xFF) << 24)
                | ((readByte() & 0xFF) << 16)
                | ((readByte() & 0xFF) << 8)
                | (readByte() & 0xFF);
    }

    /**
     * Reads eight bytes as a long, most significant byte first.
     */
    @Override
    public long readLong() {
        long high = readInt();
        long low = readInt() & 0xFFFFFFFFL; // mask to prevent sign extension of the lower half
        return (high << 32) | low;
    }

    /**
     * Reads two bytes, most significant byte first, as a signed 16-bit value widened to an int.
     */
    @Override
    public int readShort() {
        return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF));
    }

    /**
     * Reads four bytes (see {@link #readInt()}) and interprets them as the bit pattern of a float.
     */
    @Override
    public float readFloat() {
        return Float.intBitsToFloat(readInt());
    }

    /**
     * Reads an int stored in variable-length format. Reads between one and
     * five bytes. Smaller values take fewer bytes. Negative numbers are not
     * supported.
     *
     * <p>Each byte contributes its low 7 bits, least significant group first; a set high bit
     * means that another byte follows.
     */
    @Override
    public int readVInt() {
        byte b = readByte();
        int i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
            b = readByte();
            i |= (b & 0x7F) << shift;
        }
        return i;
    }

    /**
     * Reads a long stored in variable-length format. Reads between one and
     * nine bytes. Smaller values take fewer bytes. Negative numbers are not
     * supported.
     *
     * <p>Each byte contributes its low 7 bits, least significant group first; a set high bit
     * means that another byte follows.
     */
    @Override
    public long readVLong() {
        byte b = readByte();
        long i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
            b = readByte();
            i |= (b & 0x7FL) << shift;
        }
        return i;
    }

    /**
     * @return the position of the next value to be read, relative to the start position
     */
    @Override
    public int getPosition() {
        return pos;
    }

    /**
     * @return the absolute position in the underlying byte[] at which this data input starts
     */
    @Override
    public int getStartPosition() {
        return startPosition;
    }

    /**
     * Moves the read position. The value is stored as given, without any validation.
     *
     * @param position the new position, relative to the start position
     */
    @Override
    public void setPosition(int position) {
        this.pos = position;
    }

    /**
     * @return the size of this data input (not the size of the underlying byte[])
     */
    @Override
    public int getSize() {
        return size;
    }

    /**
     * Changes the size of this data input.
     *
     * @param size the new size, counted from the start position
     * @throws IllegalArgumentException if the size is negative or extends beyond the end of the
     *                                  underlying byte[]
     */
    @Override
    public void setSize(int size) {
        // The size is relative to startPosition, so only the bytes after it are available.
        int maxSize = source.length - startPosition;
        if (size < 0 || size > maxSize) {
            throw new IllegalArgumentException("Invalid size: " + size + " (maximum: " + maxSize + ")");
        }
        this.size = size;
    }

    /**
     * Searches for a byte value from the current position up to (but excluding) the size of this data input.
     *
     * @param value the byte to look for
     * @return the position of the first match, relative to the start position, or -1 if there is none
     */
    @Override
    public int indexOf(byte value) {
        for (int i = pos; i < size; i++) {
            if (source[startPosition + i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @return A copy of the "chunk" of the underlying byte[] that this data input considers
     *         (without taking current position into account)
     */
    public byte[] asArray() {
        byte[] result = new byte[size];
        System.arraycopy(source, startPosition, result, 0, size);
        return result;
    }
}