/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
/**
* TimestampWritable
* Writable equivalent of java.sql.Timestamp
*
* Timestamps are of the format
* YYYY-MM-DD HH:MM:SS.[fff...]
*
* We encode Unix timestamp in seconds in 4 bytes, using the MSB to signify
* whether the timestamp has a fractional portion.
*
* The fractional portion is reversed, and encoded as a VInt
* so timestamps with less precision use fewer bytes.
*
* 0.1 -> 1
* 0.01 -> 10
* 0.001 -> 100
*
*/
public class TimestampWritable implements WritableComparable<TimestampWritable> {
/** Logger for this class. */
private static final Log LOG = LogFactory.getLog(TimestampWritable.class);

/** Four zero bytes: a seconds field of 0 with the fraction flag cleared. */
public static final byte[] nullBytes = {0x0, 0x0, 0x0, 0x0};

/** Clears the most significant bit of the 4-byte seconds field (no fraction follows). */
private static final int NO_DECIMAL_MASK = 0x7FFFFFFF;

/** Sets the most significant bit of the 4-byte seconds field (a fraction follows). */
private static final int HAS_DECIMAL_MASK = 0x80000000;

/** One formatter per thread for the "yyyy-MM-dd HH:mm:ss" part of the text form. */
private static final ThreadLocal<DateFormat> threadLocalDateFormat =
    new ThreadLocal<DateFormat>() {
      @Override
      protected synchronized DateFormat initialValue() {
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
      }
    };

/** Object form of the value; only meaningful while {@code timestampEmpty} is false. */
private Timestamp timestamp = new Timestamp(0);

/**
 * True while the value lives only in {@code timestamp} and the byte form has
 * not been produced yet; {@code checkBytes()} converts lazily and clears it.
 */
private boolean bytesEmpty;

/**
 * True while {@code timestamp} does not reflect the byte form and has to be
 * repopulated from the bytes before it is handed out.
 */
private boolean timestampEmpty;

/* Allow use of external byte[] for efficiency */

/** Buffer currently holding the serialized value: internalBytes or externalBytes. */
private byte[] currentBytes;

/** Private scratch buffer: 4 bytes of seconds followed by up to 5 bytes of VInt. */
private final byte[] internalBytes = new byte[9];

/** Caller-supplied buffer installed by {@code set(byte[], int)}. */
private byte[] externalBytes;

/** Position of the serialized value inside {@code currentBytes}. */
private int offset;

/* Reused to read VInts. Static and mutable, so not safe for concurrent use. */
private static final VInt vInt = new VInt();
/* Constructors */
/**
 * Creates a writable backed by the zero-filled internal buffer, which reads
 * as 0 seconds with no fractional part.
 */
public TimestampWritable() {
  Arrays.fill(internalBytes, (byte) 0x0);
  offset = 0;
  currentBytes = internalBytes;
  bytesEmpty = false;
  clearTimestamp();
}

/**
 * Creates a writable that reads its serialized value from a caller-owned array.
 *
 * @param bytes array holding the serialized timestamp (not copied)
 * @param offset index in {@code bytes} where the value starts
 */
public TimestampWritable(byte[] bytes, int offset) {
  this.set(bytes, offset);
}

/**
 * Copy constructor: snapshots the serialized bytes of {@code t}.
 *
 * @param t the writable to copy
 */
public TimestampWritable(TimestampWritable t) {
  this(t.getBytes(), 0);
}

/**
 * Creates a writable holding the given Timestamp.
 *
 * @param t the value to hold; see {@link #set(Timestamp)}
 */
public TimestampWritable(Timestamp t) {
  this.set(t);
}
/**
 * Points this writable at a serialized value inside a caller-owned array.
 * The array is referenced, not copied, and the cached Timestamp is marked stale.
 *
 * @param bytes array holding the serialized timestamp
 * @param offset index in {@code bytes} where the value starts
 */
public void set(byte[] bytes, int offset) {
  this.externalBytes = bytes;
  this.currentBytes = bytes;
  this.offset = offset;
  this.bytesEmpty = false;
  clearTimestamp();
}
/**
 * Makes this writable hold the given Timestamp. The object is stored by
 * reference (not copied); the byte form is regenerated lazily on demand.
 *
 * <p>A {@code null} argument resets the currently held Timestamp object to
 * the epoch with zero nanos. In both cases the Timestamp becomes the
 * authoritative representation, so a null reset is no longer undone by
 * bytes left over from an earlier {@code set(byte[], int)} or read.
 *
 * @param t the value to hold, or {@code null} to reset to the epoch
 */
public void set(Timestamp t) {
  if (t == null) {
    timestamp.setTime(0);
    timestamp.setNanos(0);
  } else {
    this.timestamp = t;
  }
  // The Timestamp is now the source of truth; any serialized bytes are stale.
  bytesEmpty = true;
  timestampEmpty = false;
}
/**
 * Makes this writable share the state of another one. If {@code t} is
 * Timestamp-backed its Timestamp object is adopted; otherwise this writable
 * is pointed at the byte array {@code t} is currently using (no copy is made).
 *
 * @param t the writable whose value is taken over
 */
public void set(TimestampWritable t) {
  if (t.bytesEmpty) {
    set(t.getTimestamp());
  } else {
    // Only an external buffer carries a caller-chosen offset; otherwise start at 0.
    int sourceOffset = (t.currentBytes == t.externalBytes) ? t.offset : 0;
    set(t.currentBytes, sourceOffset);
  }
}
/** Marks the cached Timestamp object as stale so it is rebuilt from the bytes on next use. */
private void clearTimestamp() {
  this.timestampEmpty = true;
}
/**
 * Appends the serialized value (4 seconds bytes plus the optional VInt
 * fraction) to the given byte stream.
 *
 * @param byteStream destination stream
 */
public void writeToByteStream(Output byteStream) {
  checkBytes();
  final int length = getTotalLength();
  byteStream.write(currentBytes, offset, length);
}
/**
 * Returns the whole-seconds part of the value, taken from the Timestamp
 * object when the bytes have not been generated yet and from the serialized
 * bytes otherwise.
 *
 * @return seconds corresponding to this TimestampWritable
 */
public int getSeconds() {
  return bytesEmpty
      ? (int) (timestamp.getTime() / 1000)
      : TimestampWritable.getSeconds(currentBytes, offset);
}
/**
 * Returns the fractional part of the value in nanoseconds. A valid cached
 * Timestamp is used directly; otherwise the fraction is decoded from the
 * bytes, or 0 is returned when the fraction flag is not set.
 *
 * @return nanoseconds in this TimestampWritable
 */
public int getNanos() {
  if (timestampEmpty) {
    if (hasDecimal()) {
      // The VInt fraction follows the 4-byte seconds field.
      return TimestampWritable.getNanos(currentBytes, offset + 4);
    }
    return 0;
  }
  return timestamp.getNanos();
}
/**
 * Computes the size of the serialized form: the fixed 4-byte seconds field
 * plus the variable-length fraction, if any.
 *
 * @return length of serialized TimestampWritable data
 */
private int getTotalLength() {
  final int secondsFieldLength = 4;
  return secondsFieldLength + getDecimalLength();
}
/**
 * Determines how many bytes the VInt-encoded fraction occupies, making sure
 * the byte form exists first.
 *
 * @return number of bytes the variable length decimal takes up, or 0 when
 *         the value has no fraction
 */
private int getDecimalLength() {
  checkBytes();
  if (!hasDecimal()) {
    return 0;
  }
  // The first VInt byte, right after the seconds field, encodes the VInt's size.
  return WritableUtils.decodeVIntSize(currentBytes[offset + 4]);
}
/**
 * Returns the value as a Timestamp, rebuilding the internal object from the
 * bytes when it is stale. The returned object is the internal instance, not
 * a copy.
 *
 * @return the Timestamp held by this writable
 */
public Timestamp getTimestamp() {
  if (!timestampEmpty) {
    return timestamp;
  }
  populateTimestamp();
  return timestamp;
}
/**
 * Produces a freshly allocated array holding exactly the serialized value.
 * Used to create copies of objects.
 *
 * @return a copy of the internal TimestampWritable byte[]
 */
public byte[] getBytes() {
  checkBytes();
  final int length = getTotalLength();
  final byte[] copy = new byte[length];
  System.arraycopy(currentBytes, offset, copy, 0, length);
  return copy;
}
/**
 * Builds the fixed-width 8-byte form: 4 big-endian bytes of seconds with the
 * most significant bit forced on, followed by 4 big-endian bytes of nanos.
 *
 * @return byte[] representation of TimestampWritable that is binary
 *         sortable (4 byte seconds, 4 bytes for nanoseconds)
 */
public byte[] getBinarySortable() {
  final int nanos = getNanos();
  final int flaggedSeconds = getSeconds() | HAS_DECIMAL_MASK;
  final byte[] sortable = new byte[8];
  intToBytes(flaggedSeconds, sortable, 0);
  intToBytes(nanos, sortable, 4);
  return sortable;
}
/**
 * Loads this writable from the 8-byte binary sortable form (4 bytes of
 * seconds, 4 bytes of nanos), re-encoding it into the internal buffer in the
 * regular variable-length format.
 *
 * <p>The freshly written bytes become the authoritative representation: the
 * cached Timestamp is marked stale and any pending Timestamp-to-bytes
 * conversion is cancelled, so a previously held Timestamp cannot shadow or
 * overwrite the new value.
 *
 * @param bytes array holding the binary sortable data
 * @param offset index in {@code bytes} where the data starts
 */
public void setBinarySortable(byte[] bytes, int offset) {
  int seconds = bytesToInt(bytes, offset);
  int nanos = bytesToInt(bytes, offset + 4);
  // The top bit of the seconds field flags whether a fraction follows.
  if (nanos == 0) {
    seconds &= NO_DECIMAL_MASK;
  } else {
    seconds |= HAS_DECIMAL_MASK;
  }
  intToBytes(seconds, internalBytes, 0);
  setNanosBytes(nanos, internalBytes, 4);
  currentBytes = internalBytes;
  this.offset = 0;
  // The bytes are now the source of truth; drop any Timestamp-backed state.
  bytesEmpty = false;
  clearTimestamp();
}
/**
 * The data of TimestampWritable can be stored either in a byte[] or in a
 * Timestamp object. This method guarantees the byte[] form exists by
 * serializing the Timestamp into the internal buffer if that has not
 * happened yet; it does nothing when the bytes are already present.
 */
private void checkBytes() {
  if (!bytesEmpty) {
    return;
  }
  // Populate byte[] from Timestamp
  convertTimestampToBytes(timestamp, internalBytes, 0);
  currentBytes = internalBytes;
  offset = 0;
  bytesEmpty = false;
}
/**
 * Converts the value to seconds since the epoch as a double: whole seconds
 * plus nanos divided by one billion.
 *
 * @return double representation of the timestamp
 */
public double getDouble() {
  final double wholeSeconds;
  final double fractionNanos;
  if (!bytesEmpty) {
    wholeSeconds = getSeconds();
    fractionNanos = getNanos();
  } else {
    // Integer division: drop the millisecond part, the nanos carry the fraction.
    wholeSeconds = timestamp.getTime() / 1000;
    fractionNanos = timestamp.getNanos();
  }
  return wholeSeconds + fractionNanos / 1000000000;
}
/**
 * Reads the serialized value from {@code in} into the internal buffer: 4
 * bytes of seconds and, when the fraction flag is set, a VInt fraction.
 *
 * <p>After a successful read the bytes are the authoritative representation:
 * the cached Timestamp is marked stale and any pending Timestamp-to-bytes
 * conversion is cancelled, so reusing an instance that previously held a
 * Timestamp yields the newly read value.
 *
 * @param in source of the serialized data
 * @throws IOException if the input fails, ends early, or announces a VInt
 *         longer than the internal buffer can hold
 */
public void readFields(DataInput in) throws IOException {
  in.readFully(internalBytes, 0, 4);
  if (TimestampWritable.hasDecimal(internalBytes[0])) {
    in.readFully(internalBytes, 4, 1);
    int len = WritableUtils.decodeVIntSize(internalBytes[4]);
    // Reject corrupt input before it can overrun the internal buffer.
    if (len > internalBytes.length - 4) {
      throw new IOException("Invalid VInt length " + len
          + " for timestamp fraction");
    }
    if (len > 1) {
      in.readFully(internalBytes, 5, len - 1);
    }
  }
  currentBytes = internalBytes;
  this.offset = 0;
  // The bytes are now the source of truth; drop any Timestamp-backed state.
  bytesEmpty = false;
  clearTimestamp();
}
/**
 * Writes the serialized value (4 seconds bytes plus the optional VInt
 * fraction) to the given stream.
 *
 * @param out destination stream
 * @throws IOException if the stream rejects the write
 */
public void write(OutputStream out) throws IOException {
  checkBytes();
  final int length = getTotalLength();
  out.write(currentBytes, offset, length);
}
/**
 * Writes the serialized value (4 seconds bytes plus the optional VInt
 * fraction) to the given DataOutput.
 *
 * <p>The bytes are written through {@link DataOutput#write(byte[], int, int)}
 * directly, so any DataOutput implementation is accepted, not only those
 * that also happen to be an {@code OutputStream}.
 *
 * @param out destination
 * @throws IOException if the destination rejects the write
 */
public void write(DataOutput out) throws IOException {
  checkBytes();
  out.write(currentBytes, offset, getTotalLength());
}
/**
 * Orders two writables by seconds and then by nanoseconds.
 *
 * <p>The result is produced by comparison rather than subtraction, so it
 * cannot overflow when the operands are far apart; only the sign of the
 * result is meaningful.
 *
 * @param t the writable to compare against
 * @return -1, 0 or 1 as this value is less than, equal to or greater than {@code t}
 */
public int compareTo(TimestampWritable t) {
  // Make sure this instance is byte-backed before its seconds are read.
  checkBytes();
  int s1 = this.getSeconds();
  int s2 = t.getSeconds();
  if (s1 != s2) {
    return s1 < s2 ? -1 : 1;
  }
  int n1 = this.getNanos();
  int n2 = t.getNanos();
  if (n1 == n2) {
    return 0;
  }
  return n1 < n2 ? -1 : 1;
}
/**
 * Two writables are equal when they compare as equal, i.e. hold the same
 * seconds and nanoseconds. {@code null} and objects of other types are
 * simply unequal instead of raising an exception.
 *
 * @param o the object to compare with
 * @return true if {@code o} is a TimestampWritable with the same value
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof TimestampWritable)) {
    return false;
  }
  return compareTo((TimestampWritable) o) == 0;
}
/**
 * Renders the value as "yyyy-MM-dd HH:mm:ss" followed by the fractional
 * digits that {@code Timestamp.toString()} prints, except that a bare ".0"
 * fraction is omitted.
 *
 * @return the textual form of the timestamp
 */
@Override
public String toString() {
  if (timestampEmpty) {
    populateTimestamp();
  }
  final String full = timestamp.toString();
  final String head = threadLocalDateFormat.get().format(timestamp);
  // Everything from index 19 onwards is the "." plus fraction digits.
  final boolean onlyZeroFraction =
      full.length() == 21 && ".0".equals(full.substring(19));
  if (full.length() > 19 && !onlyZeroFraction) {
    return head + full.substring(19);
  }
  return head;
}
/**
 * Hashes the value by packing seconds into the high 32 bits and nanos into
 * the low 32 bits of a long, then folding the two halves together.
 *
 * @return hash code derived from seconds and nanoseconds
 */
@Override
public int hashCode() {
  long packed = ((long) getSeconds() << 32) | getNanos();
  return (int) ((packed >>> 32) ^ packed);
}
/**
 * Refreshes the internal Timestamp object from the current seconds and
 * nanoseconds of this writable.
 */
private void populateTimestamp() {
  final long millis = getSeconds() * 1000L;
  final int nanos = getNanos();
  timestamp.setTime(millis);
  timestamp.setNanos(nanos);
}
/** Static methods **/
/**
 * Decodes the seconds stored as a big-endian integer at bytes[offset],
 * with the fraction flag (the most significant bit) stripped off.
 *
 * @param bytes array holding the serialized timestamp
 * @param offset index where the serialized timestamp starts
 * @return the number of seconds
 */
public static int getSeconds(byte[] bytes, int offset) {
  final int raw = bytesToInt(bytes, offset);
  return raw & NO_DECIMAL_MASK;
}
/**
 * Decodes the fractional part stored as a VInt at bytes[offset].
 *
 * <p>The fraction is stored with its decimal digits reversed (0.1 is stored
 * as 1, 0.01 as 10). This method reverses the digits back and pads the
 * result on the right to nine digits, e.g. a stored 1 becomes 100000000.
 *
 * <p>A VInt local to the call is used for decoding, so concurrent callers do
 * not interfere with each other, and the digit handling is pure integer
 * arithmetic.
 *
 * @param bytes array holding the VInt-encoded fraction
 * @param offset index of the first VInt byte
 * @return the fraction in nanoseconds; 0 when the stored value is 0
 */
public static int getNanos(byte[] bytes, int offset) {
  VInt decoded = new VInt();
  LazyBinaryUtils.readVInt(bytes, offset, decoded);
  int remaining = decoded.value;

  // Reverse the decimal digits while counting them.
  int reversed = 0;
  int digits = 0;
  while (remaining != 0) {
    reversed = reversed * 10 + remaining % 10;
    remaining /= 10;
    digits++;
  }

  // Scale up to a full nine-digit nanosecond value.
  for (; digits < 9; digits++) {
    reversed *= 10;
  }
  return reversed;
}
/**
 * Writes a Timestamp's serialized value to byte array b starting at offset:
 * the VInt fraction (only when nanos are non-zero) at offset+4, then the
 * 4-byte seconds field with its fraction flag at offset.
 *
 * @param t the timestamp to serialize
 * @param b destination array; an error is logged when it is shorter than 9 bytes
 * @param offset index in {@code b} where the serialized value starts
 */
public static void convertTimestampToBytes(Timestamp t, byte[] b,
    int offset) {
  if (b.length < 9) {
    LOG.error("byte array too short");
  }
  final int nanos = t.getNanos();
  final long millis = t.getTime();
  boolean hasDecimal = false;
  if (nanos != 0) {
    hasDecimal = setNanosBytes(nanos, b, offset + 4);
  }
  setSecondsBytes(millis, b, offset, hasDecimal);
}
/**
 * Converts milliseconds to whole seconds and writes them as 4 big-endian
 * bytes to b at offset, using the most significant bit as the fraction flag.
 *
 * @param millis time in milliseconds
 * @param b destination array
 * @param offset index in {@code b} of the first seconds byte
 * @param hasDecimal whether the fraction flag should be set
 */
private static void setSecondsBytes(long millis, byte[] b, int offset, boolean hasDecimal) {
  final int seconds = (int) (millis / 1000);
  final int encoded = hasDecimal
      ? (seconds | HAS_DECIMAL_MASK)
      : (seconds & NO_DECIMAL_MASK);
  intToBytes(encoded, b, offset);
}
/**
 * Reverses the nine decimal digits of the given nanoseconds and writes the
 * result as a variable-length integer to b at offset. Reversal drops
 * trailing zeros of the fraction, so 100000000 nanos is stored as 1.
 *
 * @param nanos nanoseconds to encode
 * @param b destination array
 * @param offset index in {@code b} of the first encoded byte
 * @return true if the encoded (reversed) value is non-zero
 */
private static boolean setNanosBytes(int nanos, byte[] b, int offset) {
  int reversed = 0;
  int remaining = nanos;
  // Always consume exactly nine digits; a zero input simply stays zero.
  for (int digit = 0; digit < 9; digit++) {
    reversed = reversed * 10 + remaining % 10;
    remaining /= 10;
  }
  LazyBinaryUtils.writeVLongToByteArray(b, offset, reversed);
  return reversed != 0;
}
/**
 * Interprets a float as a unix timestamp in seconds by widening it to a
 * double and delegating to {@code doubleToTimestamp}.
 *
 * @param f seconds since the epoch
 * @return the equivalent Timestamp object
 */
public static Timestamp floatToTimestamp(float f) {
  final double widened = f;
  return doubleToTimestamp(widened);
}
/**
 * Interprets a decimal as a unix timestamp in seconds and returns the
 * equivalent Timestamp. Digits beyond nanosecond precision are dropped.
 *
 * <p>Values with a negative fractional part are normalized by borrowing one
 * second, so that the nanoseconds handed to {@code Timestamp.setNanos} are
 * never negative (e.g. -1.5 becomes -2 seconds plus 500000000 nanos).
 *
 * @param d seconds since the epoch
 * @return the equivalent Timestamp object
 */
public static Timestamp decimalToTimestamp(BigDecimal d) {
  // longValue() truncates toward zero, so the fraction keeps the sign of d.
  long seconds = d.longValue();
  int nanos = d.subtract(new BigDecimal(seconds))
      .multiply(new BigDecimal(1000000000)).intValue();
  if (nanos < 0) {
    seconds -= 1;
    nanos += 1000000000;
  }
  Timestamp t = new Timestamp(seconds * 1000);
  t.setNanos(nanos);
  return t;
}
/**
 * Interprets a double as a unix timestamp in seconds and returns the
 * equivalent Timestamp.
 *
 * <p>Values with a negative fractional part are normalized by borrowing one
 * second, so that the nanoseconds handed to {@code Timestamp.setNanos} are
 * never negative (e.g. -1.5 becomes -2 seconds plus 500000000 nanos).
 *
 * @param f seconds since the epoch
 * @return the equivalent Timestamp object
 * @throws NumberFormatException if {@code f} is NaN or infinite
 */
public static Timestamp doubleToTimestamp(double f) {
  // The cast truncates toward zero, so the fraction keeps the sign of f.
  long seconds = (long) f;

  // We must ensure the exactness of the double's fractional portion.
  // 0.6 as the fraction part will be converted to 0.59999... and
  // significantly reduce the savings from binary serialization
  BigDecimal bd = new BigDecimal(String.valueOf(f));
  bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
  int nanos = bd.intValue();

  // Convert to millis
  long millis = seconds * 1000;
  if (nanos < 0) {
    millis -= 1000;
    nanos += 1000000000;
  }
  Timestamp t = new Timestamp(millis);

  // Set remaining fractional portion to nanos
  t.setNanos(nanos);
  return t;
}
/**
 * Overwrites the given Timestamp with the value serialized at bytes[offset]:
 * the time is set from the seconds field, and the nanos are set from the
 * VInt fraction when the fraction flag is present.
 *
 * @param t the Timestamp to update
 * @param bytes array holding the serialized timestamp
 * @param offset index where the serialized timestamp starts
 */
public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
  final boolean fractional = hasDecimal(bytes[offset]);
  final long millis = TimestampWritable.getSeconds(bytes, offset) * 1000L;
  t.setTime(millis);
  if (fractional) {
    t.setNanos(TimestampWritable.getNanos(bytes, offset + 4));
  }
}
/**
 * Allocates a new Timestamp and fills it from the value serialized at
 * bytes[offset].
 *
 * @param bytes array holding the serialized timestamp
 * @param offset index where the serialized timestamp starts
 * @return a new Timestamp holding the decoded value
 */
public static Timestamp createTimestamp(byte[] bytes, int offset) {
  final Timestamp decoded = new Timestamp(0);
  TimestampWritable.setTimestamp(decoded, bytes, offset);
  return decoded;
}
/**
 * Tells whether this value has a fractional part, according to the flag bit
 * of its serialized form.
 *
 * <p>The byte form is generated first if the value is currently held only
 * as a Timestamp, so the answer reflects the current value rather than
 * stale or missing bytes.
 *
 * @return true if the serialized value carries a fraction
 */
public boolean hasDecimal() {
  checkBytes();
  return hasDecimal(currentBytes[offset]);
}
/**
 * Checks the fraction flag, which is the most significant bit of the first
 * serialized byte.
 *
 * @param b first byte in an encoded TimestampWritable
 * @return true if it has a decimal portion, false otherwise
 */
public static boolean hasDecimal(byte b) {
  // A byte with its top bit set is exactly a negative byte.
  return b < 0;
}
/**
 * Stores <code>value</code> as four big-endian bytes in <code>dest</code>,
 * starting at <code>offset</code>.
 *
 * @param value the integer to store
 * @param dest destination array
 * @param offset index of the most significant byte
 */
private static void intToBytes(int value, byte[] dest, int offset) {
  for (int i = 0; i < 4; i++) {
    final int shift = 24 - 8 * i;
    // The narrowing cast keeps only the low eight bits of the shifted value.
    dest[offset + i] = (byte) (value >>> shift);
  }
}
/**
 * Assembles a big-endian integer from four consecutive bytes.
 *
 * @param bytes source array
 * @param offset index of the most significant byte
 * @return integer represented by the four bytes in <code>bytes</code>
 *         beginning at <code>offset</code>
 */
private static int bytesToInt(byte[] bytes, int offset) {
  int result = 0;
  for (int i = 0; i < 4; i++) {
    // Mask to undo sign extension of the byte before merging it in.
    result = (result << 8) | (bytes[offset + i] & 0xFF);
  }
  return result;
}
}