/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.aliyun.odps.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.aliyun.odps.utils.ReflectionUtils;
/**
 * TupleReaderWriter provides methods for serializing and deserializing a
 * {@link Tuple}.
 */
public class TupleReaderWriter {
// One-byte type codes that tag every value written by this class.
//
// IMPORTANT! These codes may be recorded alongside data on disk, so do not
// change the value of an existing entry. You may strand user data.
// IMPORTANT! Order matters here: compare() below uses the numeric value of
// the code to order objects of unlike types. Don't change this ordering.
// TUPLE is spaced away from the scalar codes to leave room for new entries
// without changing values or creating order issues.
private static final byte UNKNOWN = 0;
private static final byte NULL = 1;
private static final byte NULLWRITABLE = 2;
private static final byte BOOLEANWRITABLE = 3;
private static final byte BYTESWRITABLE = 4;
private static final byte INTWRITABLE = 5;
private static final byte LONGWRITABLE = 6;
private static final byte DATETIMEWRITABLE = 7;
private static final byte DOUBLEWRITABLE = 8;
private static final byte TEXT = 9;
private static final byte TUPLE = 100;

// The logger is never reassigned, so it is declared final.
private static final Log LOG = LogFactory.getLog(TupleReaderWriter.class);
/**
 * Maps a value to the one-byte type code used by this class.
 *
 * @param o
 *          the value to classify; may be {@code null}
 * @return {@code NULL} for a null reference, the matching type code for the
 *         recognized Writable types, or {@code UNKNOWN} for any other class
 */
private static byte findType(Writable o) {
  if (o == null) {
    return NULL;
  }
  // The checks are ordered so that the types expected to be most common are
  // tested first; the order is kept exactly as it was.
  if (o instanceof LongWritable) {
    return LONGWRITABLE;
  }
  if (o instanceof IntWritable) {
    return INTWRITABLE;
  }
  if (o instanceof Text) {
    return TEXT;
  }
  if (o instanceof DoubleWritable) {
    return DOUBLEWRITABLE;
  }
  if (o instanceof BooleanWritable) {
    return BOOLEANWRITABLE;
  }
  if (o instanceof DatetimeWritable) {
    return DATETIMEWRITABLE;
  }
  if (o instanceof BytesWritable) {
    return BYTESWRITABLE;
  }
  if (o instanceof NullWritable) {
    return NULLWRITABLE;
  }
  if (o instanceof Tuple) {
    return TUPLE;
  }
  return UNKNOWN;
}
/**
 * Compares two values that may be of different types.
 *
 * <p>The type code of each operand is determined first and the work is then
 * delegated to {@link #compare(Writable, Writable, byte, byte)}. Operands of
 * the same type are compared with that type's own {@code compareTo}; operands
 * of different types are ordered (arbitrarily, but consistently) by their
 * type codes: UNKNOWN &lt; NULL &lt; NULLWRITABLE &lt; BOOLEANWRITABLE &lt;
 * BYTESWRITABLE &lt; INTWRITABLE &lt; LONGWRITABLE &lt; DATETIMEWRITABLE &lt;
 * DOUBLEWRITABLE &lt; TEXT &lt; TUPLE. A {@code null} reference has type NULL.
 *
 * @param o1
 *          first object, may be {@code null}
 * @param o2
 *          second object, may be {@code null}
 * @return a negative value if o1 sorts before o2, 0 if they are equal, a
 *         positive value if o1 sorts after o2
 */
public static int compare(Writable o1, Writable o2) {
  return compare(o1, o2, findType(o1), findType(o2));
}
/**
 * Same as {@link #compare(Writable, Writable)}, but does not inspect the
 * operands to determine their types, relying instead on the caller to provide
 * the type codes (the values that {@code findType} would return).
 *
 * <p>Use this version in cases where multiple objects of the same type have
 * to be repeatedly compared.
 *
 * @param o1
 *          first object
 * @param o2
 *          second object
 * @param dt1
 *          type, as byte value, of o1
 * @param dt2
 *          type, as byte value, of o2
 * @return -1 or 1 when the type codes differ (lower code sorts first);
 *         otherwise the result of the type's own {@code compareTo}: negative
 *         if o1 &lt; o2, 0 if they are equal, positive if o1 &gt; o2
 * @throws RuntimeException
 *           if both codes are UNKNOWN and at least one operand is not a
 *           {@link WritableComparable}, or if the shared code is not one of
 *           the codes known to this class
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public static int compare(Writable o1, Writable o2, byte dt1, byte dt2) {
  // Unlike types are ordered purely by their type code.
  if (dt1 != dt2) {
    return dt1 < dt2 ? -1 : 1;
  }

  switch (dt1) {
    case NULL:
    case NULLWRITABLE:
      // Neither kind carries a value, so two of them are always equal.
      return 0;
    case BOOLEANWRITABLE:
      return ((BooleanWritable) o1).compareTo((BooleanWritable) o2);
    case BYTESWRITABLE:
      return ((BytesWritable) o1).compareTo((BytesWritable) o2);
    case INTWRITABLE:
      return ((IntWritable) o1).compareTo((IntWritable) o2);
    case LONGWRITABLE:
      return ((LongWritable) o1).compareTo((LongWritable) o2);
    case DATETIMEWRITABLE:
      return ((DatetimeWritable) o1).compareTo((DatetimeWritable) o2);
    case DOUBLEWRITABLE:
      return ((DoubleWritable) o1).compareTo((DoubleWritable) o2);
    case TEXT:
      return ((Text) o1).compareTo((Text) o2);
    case TUPLE:
      return ((Tuple) o1).compareTo((Tuple) o2);
    case UNKNOWN:
      if (o1 instanceof WritableComparable
          && o2 instanceof WritableComparable) {
        return ((WritableComparable) o1).compareTo((WritableComparable) o2);
      }
      // Report the operand that is actually not comparable; previously o1 was
      // always named, even when o2 was the offender.
      Writable offender = (o1 instanceof WritableComparable) ? o2 : o1;
      String offenderName =
          (offender == null) ? "null" : offender.getClass().getName();
      throw new RuntimeException("ODPS-0730001: Class "
          + offenderName + " is not comparable");
    default:
      throw new RuntimeException("Not support type " + dt1 + " in compare");
  }
}
/**
 * A {@link WritableComparator} for {@link Tuple} objects that implements their
 * natural (ascending) order.
 */
public static class TupleRawComparator extends WritableComparator {

  /**
   * Creates a comparator bound to the {@link Tuple} class.
   */
  public TupleRawComparator() {
    // NOTE(review): the meaning of the boolean flag is defined by
    // WritableComparator; presumably it asks the base class to create key
    // instances for comparison -- confirm against WritableComparator.
    super(Tuple.class, true);
  }
}
/**
 * Deserializes a tuple from the given input into the given Tuple object.
 *
 * <p>The expected layout is: the TUPLE type byte, an int field count, then
 * for each field a type byte followed by that field's payload. Every field
 * read is appended to {@code t}; this method does not clear {@code t} first.
 *
 * @param in
 *          input containing the bytes of the tuple's fields
 * @param t
 *          the Tuple object that receives the deserialized fields
 * @throws IOException
 *           if the bytes in the input are not a serialized tuple: the leading
 *           type byte is not TUPLE or the field count is negative
 */
public static void readTuple(DataInput in, Tuple t) throws IOException {
  // Make sure it's a tuple.
  byte marker = in.readByte();
  if (marker != TUPLE) {
    String msg = "Unexpected data while reading tuple from binary file.";
    throw new IOException(msg);
  }

  // Read the number of fields. A negative count can only come from corrupt
  // data; reject it the same way readDatum does for nested tuples instead of
  // silently returning without reading anything.
  int sz = in.readInt();
  if (sz < 0) {
    throw new IOException("Invalid size " + sz + " for a tuple");
  }

  for (int i = 0; i < sz; i++) {
    byte type = in.readByte();
    t.append(readDatum(in, type));
  }
}
/**
 * Reads the payload of a single value whose type byte has already been
 * consumed from the input.
 *
 * @param in
 *          input positioned just after the value's type byte
 * @param type
 *          the type code that was read for this value
 * @return the deserialized value, or {@code null} when the type is NULL
 * @throws IOException
 *           if reading fails, a nested tuple declares a negative size, or a
 *           value of UNKNOWN type cannot be loaded or instantiated
 * @throws RuntimeException
 *           if the type code is not one of the codes known to this class
 */
private static Writable readDatum(DataInput in, byte type) throws IOException {
  switch (type) {
    case TUPLE:
      int sz = in.readInt();
      // if sz == 0, we construct an "empty" tuple -
      // presumably the writer wrote an empty tuple!
      if (sz < 0) {
        throw new IOException("Invalid size " + sz + " for a tuple");
      }
      Tuple tp = new Tuple(sz);
      for (int i = 0; i < sz; i++) {
        byte b = in.readByte();
        tp.set(i, readDatum(in, b));
      }
      return tp;
    case NULL:
      return null;
    case INTWRITABLE:
      IntWritable iw = new IntWritable();
      iw.readFields(in);
      return iw;
    case LONGWRITABLE:
      LongWritable lw = new LongWritable();
      lw.readFields(in);
      return lw;
    case DATETIMEWRITABLE:
      DatetimeWritable dtw = new DatetimeWritable();
      dtw.readFields(in);
      return dtw;
    case DOUBLEWRITABLE:
      DoubleWritable dw = new DoubleWritable();
      dw.readFields(in);
      return dw;
    case BOOLEANWRITABLE:
      BooleanWritable bw = new BooleanWritable();
      bw.readFields(in);
      return bw;
    case BYTESWRITABLE:
      BytesWritable bsw = new BytesWritable();
      bsw.readFields(in);
      return bsw;
    case TEXT:
      Text t = new Text();
      t.readFields(in);
      return t;
    case NULLWRITABLE:
      NullWritable nw = NullWritable.get();
      nw.readFields(in);
      return nw;
    case UNKNOWN:
      return readUnknownWritable(in);
    default:
      throw new RuntimeException("Unexpected data type " + type
          + " found in stream.");
  }
}

/**
 * Reads a value of UNKNOWN type: a UTF class name followed by the payload
 * that the named class reads through its own {@code readFields}.
 *
 * @param in
 *          input positioned just after the UNKNOWN type byte
 * @return a new instance of the named class, populated from the input
 * @throws IOException
 *           if reading fails, the class cannot be found, the class is not a
 *           {@link Writable}, or it cannot be instantiated
 */
private static Writable readUnknownWritable(DataInput in) throws IOException {
  // NOTE(review): the class name comes straight from the stream, so this
  // path must only be fed data from a trusted writer.
  String clsName = in.readUTF();
  try {
    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
    if (classLoader == null) {
      // Same loader that the single-argument Class.forName would use here.
      classLoader = TupleReaderWriter.class.getClassLoader();
    }
    // Check that the class is a Writable BEFORE instantiating it, and defer
    // class initialization until that check has passed. asSubclass throws a
    // ClassCastException otherwise, which is reported as an IOException below.
    Class<? extends Writable> cls =
        Class.forName(clsName, false, classLoader).asSubclass(Writable.class);
    Writable w = (Writable) ReflectionUtils.newInstance(cls, null);
    w.readFields(in);
    return w;
  } catch (RuntimeException re) {
    LOG.info(re.getMessage());
    throw new IOException(re);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  }
}
/**
 * Serializes the given Tuple object to the given output.
 *
 * <p>The layout written is: the TUPLE type byte, the field count as an int,
 * then every field in index order, each written by {@code writeDatum}.
 *
 * @param out
 *          the output that the Tuple is written to
 * @param t
 *          the Tuple object to write
 * @throws IOException
 *           if an error occurs while serializing one of the tuple's fields
 */
public static void writeTuple(DataOutput out, Tuple t) throws IOException {
  out.writeByte(TUPLE);
  final int fieldCount = t.size();
  out.writeInt(fieldCount);
  int index = 0;
  while (index < fieldCount) {
    writeDatum(out, t.get(index));
    index++;
  }
}
/**
 * Writes a single value to the output, prefixed by its one-byte type code.
 *
 * <p>A {@code null} value is written as the NULL code alone. A nested tuple
 * is written as the TUPLE code, its field count, and its fields. A value of
 * an unrecognized class is written as the UNKNOWN code, its class name in
 * UTF form, and then its own payload.
 *
 * @param out
 *          the output to write to
 * @param val
 *          the value to write; may be {@code null}
 * @throws IOException
 *           if writing to the output fails
 */
private static void writeDatum(DataOutput out, Writable val)
    throws IOException {
  // Determine the type code of the value.
  final byte type = findType(val);
  switch (type) {
    case TUPLE:
      // writeTuple emits exactly the TUPLE code, the size and every field.
      writeTuple(out, (Tuple) val);
      break;
    case NULL:
      out.writeByte(NULL);
      break;
    case INTWRITABLE:
    case LONGWRITABLE:
    case DATETIMEWRITABLE:
    case DOUBLEWRITABLE:
    case BOOLEANWRITABLE:
    case BYTESWRITABLE:
    case TEXT:
    case NULLWRITABLE:
      // Every built-in type is its own code followed by its own payload.
      out.writeByte(type);
      val.write(out);
      break;
    case UNKNOWN:
      out.writeByte(UNKNOWN);
      // The class name lets the reader re-create the instance.
      out.writeUTF(val.getClass().getName());
      val.write(out);
      break;
    default:
      throw new RuntimeException("Unexpected data type " + type
          + " found in stream.");
  }
}
}