/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.aliyun.odps.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import com.aliyun.odps.utils.CommonUtils;
import com.aliyun.odps.utils.ReflectionUtils;
/**
 * A Tuple is an ordered collection of {@link Writable} objects.
 *
 * <p>
 * A Tuple stores its {@link Writable} objects in a {@link List}; each
 * {@link Writable} is one field of the Tuple.<br/>
 * The methods of Tuple resemble those of {@link Record}, with these
 * differences:
 * <ul>
 * <li>A Tuple carries no schema information; it is only a collection of data.
 * <li>A Tuple can be created freely, whereas a {@link Record} is created by
 * the MapReduce framework.
 * </ul>
 *
 * @see TupleReaderWriter
 */
public class Tuple implements WritableComparable<Object> {

  /**
   * Holds all fields of this Tuple. Elements may be {@code null}.
   */
  protected List<Writable> mFields;

  /**
   * Default column delimiter used by {@link #toDelimitedString()}: CTRL + A.
   */
  public static final char DELIM = '\u0001';

  /**
   * Default NULL indicator used by {@link #toDelimitedString()}: a backslash
   * followed by the letter N.
   */
  public static final String NULLINDICATOR = "\\N";

  /**
   * Creates an empty Tuple.
   */
  public Tuple() {
    mFields = new ArrayList<Writable>();
  }

  /**
   * Creates a Tuple with the given number of fields, all initialized to
   * {@code null}.
   *
   * @param size
   *          number of {@link Writable} fields
   */
  public Tuple(int size) {
    mFields = new ArrayList<Writable>(size);
    for (int i = 0; i < size; i++) {
      mFields.add(null);
    }
  }

  /**
   * Creates a Tuple whose fields are the elements of the given array, in
   * order. The array itself is not retained, but its elements are stored by
   * reference.
   *
   * @param fields
   *          array of {@link Writable} objects
   */
  public Tuple(Writable[] fields) {
    mFields = new ArrayList<Writable>(fields.length);
    for (Writable field : fields) {
      mFields.add(field);
    }
  }

  /**
   * Creates a Tuple from a {@link List}.
   *
   * @param vals
   *          the fields
   * @param reference
   *          if {@code true}, the new Tuple uses {@code vals} directly as its
   *          backing list; otherwise a temporary Tuple built from
   *          {@code vals} is copied with {@link #clone()} and the copy's list
   *          is used
   */
  public Tuple(List<Writable> vals, boolean reference) {
    if (reference) {
      this.mFields = vals;
    } else {
      Tuple copy = new Tuple(vals.toArray(new Writable[vals.size()])).clone();
      this.mFields = copy.mFields;
    }
  }

  /**
   * Creates a Tuple from the data of another Tuple.
   *
   * @param tuple
   *          the Tuple to copy from
   * @param reference
   *          if {@code true}, the backing {@link List} is shared with
   *          {@code tuple}; otherwise the list of {@code tuple.clone()} is
   *          used
   */
  public Tuple(Tuple tuple, boolean reference) {
    if (reference) {
      this.mFields = tuple.mFields;
    } else {
      this.mFields = tuple.clone().mFields;
    }
  }

  /**
   * Returns the size of the tuple. Used to be called arity().
   *
   * @return number of fields in the tuple.
   */
  public int size() {
    return mFields.size();
  }

  /**
   * Returns the value in a given field.
   *
   * @param fieldNum
   *          index of the field to read.
   * @return the stored value, possibly {@code null}.
   * @throws IndexOutOfBoundsException
   *           if the field number is out of range for the backing list.
   */
  public Writable get(int fieldNum) {
    return mFields.get(fieldNum);
  }

  /**
   * Returns all fields of the tuple as a list. The returned list is the
   * backing list itself, not a copy.
   *
   * @return {@code List<Writable>} containing the fields of the tuple in
   *         order.
   */
  public List<Writable> getAll() {
    return mFields;
  }

  /**
   * Returns the fields of this Tuple as a newly allocated array.
   *
   * @return array of the {@link Writable} objects stored in this Tuple
   */
  public Writable[] toArray() {
    return mFields.toArray(new Writable[mFields.size()]);
  }

  /**
   * Sets the value in a given field.
   *
   * @param fieldNum
   *          index of the field to set.
   * @param val
   *          Writable to put in the indicated field.
   * @throws IOException
   *           declared for source compatibility with existing callers; this
   *           implementation does not throw it.
   * @throws IndexOutOfBoundsException
   *           if the field number is out of range for the backing list.
   */
  public void set(int fieldNum, Writable val) throws IOException {
    mFields.set(fieldNum, val);
  }

  /**
   * Replaces the backing list of this Tuple with {@code vals}; the list is
   * reused, not copied.
   *
   * @param vals
   *          the {@link Writable} objects to use as fields
   */
  public void set(List<Writable> vals) {
    mFields = vals;
  }

  /**
   * Replaces the fields of this Tuple with the elements of {@code vals}.
   *
   * @param vals
   *          the {@link Writable} objects to use as fields
   */
  public void set(Writable[] vals) {
    mFields.clear();
    for (Writable val : vals) {
      mFields.add(val);
    }
  }

  /**
   * Appends a field to the tuple. This method is not efficient as it may
   * force copying of existing data in order to grow the data structure.
   * Whenever possible you should construct your Tuple with the Tuple(int)
   * constructor and then fill in the values with set(), rather than construct
   * it with Tuple() and append values.
   *
   * @param val
   *          object to append to the tuple.
   */
  public void append(Writable val) {
    mFields.add(val);
  }

  /**
   * Appends every element of the array to this Tuple, in order.
   *
   * @param vals
   *          the fields to append
   */
  public void append(Writable[] vals) {
    for (Writable val : vals) {
      mFields.add(val);
    }
  }

  /**
   * Appends every element of the collection to this Tuple.
   *
   * @param vals
   *          the fields to append
   */
  public void append(Collection<Writable> vals) {
    mFields.addAll(vals);
  }

  /**
   * Removes all fields from this Tuple.
   */
  public void clear() {
    mFields.clear();
  }

  /**
   * Builds a delimited string of all fields using {@link #DELIM} and
   * {@link #NULLINDICATOR}.
   *
   * @return the string produced by
   *         {@link #toDelimitedString(char, String)}
   */
  public String toDelimitedString() {
    return toDelimitedString(DELIM, NULLINDICATOR);
  }

  /**
   * Builds a delimited string of all fields using the given delimiter and
   * {@link #NULLINDICATOR}.
   *
   * @param delim
   *          the delimiter
   * @return the string produced by
   *         {@link #toDelimitedString(char, String)}
   */
  public String toDelimitedString(char delim) {
    return toDelimitedString(delim, NULLINDICATOR);
  }

  /**
   * Builds a delimited string of all fields by delegating to
   * {@code CommonUtils.toDelimitedString}; presumably fields are joined with
   * {@code delim} and {@code null} fields are rendered as
   * {@code nullIndicator}.
   *
   * @param delim
   *          the delimiter
   * @param nullIndicator
   *          the string standing in for {@code null} fields
   * @return the resulting string
   */
  public String toDelimitedString(char delim, String nullIndicator) {
    return CommonUtils.toDelimitedString(
        mFields.toArray(new Writable[mFields.size()]), delim, nullIndicator);
  }

  /**
   * Renders a tuple as {@code (f0,f1,...)}. Nested tuples are rendered
   * recursively with this same method; other non-null fields use their
   * {@code toString()}; {@code null} fields contribute an empty string.
   */
  private static String format(Tuple tuple) {
    StringBuilder sb = new StringBuilder();
    sb.append('(');
    for (int i = 0; i < tuple.size(); ++i) {
      Writable field = tuple.get(i);
      if (field instanceof Tuple) {
        sb.append(format((Tuple) field));
      } else if (field != null) {
        sb.append(field.toString());
      }
      // A null field appends nothing, leaving an empty slot between commas.
      if (i != tuple.size() - 1) {
        sb.append(",");
      }
    }
    sb.append(')');
    return sb.toString();
  }

  /**
   * Returns the string form of this Tuple: a parenthesized, comma-separated
   * list of its fields.
   */
  @Override
  public String toString() {
    return format(this);
  }

  /**
   * Compares this Tuple with another object.
   *
   * <p>
   * When {@code other} is a Tuple, the tuple with fewer fields orders first;
   * tuples of equal size are compared field by field with
   * {@code TupleReaderWriter.compare}, and the first non-zero result is
   * returned. Otherwise {@code other} is cast to {@link Writable} and the
   * comparison is delegated to {@code TupleReaderWriter.compare}.
   *
   * @throws ClassCastException
   *           if {@code other} is neither {@code null} nor a
   *           {@link Writable}.
   */
  @Override
  public int compareTo(Object other) {
    if (!(other instanceof Tuple)) {
      return TupleReaderWriter.compare(this, (Writable) other);
    }
    Tuple that = (Tuple) other;
    int mySize = mFields.size();
    int otherSize = that.size();
    if (otherSize < mySize) {
      return 1;
    }
    if (otherSize > mySize) {
      return -1;
    }
    for (int i = 0; i < mySize; i++) {
      int c = TupleReaderWriter.compare(mFields.get(i), that.get(i));
      if (c != 0) {
        return c;
      }
    }
    return 0;
  }

  /**
   * Tests whether this Tuple equals another object, defined as
   * {@link #compareTo(Object)} returning zero.
   *
   * <p>
   * {@code null} and objects that are not {@link Writable} are never equal to
   * a Tuple; they are rejected up front so that this method returns
   * {@code false} instead of failing on the cast inside
   * {@link #compareTo(Object)}.
   */
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof Writable)) {
      return false;
    }
    return compareTo(other) == 0;
  }

  /**
   * Returns the hash code of this Tuple, combined from the hash codes of its
   * non-null fields in order; {@code null} fields are skipped.
   */
  @Override
  public int hashCode() {
    int hash = 17;
    for (Writable w : mFields) {
      if (w != null) {
        hash = 31 * hash + w.hashCode();
      }
    }
    return hash;
  }

  /**
   * Serializes this Tuple to the given output via
   * {@code TupleReaderWriter.writeTuple}.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    TupleReaderWriter.writeTuple(out, this);
  }

  /**
   * Deserializes this Tuple from the given input via
   * {@code TupleReaderWriter.readTuple}.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    // Clear our fields, in case we're being reused.
    mFields.clear();
    TupleReaderWriter.readTuple(in, this);
  }

  /**
   * Creates a copy of this Tuple by filling a new Tuple through
   * {@code ReflectionUtils.cloneWritableInto}.
   *
   * @return the copy
   * @throws RuntimeException
   *           if any field is an {@code InputBlobWritable} or
   *           {@code OutputBlobWritable}, or if copying fails with an
   *           {@link IOException} (which is attached as the cause).
   */
  @Override
  public Tuple clone() {
    for (Writable field : mFields) {
      if (field instanceof InputBlobWritable
          || field instanceof OutputBlobWritable) {
        throw new RuntimeException(
            "ODPS-0730001: Tuple with BlobWritable not support clone");
      }
    }
    Tuple tuple = new Tuple();
    try {
      ReflectionUtils.cloneWritableInto(tuple, this);
    } catch (IOException ex) {
      // Keep the original message but chain the cause so the stack trace
      // of the underlying failure is not lost.
      throw new RuntimeException(ex.getMessage(), ex);
    }
    return tuple;
  }
}