/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.tunnel.io;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.proto.ProtobufRecordStreamWriter;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordPack;
import com.aliyun.odps.data.RecordReader;
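/**
* A {@link RecordPack} that stores its records serialized with Protobuf.
* Intended to be used together with TableTunnel.
* Uses less memory than holding the records directly in a {@code List<Record>}.
*/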
public class ProtobufRecordPack extends RecordPack {

  /** Serializes appended records into {@link #byteos}; replaced by a new instance on reset. */
  private ProtobufRecordStreamWriter writer;
  /** In-memory buffer that receives everything the writer emits. */
  private final ByteArrayOutputStream byteos;
  /** Number of records appended since construction or the last reset. */
  private long count = 0;
  /** Schema handed to every writer created by this pack. */
  private final TableSchema schema;
  /** Compression option handed to every writer; may be {@code null}. */
  private final CompressOption option;
  /** {@code true} once {@link #complete()} has closed the current writer. */
  private boolean isComplete = false;

  /**
   * Creates a pack with a freshly created {@link Checksum}.
   *
   * @param schema schema passed to the underlying writer
   * @throws IOException if the underlying writer cannot be created
   */
  public ProtobufRecordPack(TableSchema schema) throws IOException {
    this(schema, new Checksum());
  }

  /**
   * Creates a pack whose writer is initialized with the given checksum.
   *
   * @param schema   schema passed to the underlying writer
   * @param checkSum checksum installed on the writer; ignored when {@code null}
   * @throws IOException if the underlying writer cannot be created
   */
  public ProtobufRecordPack(TableSchema schema, Checksum checkSum) throws IOException {
    this(schema, checkSum, 0);
  }

  /**
   * Creates a pack whose writer is initialized with the given checksum and whose
   * in-memory buffer is pre-sized to {@code capacity} bytes.
   *
   * @param schema   schema passed to the underlying writer
   * @param checkSum checksum installed on the writer; ignored when {@code null}
   * @param capacity initial buffer size in bytes; {@code 0} selects the default size
   * @throws IOException if the underlying writer cannot be created
   */
  public ProtobufRecordPack(TableSchema schema, Checksum checkSum, int capacity)
      throws IOException {
    this(schema, checkSum, capacity, null);
  }

  /**
   * Creates a pack whose writer is initialized with the given checksum and
   * compression option.
   *
   * @param schema   schema passed to the underlying writer
   * @param checksum checksum installed on the writer; ignored when {@code null}
   * @param option   compression option passed to the writer; may be {@code null}
   * @throws IOException if the underlying writer cannot be created
   */
  public ProtobufRecordPack(TableSchema schema, Checksum checksum, CompressOption option)
      throws IOException {
    this(schema, checksum, 0, option);
  }

  /**
   * Creates a pack whose writer is initialized with the given checksum and
   * compression option, and whose in-memory buffer is pre-sized to
   * {@code capacity} bytes.
   *
   * @param schema   schema passed to the underlying writer
   * @param checksum checksum installed on the writer; ignored when {@code null}
   * @param capacity initial buffer size in bytes; {@code 0} selects the default size
   * @param option   compression option passed to the writer; may be {@code null}
   * @throws IOException if the underlying writer cannot be created
   */
  public ProtobufRecordPack(TableSchema schema, Checksum checksum, int capacity,
                            CompressOption option) throws IOException {
    // A capacity of 0 means "not specified": fall back to the JDK default buffer size.
    this.byteos =
        (capacity == 0) ? new ByteArrayOutputStream() : new ByteArrayOutputStream(capacity);
    this.schema = schema;
    this.option = option;
    this.writer = new ProtobufRecordStreamWriter(schema, byteos, option);
    if (checksum != null) {
      writer.setCheckSum(checksum);
    }
  }

  /**
   * Serializes one record through the underlying writer and increments the
   * record count.
   *
   * <p>NOTE(review): there is no guard against appending after
   * {@link #complete()}; presumably callers are expected to reset the pack
   * first -- confirm.
   *
   * @param a record to append
   * @throws IOException if the writer fails to serialize the record
   */
  @Override
  public void append(Record a) throws IOException {
    writer.write(a);
    ++count;
  }

  /**
   * Not supported by this pack.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public RecordReader getRecordReader() throws IOException {
    throw new UnsupportedOperationException("PBPack does not supported Read.");
  }

  // FIXME: despite the name, this does not return a protobuf stream; it returns
  // the in-memory buffer that the protobuf writer outputs into.
  /**
   * Returns the in-memory buffer holding the serialized data. While the pack is
   * not yet complete the writer is flushed first.
   *
   * @return the internal buffer itself (not a copy)
   * @throws IOException if flushing the writer fails
   */
  public ByteArrayOutputStream getProtobufStream() throws IOException {
    if (!isComplete) {
      writer.flush();
    }
    return byteos;
  }

  /**
   * Closes the underlying writer and marks the pack as complete. Calling this
   * again on an already completed pack does nothing.
   *
   * @throws IOException if closing the writer fails
   */
  public void complete() throws IOException {
    if (!isComplete) {
      writer.close();
      isComplete = true;
    }
  }

  /**
   * @return the compression option supplied at construction, or {@code null} if none
   */
  public CompressOption getCompressOption() {
    return this.option;
  }

  /**
   * Returns the current size of this pack in the in-memory buffer.
   *
   * <p>Note: before reaching the in-memory buffer the data passes through two
   * other buffers (protobuf and deflater), so this value does not grow
   * continuously as records are appended.
   *
   * @return number of bytes currently held in the in-memory buffer
   * @throws IOException never thrown by this implementation; kept for interface compatibility
   */
  public long getTotalBytes() throws IOException {
    return byteos.size();
  }

  /**
   * Returns the number of bytes of serialized output as reported by the writer.
   *
   * @return the writer's total byte count
   * @throws IOException if flushing the writer fails
   */
  protected long getTotalBytesWritten() throws IOException {
    // Only flush a writer that is still open: complete() has already closed it,
    // and getProtobufStream() applies the same guard.
    if (!isComplete) {
      writer.flush();
    }
    return writer.getTotalBytes();
  }

  /**
   * @return the checksum currently held by the underlying writer
   */
  public Checksum getCheckSum() {
    return writer.getCheckSum();
  }

  /**
   * Empties the pack: clears the in-memory buffer, zeroes the record count,
   * replaces the writer with a new one and clears the complete flag.
   *
   * <p>No checksum is installed on the new writer here; use
   * {@link #reset(Checksum)} to supply one.
   *
   * <p>NOTE(review): the previous writer is dropped without being closed;
   * whether it holds resources that need releasing (e.g. when compression is
   * enabled) should be confirmed against ProtobufRecordStreamWriter.
   *
   * @throws IOException if the new writer cannot be created
   */
  public void reset() throws IOException {
    byteos.reset();
    count = 0;
    this.writer = new ProtobufRecordStreamWriter(schema, byteos, option);
    isComplete = false;
  }

  /**
   * @return {@code true} if {@link #complete()} has been called since
   *         construction or the last reset
   */
  public boolean isComplete() {
    return isComplete;
  }

  /**
   * Empties the pack like {@link #reset()} and then installs the given checksum
   * on the new writer.
   *
   * @param checksum checksum for the new writer; ignored when {@code null}
   * @throws IOException if the new writer cannot be created
   */
  public void reset(Checksum checksum) throws IOException {
    reset();
    if (checksum != null) {
      this.writer.setCheckSum(checksum);
    }
  }

  /**
   * @return the number of records appended since construction or the last reset
   */
  public long getSize() {
    return count;
  }
}