/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.file;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TCompactProtocol;
import org.diqube.buildinfo.BuildInfo;
import org.diqube.data.serialize.DataSerializer;
import org.diqube.data.serialize.DataSerializer.ObjectDoneConsumer;
import org.diqube.data.table.TableShard;
import org.diqube.data.serialize.SerializationException;
import org.diqube.file.v1.SDiqubeFileFooter;
import org.diqube.file.v1.SDiqubeFileFooterInfo;
import org.diqube.file.v1.SDiqubeFileHeader;
import com.google.common.io.ByteStreams;
/**
 * Writes a single .diqube file which can contain multiple serialized {@link TableShard}s.
 *
 * <p>
 * The file header is written when the writer is constructed, table shard data is appended by the {@code write*}
 * methods, and the file footer is written by {@link #close()}. {@link #close()} therefore has to be called in order
 * to produce a complete file. Closing this writer does <b>not</b> close the {@link OutputStream} it writes to; the
 * caller stays responsible for closing that stream.
 *
 * <p>
 * {@link #close()} is idempotent: the footer is written at most once, subsequent calls have no effect. The
 * {@code write*} methods must not be called after {@link #close()}, because their data would be appended behind the
 * footer.
 *
 * @author Bastian Gloeckle
 */
public class DiqubeFileWriter implements Closeable {
  /** Magic string that is stored in the header of each file. */
  public static final String MAGIC_STRING = "diqube";

  /** Version of the file layout that is stored in the header of each file. */
  public static final int FILE_VERSION = 1;

  private final DataSerializer serializer;

  private final OutputStream outputStream;

  /** Serializer used for the file header and the file footer. */
  private final TSerializer compactSerializer = new TSerializer(new TCompactProtocol.Factory());

  /** Total number of rows of all table shards written so far; stored in the footer. */
  private long numberOfRows = 0;

  /** Number of table shards written so far; stored in the footer. */
  private int numberOfTableShards = 0;

  /** Optional comment that is stored in the footer; <code>null</code> is written as empty string. */
  private String comment = null;

  /** <code>true</code> as soon as {@link #close()} was called once, to make sure the footer is written only once. */
  private boolean closed = false;

  /**
   * Creates the writer and immediately writes the file header to the given stream and flushes it.
   *
   * @param serializer
   *          The serializer used to serialize {@link TableShard}s in
   *          {@link #writeTableShard(TableShard, ObjectDoneConsumer)}.
   * @param outputStream
   *          The stream to write the file to. It is not closed by this class.
   * @throws IOException
   *           If the header cannot be serialized or written.
   */
  /* package */ DiqubeFileWriter(DataSerializer serializer, OutputStream outputStream) throws IOException {
    this.serializer = serializer;
    this.outputStream = outputStream;

    SDiqubeFileHeader fileHeader = new SDiqubeFileHeader();
    fileHeader.setMagic(MAGIC_STRING);
    fileHeader.setFileVersion(FILE_VERSION);
    fileHeader.setContentVersion(DataSerializer.DATA_VERSION);
    fileHeader.setWriterBuildGitCommit(BuildInfo.getGitCommitLong());
    fileHeader.setWriterBuildTimestamp(BuildInfo.getTimestamp());

    try {
      byte[] headerBytes = compactSerializer.serialize(fileHeader);
      outputStream.write(headerBytes);
      outputStream.flush();
    } catch (TException | IOException e) {
      throw new IOException("Could not serialize/write file header", e);
    }
  }

  /**
   * Serialize and write a TableShard into the output stream.
   *
   * <p>
   * This method does not flush the output stream itself. NOTE(review): the serializer may flush the stream - confirm
   * against {@link DataSerializer}.
   *
   * @param tableShard
   *          The shard to serialize and write
   * @param objectDoneConsumer
   *          Called after serialization is done on a specific object, see
   *          {@link DataSerializer#serialize(org.diqube.data.serialize.DataSerialization, OutputStream, ObjectDoneConsumer)}
   * @throws SerializationException
   *           Thrown if the shard cannot be serialized. In that case the shard is not counted in the row/shard
   *           numbers that are written to the footer.
   */
  public void writeTableShard(TableShard tableShard, ObjectDoneConsumer objectDoneConsumer)
      throws SerializationException {
    // Read the number of rows before serializing, that is before the objectDoneConsumer is called for any object of
    // the shard. The counters are updated only after serialization succeeded, so a shard whose serialization threw an
    // exception is not counted.
    long numberOfRowsDelta = tableShard.getNumberOfRowsInShard();

    serializer.serialize(tableShard, outputStream, objectDoneConsumer);

    numberOfTableShards++;
    numberOfRows += numberOfRowsDelta;
  }

  /**
   * Write data of already serialized table shards to the file and flush the output stream.
   *
   * <p>
   * The given input stream is fully copied to the output, but it is not closed by this method.
   *
   * @param serializedTableShards
   *          The serialized data of one or multiple table shards
   * @param totalNumberOfRows
   *          The total number of rows all the TableShards contain
   * @param numberOfTableShards
   *          The number of table shards that are provided
   * @throws IOException
   *           If anything cannot be written. The row/shard counters are not changed in that case.
   */
  public void writeSerializedTableShards(InputStream serializedTableShards, long totalNumberOfRows,
      int numberOfTableShards) throws IOException {
    ByteStreams.copy(serializedTableShards, outputStream);
    outputStream.flush();

    this.numberOfTableShards += numberOfTableShards;
    this.numberOfRows += totalNumberOfRows;
  }

  /**
   * Writes the file's footer and flushes the output stream. The output stream itself is not closed.
   *
   * <p>
   * As required by {@link Closeable#close()}, calling this method more than once has no effect: the footer is written
   * by the first call only. The writer is marked as closed before the footer is written, which means that the footer
   * is not written again by a later call if the first call failed.
   *
   * @throws IOException
   *           If the footer cannot be serialized or written.
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      // Footer was (tried to be) written already, do not append a second footer to the file.
      return;
    }
    closed = true;

    SDiqubeFileFooter footer = new SDiqubeFileFooter();
    footer.setComment((comment != null) ? comment : "");
    footer.setNumberOfRows(numberOfRows);
    footer.setNumberOfTableShards(numberOfTableShards);

    try {
      byte[] footerBytes = compactSerializer.serialize(footer);
      outputStream.write(footerBytes);

      // The footer info is the very last piece of the file and contains the length of the footer written above.
      // NOTE(review): it is serialized with the binary protocol instead of the compact one, presumably to get a
      // fixed-length trailer which readers can locate from the end of the file - confirm against the reader.
      SDiqubeFileFooterInfo fileFooterInfo = new SDiqubeFileFooterInfo();
      fileFooterInfo.setFooterLengthBytes(footerBytes.length);
      byte[] fileFooterInfoBytes = new TSerializer(new TBinaryProtocol.Factory()).serialize(fileFooterInfo);
      outputStream.write(fileFooterInfoBytes);

      outputStream.flush();
    } catch (TException | IOException e) {
      throw new IOException("Could not serialize/write footer", e);
    }
  }

  /**
   * @param comment
   *          Put this comment string in the generated file. Call this before {@link #close()}, as the comment is
   *          written as part of the footer; setting it afterwards has no effect on the file.
   */
  public void setComment(String comment) {
    this.comment = comment;
  }
}