/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.file;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TIOStreamTransport;
import org.diqube.data.serialize.DataDeserializer;
import org.diqube.data.serialize.DataSerializer;
import org.diqube.data.serialize.DeserializationException;
import org.diqube.data.table.DefaultTableShard;
import org.diqube.data.table.TableShard;
import org.diqube.file.v1.SDiqubeFileFooter;
import org.diqube.file.v1.SDiqubeFileFooterInfo;
import org.diqube.file.v1.SDiqubeFileHeader;
import org.diqube.util.BigByteBuffer;
import org.diqube.util.ReadCountInputStream;
/**
* Reads a single .diqube file and is capable of deserializing the {@link TableShard}s stored in it.
*
* @author Bastian Gloeckle
*/
public class DiqubeFileReader {
  /**
   * Cached number of bytes a serialized {@link SDiqubeFileFooterInfo} takes up at the end of a file. This is constant
   * and equal for all files, as it is de-/serialized using {@link TBinaryProtocol} (fixed-width encoding). A value of
   * -1 means "not yet calculated".
   *
   * <p>
   * Accessed only through the {@code static synchronized} {@link #footerInfoLengthBytes()}: an unsynchronized lazily
   * initialized {@code long} would be a data race and could even suffer word tearing (64-bit writes are not guaranteed
   * atomic without volatile/synchronization).
   */
  private static long fileFooterInfoLengthBytes = -1L;

  private final DataDeserializer deserializer;

  /** Footer of the file, deserialized during construction. */
  private final SDiqubeFileFooter footer;

  /** Header of the file, deserialized and validated during construction. */
  private final SDiqubeFileHeader header;

  /** The raw bytes of the whole .diqube file. */
  private final BigByteBuffer data;

  /** Index in {@link #data} of the first byte belonging to serialized TableShard data. */
  private final long firstTableShardByteIndex;

  /** Index in {@link #data} of the last byte belonging to serialized TableShard data. */
  private final long lastTableShardByteIndex;

  /**
   * Reads and validates the file header, the footer-length info and the footer of the given file data.
   *
   * @param deserializer
   *          Used later to deserialize the {@link TableShard}s in {@link #loadAllTableShards()}.
   * @param data
   *          The raw bytes of the .diqube file.
   * @throws IOException
   *           If the file is invalid, has an unsupported file/content version or cannot be parsed.
   */
  /* package */ DiqubeFileReader(DataDeserializer deserializer, BigByteBuffer data) throws IOException {
    this.deserializer = deserializer;
    this.data = data;

    // Read and validate the file header at the very beginning of the file.
    try (ReadCountInputStream is = new ReadCountInputStream(data.createInputStream())) {
      TIOStreamTransport transport = new TIOStreamTransport(is);
      TProtocol compactProt = new TCompactProtocol(transport);
      header = new SDiqubeFileHeader();
      header.read(compactProt);

      // The first TableShard byte directly follows the SDiqubeFileHeader.
      firstTableShardByteIndex = is.getNumberOfBytesRead();

      if (!DiqubeFileWriter.MAGIC_STRING.equals(header.getMagic()))
        throw new IOException("File is invalid.");

      if (header.getFileVersion() != DiqubeFileWriter.FILE_VERSION)
        throw new IOException("Only file version " + DiqubeFileWriter.FILE_VERSION + " supported, but found version "
            + header.getFileVersion());

      if (header.getContentVersion() != DataSerializer.DATA_VERSION)
        throw new IOException("Only content version " + DataSerializer.DATA_VERSION + " supported, but found version "
            + header.getContentVersion());
    } catch (TException e) {
      throw new IOException("Could not load file header", e);
    }

    long footerInfoLength = footerInfoLengthBytes();

    // Read the fixed-length SDiqubeFileFooterInfo at the very end of the file; it contains the length of the actual
    // footer which precedes it.
    int footerLengthBytes;
    try (InputStream is = data.createPartialInputStream(data.size() - footerInfoLength, data.size())) {
      TIOStreamTransport transport = new TIOStreamTransport(is);
      TProtocol binaryProt = new TBinaryProtocol(transport);
      SDiqubeFileFooterInfo footerInfo = new SDiqubeFileFooterInfo();
      footerInfo.read(binaryProt);
      footerLengthBytes = footerInfo.getFooterLengthBytes();
    } catch (TException e) {
      throw new IOException("Could not read length of file footer", e);
    }

    lastTableShardByteIndex = data.size() - footerInfoLength - footerLengthBytes - 1;

    // Read the actual footer, which starts right after the TableShard data.
    try (InputStream is = data.createPartialInputStream(lastTableShardByteIndex + 1, data.size())) {
      TIOStreamTransport transport = new TIOStreamTransport(is);
      TProtocol compactProt = new TCompactProtocol(transport);
      footer = new SDiqubeFileFooter();
      footer.read(compactProt);
    } catch (TException e) {
      throw new IOException("Could not read footer", e);
    }
  }

  /**
   * Lazily calculates (and then caches) the serialized size of a {@link SDiqubeFileFooterInfo}.
   *
   * <p>
   * {@code synchronized} on the class so concurrent constructor invocations cannot race on the cached {@code long}.
   *
   * @throws IOException
   *           If the size could not be calculated.
   */
  private static synchronized long footerInfoLengthBytes() throws IOException {
    if (fileFooterInfoLengthBytes == -1L) {
      // The struct contains a single fixed-width int field, so its TBinaryProtocol size does not depend on the value
      // we serialize here.
      SDiqubeFileFooterInfo fileFooterInfo = new SDiqubeFileFooterInfo();
      fileFooterInfo.setFooterLengthBytes(1);
      try {
        byte[] fileFooterInfoBytes = new TSerializer(new TBinaryProtocol.Factory()).serialize(fileFooterInfo);
        fileFooterInfoLengthBytes = fileFooterInfoBytes.length;
      } catch (TException e) {
        throw new IOException("Could not calculate length of SDiqubeFileFooterInfo", e);
      }
    }
    return fileFooterInfoLengthBytes;
  }

  /**
   * @return The number of rows contained in the file (sum of the number of rows of all table shards in the file).
   */
  public long getNumberOfRows() {
    return footer.getNumberOfRows();
  }

  /**
   * @return Number of {@link TableShard}s stored in the file.
   */
  public int getNumberOfTableShards() {
    return footer.getNumberOfTableShards();
  }

  /**
   * @return The comment that is stored in the file.
   */
  public String getComment() {
    return footer.getComment();
  }

  /**
   * Expert: Get the index of the first byte in the file that contains data for a TableShard.
   */
  public long getTableShardDataFirstByteIndex() {
    return firstTableShardByteIndex;
  }

  /**
   * Expert: Get the index of the last byte in the file that contains data for a TableShard.
   */
  public long getTableShardDataLastByteIndex() {
    return lastTableShardByteIndex;
  }

  /**
   * @return The git commit ID of the build of the software that wrote this file.
   */
  public String getWriterBuildGitCommit() {
    return header.getWriterBuildGitCommit();
  }

  /**
   * @return The timestamp at which the software that wrote this file was built.
   */
  public String getWriterBuildTimestamp() {
    return header.getWriterBuildTimestamp();
  }

  /**
   * Deserializes all {@link TableShard}s stored in the file.
   *
   * @return Newly deserialized {@link DefaultTableShard}s, in file order.
   * @throws IOException
   *           If the file could not be read.
   * @throws DeserializationException
   *           If a TableShard could not be deserialized.
   */
  public Collection<DefaultTableShard> loadAllTableShards() throws IOException, DeserializationException {
    int numberOfTableShards = getNumberOfTableShards();
    List<DefaultTableShard> res = new ArrayList<>(numberOfTableShards);
    try (InputStream is = data.createInputStream()) {
      TIOStreamTransport transport = new TIOStreamTransport(is);
      TProtocol compactProt = new TCompactProtocol(transport);

      // Skip over the file header so the stream is positioned at the first TableShard. Note: do not reuse the "header"
      // field here, this method may run concurrently.
      SDiqubeFileHeader skippedHeader = new SDiqubeFileHeader();
      skippedHeader.read(compactProt);

      for (int i = 0; i < numberOfTableShards; i++) {
        DefaultTableShard tableShard = deserializer.deserialize(DefaultTableShard.class, is);
        res.add(tableShard);
      }
    } catch (TException e) {
      throw new IOException("Could not load table shards", e);
    }
    return res;
  }
}