/*
* Copyright (c) [2016] [ <ether.camp> ]
* This file is part of the ethereumJ library.
*
* The ethereumJ library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The ethereumJ library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the ethereumJ library. If not, see <http://www.gnu.org/licenses/>.
*/
package org.ethereum.net.swarm;
import org.ethereum.util.ByteUtil;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collection;
import static java.lang.Math.max;
import static java.lang.Math.min;
/**
 * From the Go implementation:
 *
 * The distributed storage implemented in this package requires fixed-size chunks of content.
 * Chunker is the interface to a component that is responsible for disassembling and assembling larger data.
 * TreeChunker implements a Chunker based on a tree structure defined as follows:
 * 1 each node in the tree, including the root and other branching nodes, is stored as a chunk.
 * 2 branching nodes encode data contents that include the size of the data slice covered by its
 * entire subtree under the node as well as the hash keys of all its children:
 * data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
 * 3 leaf nodes encode an actual subslice of the input data.
 * 4 if data size is not more than maximum chunksize, the data is stored in a single chunk
 * key = sha256(int64(size) + data)
 * 5 if data size is more than chunksize*Branches^l, but no more than chunksize*Branches^(l+1),
 * the data vector is split into slices of chunksize*Branches^l length (except the last one).
 * key = sha256(int64(size) + key(slice0) + key(slice1) + ...)
 * Tree chunker is a concrete implementation of data chunking.
 * This chunker works in a simple way: it builds a tree out of the document so that each node either
 * represents a chunk of real data or a chunk representing a branching non-leaf node of the tree.
 * In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children.
 * This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are
 * transparent: since they encode a subtree, their stored size component is strictly greater than the
 * maximum data size of a single chunk.
 * If all is well, it is possible to implement this by simply composing readers, so that no extra allocation
 * or buffering is necessary for the data splitting and joining. This means that in principle there
 * can be direct IO between: memory, file system, network socket (a bzz peer's storage request is
 * read from the socket). In practice there may be a need for several stages of internal buffering.
 * Unfortunately, the hashing itself does use extra copies and allocation, since it needs them.
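 *
 * A minimal usage sketch (illustrative only; the {@code chunkStore} and {@code sourceReader}
 * instances, and the {@code put} method on ChunkStore, are assumptions of this example rather
 * than anything this class provides):
 * <pre>{@code
 *   TreeChunker chunker = new TreeChunker();
 *   Collection<Chunk> chunks = new ArrayList<>();
 *   Key rootKey = chunker.split(sourceReader, chunks);        // disassemble
 *   for (Chunk c : chunks) chunkStore.put(c);                 // persist the chunks
 *   SectionReader joined = chunker.join(chunkStore, rootKey); // lazy reassembly
 *   byte[] out = new byte[(int) joined.getSize()];
 *   joined.read(out, 0);                                      // fetches chunks on demand
 * }</pre>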
*/
public class TreeChunker implements Chunker {
    // Note: MessageDigest is stateful and not thread-safe, so this shared default
    // instance must not be used by multiple chunkers concurrently.
    public static final MessageDigest DEFAULT_HASHER;
    private static final int DEFAULT_BRANCHES = 128;
static {
try {
DEFAULT_HASHER = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e); // can't happen: SHA-256 is a mandatory JCA algorithm
}
}
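    /**
     * A chunk in the layout used by this chunker: an 8-byte little-endian subtree size
     * followed by the payload. The chunk's key is the hash of this whole buffer
     * (size prefix included), matching "key = sha256(int64(size) + data)" above.
     */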
public class TreeChunk extends Chunk {
private static final int DATA_OFFSET = 8;
public TreeChunk(int dataSize) {
super(null, new byte[DATA_OFFSET + dataSize]);
setSubtreeSize(dataSize);
}
public TreeChunk(Chunk chunk) {
super(chunk.getKey(), chunk.getData());
}
public void setSubtreeSize(long size) {
ByteBuffer.wrap(getData()).order(ByteOrder.LITTLE_ENDIAN).putLong(0, size);
}
public long getSubtreeSize() {
return ByteBuffer.wrap(getData()).order(ByteOrder.LITTLE_ENDIAN).getLong(0);
}
public int getDataOffset() {
return DATA_OFFSET;
}
public Key getKey() {
            if (key == null) {
                // the key is the hash of the whole chunk data (size prefix + payload),
                // computed lazily and cached
                key = new Key(hasher.digest(getData()));
}
return key;
}
@Override
public String toString() {
String dataString = ByteUtil.toHexString(
Arrays.copyOfRange(getData(), getDataOffset(), getDataOffset() + 16)) + "...";
return "TreeChunk[" + getSubtreeSize() + ", " + getKey() + ", " + dataString + "]";
}
}
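    /**
     * An intermediate (branching) chunk: its payload is the concatenation of its children's
     * keys, while the size prefix still records the total data bytes covered by the subtree.
     */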
public class HashesChunk extends TreeChunk {
public HashesChunk(long subtreeSize) {
super(branches * hashSize);
setSubtreeSize(subtreeSize);
}
public HashesChunk(Chunk chunk) {
super(chunk);
}
public int getKeyCount() {
return branches;
}
public Key getKey(int idx) {
int off = getDataOffset() + idx * hashSize;
return new Key(Arrays.copyOfRange(getData(), off, off + hashSize));
}
public void setKey(int idx, Key key) {
int off = getDataOffset() + idx * hashSize;
System.arraycopy(key.getBytes(), 0, getData(), off, hashSize);
}
@Override
public String toString() {
            StringBuilder hashes = new StringBuilder("{");
            for (int i = 0; i < getKeyCount(); i++) {
                hashes.append(i == 0 ? "" : ", ").append(getKey(i));
            }
            hashes.append("}");
return "HashesChunk[" + getSubtreeSize() + ", " + getKey() + ", " + hashes + "]";
}
}
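    /**
     * Computes the depth of the chunk tree and the data span ("treeSize") that a full tree of
     * that depth covers, for a given data size. For example, with the defaults (hashSize = 32,
     * branches = 128, hence chunkSize = 4096) a 1,000,000-byte input gives depth = 2 and
     * treeSize = 4096 * 128 * 128 = 67,108,864: the smallest chunkSize * branches^depth
     * that is not less than the data size.
     */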
    private class TreeSize {
        int depth;      // number of branching levels above the leaf level
        long treeSize;  // data bytes covered by a full tree of this depth
public TreeSize(long dataSize) {
treeSize = chunkSize;
for (; treeSize < dataSize; treeSize *= branches) {
depth++;
}
}
}
    private int branches;         // fan-out: number of child keys in an intermediate chunk
    private MessageDigest hasher;
    private int hashSize;         // length in bytes of a key produced by the hasher
    private long chunkSize;       // maximum payload of a single chunk: hashSize * branches
public TreeChunker() {
this(DEFAULT_BRANCHES, DEFAULT_HASHER);
}
public TreeChunker(int branches, MessageDigest hasher) {
this.branches = branches;
this.hasher = hasher;
hashSize = hasher.getDigestLength();
chunkSize = hashSize * branches;
}
public long getChunkSize() {
return chunkSize;
}
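    /**
     * Disassembles the data into a tree of chunks, handing every produced chunk to
     * {@code consumer}, and returns the key of the root chunk, which is all that is needed
     * to later {@link #join(ChunkStore, Key)} the data back together.
     */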
@Override
public Key split(SectionReader sectionReader, Collection<Chunk> consumer) {
TreeSize ts = new TreeSize(sectionReader.getSize());
return splitImpl(ts.depth, ts.treeSize/branches, sectionReader, consumer);
}
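    /**
     * Recursively splits {@code data}. Here {@code treeSize} is the maximum number of data
     * bytes covered by each child of the node being built; when {@code depth} reaches 0 the
     * data fits into a single leaf chunk and is stored directly.
     */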
private Key splitImpl(int depth, long treeSize, SectionReader data, Collection<Chunk> consumer) {
long size = data.getSize();
TreeChunk newChunk;
while (depth > 0 && size < treeSize) {
treeSize /= branches;
depth--;
}
if (depth == 0) {
            newChunk = new TreeChunk((int) size); // safe to cast since a leaf chunk is smaller than 2GB
data.read(newChunk.getData(), newChunk.getDataOffset());
} else {
// intermediate chunk containing child nodes hashes
int branchCnt = (int) ((size + treeSize - 1) / treeSize);
HashesChunk hChunk = new HashesChunk(size);
long pos = 0;
long secSize;
// TODO the loop can be parallelized
for (int i = 0; i < branchCnt; i++) {
// the last item can have shorter data
if (size-pos < treeSize) {
secSize = size - pos;
} else {
secSize = treeSize;
}
                // take the section of the data covered by this subtree
SectionReader subTreeData = new SlicedReader(data, pos, secSize);
// the hash of that data
Key subTreeKey = splitImpl(depth-1, treeSize/branches, subTreeData, consumer);
hChunk.setKey(i, subTreeKey);
pos += treeSize;
}
            // all child keys are in place; the chunk itself is hashed lazily via getKey()
newChunk = hChunk;
}
consumer.add(newChunk);
        // report the hash of this chunk one level up (the key fills the proper subslice of the parent chunk)
return newChunk.getKey();
}
@Override
public SectionReader join(ChunkStore chunkStore, Key key) {
return new LazyChunkReader(chunkStore, key);
}
@Override
public long keySize() {
return hashSize;
}
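    /**
     * A SectionReader that reassembles the original data on demand: only the chunks
     * overlapping the requested byte range are fetched from the ChunkStore, so joining
     * never has to materialize the whole document in memory.
     */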
private class LazyChunkReader implements SectionReader {
Key key;
ChunkStore chunkStore;
final long size;
final Chunk root;
public LazyChunkReader(ChunkStore chunkStore, Key key) {
this.chunkStore = chunkStore;
this.key = key;
root = chunkStore.get(key);
this.size = new TreeChunk(root).getSubtreeSize();
}
@Override
public int readAt(byte[] dest, int destOff, long readerOffset) {
            int size = dest.length - destOff; // the destination capacity limits how many bytes we read
TreeSize ts = new TreeSize(this.size);
return readImpl(dest, destOff, root, ts.treeSize, 0, readerOffset,
readerOffset + min(size, this.size - readerOffset));
}
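        /**
         * Copies the intersection of the requested range [readStart, readEnd) with this
         * chunk's span [chunkStart, chunkStart + chunkWidth) into {@code dest}. Chunks wider
         * than chunkSize are branching nodes, so the call recurses into the children that
         * overlap the range; leaf chunks copy their bytes directly. Returns the number of
         * bytes written.
         */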
private int readImpl(byte[] dest, int destOff, Chunk chunk, long chunkWidth, long chunkStart,
long readStart, long readEnd) {
long chunkReadStart = max(readStart - chunkStart, 0);
long chunkReadEnd = min(chunkWidth, readEnd - chunkStart);
int ret = 0;
if (chunkWidth > chunkSize) {
long subChunkWidth = chunkWidth / branches;
if (chunkReadStart >= chunkWidth || chunkReadEnd <= 0) {
throw new RuntimeException("Not expected.");
}
int startSubChunk = (int) (chunkReadStart / subChunkWidth);
int lastSubChunk = (int) ((chunkReadEnd - 1) / subChunkWidth);
                HashesChunk hChunk = new HashesChunk(chunk);
                // TODO the loop could be parallelized, but each child's destination offset
                // would then have to be computed independently instead of accumulating ret
                for (int i = startSubChunk; i <= lastSubChunk; i++) {
                    Chunk subChunk = chunkStore.get(hChunk.getKey(i));
                    // append each child's bytes right after the previous child's: the first
                    // child may contribute fewer than subChunkWidth bytes when the read
                    // starts mid-child, so fixed (i - startSubChunk) * subChunkWidth
                    // destination offsets would leave gaps in dest
                    ret += readImpl(dest, destOff + ret, subChunk, subChunkWidth,
                            chunkStart + i * subChunkWidth, readStart, readEnd);
                }
} else {
TreeChunk dataChunk = new TreeChunk(chunk);
ret = (int) (chunkReadEnd - chunkReadStart);
System.arraycopy(dataChunk.getData(), (int) (dataChunk.getDataOffset() + chunkReadStart),
dest, destOff, ret);
}
return ret;
}
@Override
public long seek(long offset, int whence) {
throw new RuntimeException("Not implemented");
}
@Override
public long getSize() {
return size;
}
@Override
public int read(byte[] dest, int destOff) {
return readAt(dest, destOff, 0);
}
}
    /**
     * A SectionReader view over a fixed window [offset, offset + len) of a delegate reader.
     */
public static class SlicedReader implements SectionReader {
SectionReader delegate;
long offset;
long len;
public SlicedReader(SectionReader delegate, long offset, long len) {
this.delegate = delegate;
this.offset = offset;
this.len = len;
}
@Override
public long seek(long offset, int whence) {
return delegate.seek(this.offset + offset, whence);
}
@Override
public int read(byte[] dest, int destOff) {
return delegate.readAt(dest, destOff, offset);
}
@Override
public int readAt(byte[] dest, int destOff, long readerOffset) {
return delegate.readAt(dest, destOff, offset + readerOffset);
}
@Override
public long getSize() {
return len;
}
}
}