/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cn.ac.ncic.mastiff.io.coding;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.IOUtils;

import cn.ac.ncic.mastiff.Chunk;
import cn.ac.ncic.mastiff.ValPair;
import cn.ac.ncic.mastiff.io.MultiChunk;
import cn.ac.ncic.mastiff.io.coding.Compression.Algorithm;
import cn.ac.ncic.mastiff.utils.Bytes;

/**
 * <p>
 * Decode the <i>MultiChunk</i>s from an encoded page.
 * An encoded page produces exactly one <i>MultiChunk</i>.
 * </p>
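 *
 * <p>
 * A minimal usage sketch (hypothetical names; assumes the caller has already
 * read one encoded, uncompressed page into {@code pageBytes}):
 * </p>
 * <pre>
 * MVDecoder decoder = new MVDecoder(0, -1, null); // var-length values, no compression
 * decoder.reset(pageBytes, 0, pageBytes.length);
 * while (decoder.hashNextChunk()) {
 *   Chunk chunk = decoder.nextChunk();
 *   // consume the chunk ...
 * }
 * </pre>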
 *
 * @see MultiChunk
 */
public class MVDecoder implements Decoder {
  static final Log LOG = LogFactory.getLog(MVDecoder.class);

  ValPair pair = new ValPair();    // reusable pair returned by begin()/end()
  int valueLen;                    // value length in bytes; -1 means var-length
  MultiChunk mvChunk;              // the single chunk produced by a page
  MultiChunk shadowChunk = null;   // lazily created for getChunkByPosition()

  /** page data; the raw buffer when uncompressed, else filled by ensureDecompress() */
  byte[] page = null;
  int offset;                      // offset of the page within the source buffer
  int compressedSize;              // size of the (possibly compressed) page in the buffer
  int decompressedSize;            // size of the page after decompression

  /** statistics of a page */
  int numPairs;                    // number of pairs in the page
  int startPos;                    // position of the first pair
  ByteBuffer bb;                   // used to read the three header ints

  /** offset of an index area, present only if the cluster is var-length */
  int indexOffset;

  // used for iterating over the page's single chunk
  int curIdx = 1;

  Algorithm compressAlgo;          // null means the page is not compressed
  DataInputBuffer inBuf = new DataInputBuffer();

  /**
   * MVDecoder
   * @param sortedCol the column the data is sorted by
   * @param valueLen_ the length of each value, or -1 for var-length values
   * @param algorithm the compression algorithm used to compress the data
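   *
   * <p>For example (hypothetical values; a fixed-length column of 8-byte
   * values, sorted on column 0):</p>
   * <pre>
   * MVDecoder decoder = new MVDecoder(0, 8, algorithm);
   * </pre>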
   */
  public MVDecoder(int sortedCol, int valueLen_, Algorithm algorithm) {
    valueLen = valueLen_;
    compressAlgo = algorithm;
    mvChunk = new MultiChunk(sortedCol, true, true, valueLen_);
  }

  @Override
  public ValPair begin() throws IOException {
    ensureDecompress();
    // the first pair starts right after the three header ints
    pair.data = page;
    pair.offset = offset + 3 * Bytes.SIZEOF_INT;
    pair.length = valueLen == -1
        ? Bytes.toInt(page, offset + indexOffset) - 3 * Bytes.SIZEOF_INT
        : valueLen;
    pair.pos = startPos;
    return pair;
  }

  @Override
  public int beginPos() {
    return startPos;
  }

  @Override
  public ValPair end() throws IOException {
    ensureDecompress();
    if (numPairs == 1) {
      return begin();
    }
    pair.data = page;
    pair.pos = startPos + numPairs - 1;
    if (valueLen == -1) {
      // index entry i holds the start offset (within the page) of pair i + 1,
      // so entry numPairs - 2 is where the last pair begins
      int lastPairOffset = Bytes.toInt(page,
          offset + indexOffset + (numPairs - 2) * Bytes.SIZEOF_INT);
      pair.offset = offset + lastPairOffset;
      pair.length = indexOffset - lastPairOffset;
    } else {
      // fixed-length values can be addressed directly
      pair.offset = offset + 3 * Bytes.SIZEOF_INT + (numPairs - 1) * valueLen;
      pair.length = valueLen;
    }
    return pair;
  }

  @Override
  public int endPos() {
    return startPos + numPairs - 1;
  }

  @Override
  public byte[] getBuffer() {
    return page;
  }

  @Override
  public int getBufferLen() {
    return decompressedSize;
  }

  @Override
  public int getNumPairs() {
    return numPairs;
  }

  @Override
  public boolean hashNextChunk() {
    // an encoded page yields exactly one chunk, so curIdx is either 0 or 1
    return curIdx < 1;
  }

  @Override
  public Chunk nextChunk() throws IOException {
    if (curIdx >= 1) return null;
    ensureDecompress();
    mvChunk.setBuffer(page, offset + 3 * Bytes.SIZEOF_INT,
        offset + indexOffset, numPairs, startPos);
    curIdx++;
    return mvChunk;
  }

  @Override
  public Chunk getChunkByPosition(int position) throws IOException {
    if (position < startPos || position >= startPos + numPairs)
      return null;
    // lazily create a separate chunk so the one handed out by nextChunk()
    // is left untouched
    if (shadowChunk == null)
      shadowChunk = new MultiChunk(0, true, true, valueLen);
    ensureDecompress();
    shadowChunk.setBuffer(page, offset + 3 * Bytes.SIZEOF_INT,
        offset + indexOffset, numPairs, startPos);
    return shadowChunk;
  }

  @Override
  public void reset() {
    curIdx = 1;
  }

  @Override
  public void reset(byte[] buffer, int offset, int length) {
    this.offset = offset;
    compressedSize = length;
    // the page header is three ints: decompressedSize, numPairs, startPos
    bb = ByteBuffer.wrap(buffer, offset, compressedSize);
    decompressedSize = bb.getInt();
    numPairs = bb.getInt();
    startPos = bb.getInt();
    curIdx = 0;
    // for var-length values, an index area of numPairs ints sits at the page tail
    indexOffset = valueLen == -1 ? decompressedSize - numPairs * Bytes.SIZEOF_INT : -1;
    if (compressAlgo == null)
      page = buffer;
    else {
      // only the page body is compressed; defer decompression until first read
      inBuf.reset(buffer, offset + 3 * Bytes.SIZEOF_INT, compressedSize - 3 * Bytes.SIZEOF_INT);
      page = null;
    }
  }

  public void ensureDecompress() throws IOException {
    if (compressAlgo != null && page == null) {
      org.apache.hadoop.io.compress.Decompressor decompressor = this.compressAlgo.getDecompressor();
      InputStream is = this.compressAlgo.createDecompressionStream(inBuf, decompressor, 0);
      ByteBuffer buf = ByteBuffer.allocate(decompressedSize);
      // only the page body was compressed; read it back after the 12-byte header
      // slot, whose values are already held in this decoder's fields
      IOUtils.readFully(is, buf.array(), 3 * Bytes.SIZEOF_INT, buf.capacity() - 3 * Bytes.SIZEOF_INT);
      is.close();
      this.compressAlgo.returnDecompressor(decompressor);
      page = buf.array();
    }
  }

  @Override
  public boolean skipToPos(int pos) {
    return pos >= startPos && pos < startPos + numPairs;
  }

  @Override
  public byte[] ensureDecompressed() throws IOException {
    // TODO Auto-generated method stub
    return null;
  }
}