/**
* File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java $
* Revision: $Rev: 130 $
* Last modified: $Date: 2013-01-21 00:09:42 +0000 (lun, 21 ene 2013) $
* Last modified by: $Author: mario.arias $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Contacting the authors:
* Mario Arias: mario.arias@deri.org
* Javier D. Fernandez: jfergar@infor.uva.es
* Miguel A. Martinez-Prieto: migumar2@infor.uva.es
* Alejandro Andres: fuzzy.alej@gmail.com
*/
package org.rdfhdt.hdt.compact.sequence;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import org.rdfhdt.hdt.compact.integer.VByte;
import org.rdfhdt.hdt.exceptions.CRCException;
import org.rdfhdt.hdt.exceptions.IllegalFormatException;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.BitUtil;
import org.rdfhdt.hdt.util.crc.CRC32;
import org.rdfhdt.hdt.util.crc.CRC8;
import org.rdfhdt.hdt.util.crc.CRCInputStream;
import org.rdfhdt.hdt.util.crc.CRCOutputStream;
import org.rdfhdt.hdt.util.io.IOUtil;
import pl.edu.icm.jlargearrays.LongLargeArray;
import pl.edu.icm.jlargearrays.LargeArrayUtils;
/**
 * Sequence of unsigned integers stored with a fixed number of bits per entry,
 * packed back to back into 64-bit words held in a {@link LongLargeArray}.
 *
 * Entry {@code k} occupies bits {@code [k*numbits, (k+1)*numbits)} of the
 * packed stream; bit {@code b} of the stream is bit {@code b % 64} of word
 * {@code b / 64}. An entry may therefore straddle two consecutive words.
 *
 * Instances are not synchronized.
 *
 * @author mario.arias,Lyudmila Balakireva
 *
 */
public class SequenceLog64Big implements DynamicSequence {
    /** Number of bits in one storage word. */
    private static final byte W = 64;
    /** Value handed to {@code LongLargeArray.setMaxSizeOf32bitArray} by the constructor. */
    private static final int INDEX = 1073741824;
    /** Initial number of words of the temporary buffer used by {@link #add(Iterator)}. */
    private static final long ADD_BUFFER_INITIAL_WORDS = 1024;

    /** Packed storage; always holds at least one word while the sequence is open. */
    LongLargeArray data;
    /** Bits used by every entry (0..64). */
    private int numbits;
    /** Number of entries logically stored. */
    private long numentries = 0;
    /** Largest value representable with {@link #numbits} bits, as reported by {@code BitUtil.maxVal}. */
    private long maxvalue;

    /** Creates an empty sequence using 64 bits per entry. */
    public SequenceLog64Big() {
        this(W);
    }

    /**
     * Creates an empty sequence.
     *
     * @param numbits bits used by every entry
     */
    public SequenceLog64Big(int numbits) {
        this(numbits, 0);
    }

    /**
     * Creates an empty sequence with room reserved for {@code capacity} entries.
     *
     * @param numbits  bits used by every entry
     * @param capacity number of entries to reserve space for
     */
    public SequenceLog64Big(int numbits, long capacity) {
        this.numentries = 0;
        this.numbits = numbits;
        this.maxvalue = BitUtil.maxVal(numbits);
        long size = numWordsFor(numbits, capacity);
        LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX);
        // Keep the word count as a long: narrowing to int would silently
        // truncate capacities that need more than Integer.MAX_VALUE words.
        data = new LongLargeArray(Math.max(size, 1L));
    }

    /**
     * Creates a sequence with room for {@code capacity} entries.
     *
     * @param numbits    bits used by every entry
     * @param capacity   number of entries to reserve space for
     * @param initialize when true the sequence starts with {@code capacity}
     *                   entries (instead of zero entries)
     */
    public SequenceLog64Big(int numbits, long capacity, boolean initialize) {
        this(numbits, capacity);
        if (initialize) {
            numentries = capacity;
        }
    }

    /** longs required to represent "total" integers of "bitsField" bits each */
    public static final long numWordsFor(int bitsField, long total) {
        return ((bitsField * total + 63) / 64);
    }

    /** Number of bits used in the last word (1-64), or 0 when there are no bits at all */
    public static final long lastWordNumBits(int bitsField, long total) {
        long totalBits = bitsField * total;
        if (totalBits == 0) {
            return 0;
        }
        return (long) ((totalBits - 1) % W) + 1; // +1 To have output in the range 1-64, -1 to compensate.
    }

    /** Number of bytes required for last word */
    public static final long lastWordNumBytes(int bitsField, long total) {
        return ((lastWordNumBits(bitsField, total) - 1) / 8) + 1; // +1 To have output in the range 1-8, -1 to compensate.
    }

    /** Number of bytes required to represent "total" integers of "bitsField" bits each */
    public static final long numBytesFor(int bitsField, long total) {
        return (bitsField * total + 7) / 8;
    }

    /** Retrieve a given index from array data where every value uses bitsField bits
     * @param data Array
     * @param bitsField Length in bits of each field
     * @param index Position to be retrieved
     * @return the stored value, or 0 when bitsField is 0
     */
    private static final long getField(LongLargeArray data, int bitsField, long index) {
        if (bitsField == 0) {
            return 0;
        }
        long bitPos = index * bitsField;
        long i = bitPos / W;
        long j = bitPos % W;
        long result;
        if (j + bitsField <= W) {
            // Field lies inside one word: drop the bits above it, then the bits below it.
            result = (data.getLong(i) << (W - j - bitsField)) >>> (W - bitsField);
        } else {
            // Field straddles two words: low part comes from word i, high part from word i+1.
            result = data.getLong(i) >>> j;
            result = result | (data.getLong(i + 1) << ((W << 1) - j - bitsField)) >>> (W - bitsField);
        }
        return result;
    }

    /** Store a given value in index into array data where every value uses bitsField bits
     * @param data Array
     * @param bitsField Length in bits of each field
     * @param index Position to store in
     * @param value Value to be stored; only its lowest bitsField bits are kept
     */
    private static final void setField(LongLargeArray data, int bitsField, long index, long value) {
        if (bitsField == 0) {
            return;
        }
        // Java masks a long shift distance to 6 bits, so "~0L << 64" is a
        // no-op and the usual mask expression yields 0 for 64-bit fields.
        long fieldMask = bitsField >= W ? ~0L : ~(~0L << bitsField);
        // Discard bits that do not fit so they cannot overwrite neighbouring fields.
        value &= fieldMask;

        long bitPos = index * bitsField;
        long i = bitPos / W;
        long j = bitPos % W;

        long mask = fieldMask << j;
        data.setLong(i, (data.getLong(i) & ~mask) | (value << j));

        if (j + bitsField > W) {
            // Remaining high bits of the value go to the bottom of the next word.
            mask = ~0L << (bitsField + j - W);
            data.setLong(i + 1, (data.getLong(i + 1) & mask) | (value >>> (W - j)));
        }
    }

    /**
     * Replaces the backing array with one of the requested length, keeping
     * the words that fit.
     *
     * @param size requested number of words; values below 1 are raised to 1
     *             so the backing array is never asked for a non-positive length
     */
    private final void resizeArray(long size) {
        long newLength = Math.max(size, 1L);
        if (newLength == data.length()) {
            return;
        }
        LongLargeArray a = new LongLargeArray(newLength);
        LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(newLength, data.length()));
        data = a;
    }

    /**
     * Replaces the whole content with the values produced by the iterator,
     * choosing the bit width from the largest value seen.
     *
     * The iterator can only be traversed once, so the values are buffered
     * uncompressed while the maximum is computed and packed afterwards.
     *
     * @param elements values to store; expected to be non-negative
     * @see hdt.triples.array.Stream#add(java.util.Iterator)
     */
    @Override
    public void add(Iterator<Long> elements) {
        LongLargeArray buffer = new LongLargeArray(ADD_BUFFER_INITIAL_WORDS);
        long count = 0;
        long max = 0;

        // Buffer the values and calculate the number of bits needed per element.
        while (elements.hasNext()) {
            long val = elements.next().longValue();
            if (count == buffer.length()) {
                LongLargeArray grown = new LongLargeArray(buffer.length() * 2);
                LargeArrayUtils.arraycopy(buffer, 0, grown, 0, buffer.length());
                buffer = grown;
            }
            buffer.setLong(count, val);
            count++;
            max = val > max ? val : max;
        }

        // Prepare array
        numentries = count;
        numbits = BitUtil.log2(max);
        maxvalue = BitUtil.maxVal(numbits);
        data = new LongLargeArray(Math.max(numWordsFor(numbits, numentries), 1L));

        // Save
        for (long i = 0; i < count; i++) {
            long element = buffer.getLong(i);
            assert element <= maxvalue;
            setField(data, numbits, i, element);
        }
    }

    /**
     * Replaces the whole content with the given integers, choosing the bit
     * width from the largest value in the list.
     *
     * @param elements values to store; expected to be non-negative
     */
    public void addIntegers(ArrayList<Integer> elements) {
        long max = 0;
        // Calculate number of bits needed per element.
        for (int i = 0; i < elements.size(); i++) {
            long val = elements.get(i).longValue();
            max = val > max ? val : max;
        }

        // Prepare array
        numentries = elements.size();
        numbits = BitUtil.log2(max);
        maxvalue = BitUtil.maxVal(numbits);
        data = new LongLargeArray(Math.max(numWordsFor(numbits, numentries), 1L));

        // Save
        for (int i = 0; i < elements.size(); i++) {
            long element = elements.get(i).longValue();
            assert element <= maxvalue;
            setField(data, numbits, i, element);
        }
    }

    /**
     * Returns the value stored at {@code position}.
     *
     * No range check against the number of entries is performed here; the
     * position is passed straight to the backing array.
     *
     * @param position zero-based entry index
     * @see hdt.triples.array.Stream#get(int)
     */
    @Override
    public long get(long position) {
        return getField(data, numbits, position);
    }

    /**
     * Overwrites the entry at {@code position}. Does not change the number of
     * entries and does not grow the backing array.
     *
     * @param position zero-based entry index
     * @param value    value to store; only its lowest {@code numbits} bits are kept
     */
    public void set(long position, long value) {
        setField(data, numbits, position, value);
    }

    /**
     * Adds a value after the current last entry, growing the backing array
     * (at least doubling it) when it is full.
     *
     * @param value value to store; only its lowest {@code numbits} bits are kept
     */
    public void append(long value) {
        long neededSize = numWordsFor(numbits, numentries + 1);
        if (data.length() < neededSize) {
            resizeArray(Math.max(neededSize, data.length() * 2));
        }
        this.set(numentries, value);
        numentries++;
    }

    /**
     * Re-packs the entries using the smallest bit width that can hold the
     * current maximum value and shrinks the backing array accordingly.
     */
    public void aggresiveTrimToSize() {
        long max = 0;
        // Calculate number of bits needed per element.
        for (long i = 0; i < numentries; i++) {
            long value = this.get(i);
            max = value > max ? value : max;
        }
        int newbits = BitUtil.log2(max);

        assert newbits <= numbits;

        if (newbits != numbits) {
            // Ascending order is required: with a narrower width, entry i is
            // written at or before the bits it was read from, so entries that
            // have not been moved yet are never overwritten.
            for (long i = 0; i < numentries; i++) {
                long value = getField(data, numbits, i);
                setField(data, newbits, i, value);
            }
            numbits = newbits;
            maxvalue = BitUtil.maxVal(numbits);

            long totalSize = numWordsFor(numbits, numentries);
            if (totalSize != data.length()) {
                // Pass the long through untouched; an int cast would truncate large sizes.
                resizeArray(totalSize);
            }
        }
    }

    /** Shrinks the backing array to the words needed by the current entries. */
    public void trimToSize() {
        resizeArray(numWordsFor(numbits, numentries));
    }

    /**
     * Sets the number of entries and resizes the backing array to match.
     *
     * @param numentries new number of entries
     */
    public void resize(long numentries) {
        this.numentries = numentries;
        resizeArray(numWordsFor(numbits, numentries));
    }

    /* (non-Javadoc)
     * @see hdt.triples.array.Stream#getNumberOfElements()
     */
    @Override
    public long getNumberOfElements() {
        return numentries;
    }

    /**
     * Writes the sequence: a header (type byte, bits per entry, VByte-encoded
     * entry count) followed by its CRC8, then the packed words followed by
     * their CRC32. Only the used bits of the last word are written.
     *
     * @param output   destination stream
     * @param listener not used by this implementation
     * @see hdt.triples.array.Stream#save(java.io.OutputStream, hdt.ProgressListener)
     */
    @Override
    public void save(OutputStream output, ProgressListener listener) throws IOException {
        CRCOutputStream out = new CRCOutputStream(output, new CRC8());

        out.write(SequenceFactory.TYPE_SEQLOG);
        out.write(numbits);
        VByte.encode(out, numentries);

        out.writeCRC();

        out.setCRC(new CRC32());

        long numwords = numWordsFor(numbits, numentries);
        for (long i = 0; i < numwords - 1; i++) {
            IOUtil.writeLong(out, data.getLong(i));
        }

        if (numwords > 0) {
            // Write only used bits from last entry (byte aligned, little endian)
            long lastWordUsedBits = lastWordNumBits(numbits, numentries);
            BitUtil.writeLowerBitsByteAligned(data.getLong(numwords - 1), lastWordUsedBits, out);
        }

        out.writeCRC();
    }

    /**
     * Reads a sequence previously written by
     * {@link #save(OutputStream, ProgressListener)}, replacing the current content.
     *
     * @param input    source stream
     * @param listener not used by this implementation
     * @throws IllegalFormatException if the type byte is not a log sequence
     *                                or the bit width exceeds 64
     * @throws CRCException           if the header or data checksum does not match
     * @see hdt.triples.array.Stream#load(java.io.InputStream, hdt.ProgressListener)
     */
    @Override
    public void load(InputStream input, ProgressListener listener) throws IOException {
        CRCInputStream in = new CRCInputStream(input, new CRC8());

        int type = in.read();
        if (type != SequenceFactory.TYPE_SEQLOG) {
            throw new IllegalFormatException("Trying to read a LogArray but the data is not LogArray");
        }
        numbits = in.read();
        numentries = VByte.decode(in);

        if (!in.readCRCAndCheck()) {
            throw new CRCException("CRC Error while reading LogArray64 header.");
        }

        if (numbits > 64) {
            throw new IllegalFormatException("LogArray64 cannot deal with more than 64bit per entry");
        }
        // Keep the cached maximum in step with the bit width just read.
        maxvalue = BitUtil.maxVal(numbits);

        in.setCRC(new CRC32());

        long numwords = numWordsFor(numbits, numentries);
        // An empty sequence needs zero words, but the backing array is
        // always allocated with at least one.
        data = new LongLargeArray(Math.max(numwords, 1L));
        for (long i = 0; i < numwords - 1; i++) {
            data.setLong(i, IOUtil.readLong(in));
        }

        if (numwords > 0) {
            // Read only used bits from last entry (byte aligned, little endian)
            long lastWordUsed = lastWordNumBits(numbits, numentries);
            data.setLong(numwords - 1, BitUtil.readLowerBitsByteAligned(lastWordUsed, in));
        }

        if (!in.readCRCAndCheck()) {
            throw new CRCException("CRC Error while reading LogArray64 data.");
        }
    }

    /**
     * Bytes needed to hold the current entries at the current bit width.
     *
     * @see hdt.triples.array.Stream#size()
     */
    @Override
    public long size() {
        return numBytesFor(numbits, numentries);
    }

    /** Bytes occupied by the backing array (8 per allocated word). */
    public long getRealSize() {
        return data.length() * 8L;
    }

    /** Bits used by every entry. */
    public int getNumBits() {
        return numbits;
    }

    /* (non-Javadoc)
     * @see hdt.compact.array.Stream#getType()
     */
    @Override
    public String getType() {
        return HDTVocabulary.SEQ_TYPE_LOG;
    }

    /** Drops the reference to the backing array; the sequence must not be used afterwards. */
    @Override
    public void close() throws IOException {
        data = null;
    }
}