/** * File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/compact/sequence/SequenceLog64Map.java $ * Revision: $Rev: 191 $ * Last modified: $Date: 2013-03-03 11:41:43 +0000 (dom, 03 mar 2013) $ * Last modified by: $Author: mario.arias $ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Contacting the authors: * Mario Arias: mario.arias@deri.org * Javier D. Fernandez: jfergar@infor.uva.es * Miguel A. Martinez-Prieto: migumar2@infor.uva.es * Alejandro Andres: fuzzy.alej@gmail.com */ package org.rdfhdt.hdt.compact.sequence; import java.io.BufferedInputStream; import java.io.Closeable; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; import java.util.Iterator; import org.rdfhdt.hdt.compact.integer.VByte; import org.rdfhdt.hdt.exceptions.CRCException; import org.rdfhdt.hdt.exceptions.IllegalFormatException; import org.rdfhdt.hdt.exceptions.NotImplementedException; import org.rdfhdt.hdt.hdt.HDTVocabulary; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.util.BitUtil; import org.rdfhdt.hdt.util.crc.CRC32; import org.rdfhdt.hdt.util.crc.CRC8; import org.rdfhdt.hdt.util.crc.CRCInputStream; import org.rdfhdt.hdt.util.crc.CRCOutputStream; import org.rdfhdt.hdt.util.io.CountInputStream; import org.rdfhdt.hdt.util.io.IOUtil; /** * @author mario.arias * */ public class SequenceLog64Map implements Sequence,Closeable { private static final byte W = 64; private static final long LONGS_PER_BUFFER=128*1024*1024; // 128*8 = 1Gb per chunk. private ByteBuffer [] buffers; private FileChannel ch; private int numbits; private long numentries; private long lastword; private long numwords; public SequenceLog64Map(File f) throws IOException { // Read from the beginning of the file this(new CountInputStream(new BufferedInputStream(new FileInputStream(f))), f, true); } public SequenceLog64Map(CountInputStream in, File f) throws IOException { this(in, f, false); } private SequenceLog64Map(CountInputStream in, File f, boolean closeInput) throws IOException { CRCInputStream crcin = new CRCInputStream(in, new CRC8()); int type = crcin.read(); if(type!=SequenceFactory.TYPE_SEQLOG){ throw new IllegalFormatException("Trying to read a LogArray but the data is not LogArray"); } numbits = crcin.read(); numentries = VByte.decode(crcin); if(!crcin.readCRCAndCheck()) { throw new CRCException("CRC Error while reading LogArray64 header."); } if(numbits>64) { throw new IllegalFormatException("LogArray64 cannot deal with more than 64bit per entry"); } long base = in.getTotalBytes(); numwords = SequenceLog64.numWordsFor(numbits, numentries); if(numwords>0) { IOUtil.skip(in, (numwords-1)*8L); // Read only used bits from last entry (byte aligned, little endian) int lastWordUsed = SequenceLog64.lastWordNumBits(numbits, numentries); lastword = BitUtil.readLowerBitsByteAligned(lastWordUsed, in); // System.out.println("LastWord0: "+Long.toHexString(lastword)); } IOUtil.skip(in, 4); // CRC mapFiles(f, base); if(closeInput) { in.close(); } } public SequenceLog64Map(int numbits, long numentries, File f) throws IOException { this.numbits = numbits; this.numentries = numentries; this.numwords = SequenceLog64.numWordsFor(numbits, numentries); mapFiles(f, 0); } private void mapFiles(File f, long base) throws IOException { // Read packed data ch = new FileInputStream(f).getChannel(); long maxSize = base+SequenceLog64.numBytesFor(numbits, numentries); int buffer = 0; long block=0; buffers = new ByteBuffer[ (int)(1L+numwords/LONGS_PER_BUFFER) ]; while(block<numwords) { long current = base+ buffer*8L*LONGS_PER_BUFFER; long next = current+8L*LONGS_PER_BUFFER; long length = Math.min(maxSize, next)-current; // System.out.println("Ini: "+current+ " Max: "+ next+ " Length: "+length); buffers[buffer] = ch.map(MapMode.READ_ONLY, current , length ); buffers[buffer].order(ByteOrder.LITTLE_ENDIAN); block+=LONGS_PER_BUFFER; buffer++; } // Read lastWord (it is special because it can be smaller than 8 bytes) // lastword = 0; // if(numwords>0) { // ByteBuffer lastBuffer = buffers[buffers.length-1]; // int pos = lastBuffer.limit()-1; // int numBytesLast = SequenceLog64.lastWordNumBytes(numbits, numentries); // while(pos>=lastBuffer.limit()-numBytesLast) { // long read = (lastBuffer.get(pos) & 0xFFL); // System.out.println("Byte: "+pos+" / "+Long.toHexString(read)); // lastword = (lastword << 8) | read; // pos--; // } //// System.out.println("LastWord1: "+Long.toHexString(lastword)+" Bytes: "+numBytesLast); // } // FIXME: Bug in the previous code, find what because it should be more efficient CountInputStream in = new CountInputStream(new BufferedInputStream(new FileInputStream(f))); IOUtil.skip(in, base+((numwords-1)*8L)); // System.out.println("Last word starts at: "+in.getTotalBytes()); // Read only used bits from last entry (byte aligned, little endian) int lastWordUsedBits = SequenceLog64.lastWordNumBits(numbits, numentries); lastword = BitUtil.readLowerBitsByteAligned(lastWordUsedBits, in); // System.out.println("Last word ends at: "+in.getTotalBytes()); // System.out.println("LastWord2: "+Long.toHexString(lastword)+" Bits: "+lastWordUsedBits); in.close(); } private final long getWord(long w) { if(w==numwords-1) { return lastword; } ByteBuffer buffer = buffers[(int)(w/LONGS_PER_BUFFER)]; return buffer.getLong((int)((w%LONGS_PER_BUFFER)*8)); } /* (non-Javadoc) * @see hdt.triples.array.Stream#get(int) */ @Override public long get(long index) { if(index<0 || index>=numentries) { throw new IndexOutOfBoundsException(); } if(numbits==0) return 0; long bitPos = index*numbits; long i=bitPos / W; int j=(int)(bitPos % W); long result; if (j+numbits <= W) { result = (getWord(i) << (W-j-numbits)) >>> (W-numbits); } else { result = getWord(i) >>> j; result = result | (getWord(i+1) << ( (W<<1) -j-numbits)) >>> (W-numbits); } return result; } /* (non-Javadoc) * @see hdt.triples.array.Stream#getNumberOfElements() */ @Override public long getNumberOfElements() { return numentries; } /* (non-Javadoc) * @see hdt.triples.array.Stream#save(java.io.OutputStream, hdt.ProgressListener) */ @Override public void save(OutputStream output, ProgressListener listener) throws IOException { CRCOutputStream out = new CRCOutputStream(output, new CRC8()); out.write(SequenceFactory.TYPE_SEQLOG); out.write(numbits); VByte.encode(out, numentries); out.writeCRC(); out.setCRC(new CRC32()); int numwords = (int)SequenceLog64.numWordsFor(numbits, numentries); for(int i=0;i<numwords-1;i++) { IOUtil.writeLong(out, getWord(i)); } if(numwords>0) { // Write only used bits from last entry (byte aligned, little endian) int lastWordUsedBits = SequenceLog64.lastWordNumBits(numbits, numentries); BitUtil.writeLowerBitsByteAligned(lastword, lastWordUsedBits, out); } out.writeCRC(); } /* (non-Javadoc) * @see hdt.triples.array.Stream#size() */ @Override public long size() { return SequenceLog64.numBytesFor(numbits, numentries); } public int getNumBits() { return numbits; } /* (non-Javadoc) * @see hdt.compact.array.Stream#getType() */ @Override public String getType() { return HDTVocabulary.SEQ_TYPE_LOG; } @Override public void add(Iterator<Long> elements) { throw new NotImplementedException(); } @Override public void load(InputStream input, ProgressListener listener) throws IOException { throw new NotImplementedException(); } @Override public void close() throws IOException { ch.close(); } }