package org.yamcs.parameterarchive;

import java.lang.reflect.Array;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import me.lemire.integercompression.FastPFOR128;
import me.lemire.integercompression.IntWrapper;

import org.yamcs.utils.DecodingException;
import org.yamcs.utils.IntArray;
import org.yamcs.utils.VarIntUtil;

/**
 * Segment for all non primitive types.
 * <p>
 * Each element is encoded to a binary that is not compressed. The compression of the segment (if any) is realized by
 * not repeating elements.
 * <p>
 * Finds the best encoding among:
 * <ul>
 * <li>raw - list of values stored verbatim, each preceded by its size varint32 encoded</li>
 * <li>enum - the list of unique values is stored at the beginning of the segment; each value has an implicit id (its
 * position in that list). The rest of the segment is the list of ids, encoded in one of the following formats:
 * <ul>
 * <li>VB: varint32 of each id</li>
 * <li>FPROF: coded with the FastPFOR codec + varint32 of the remaining ids</li>
 * <li>RLE: run length encoded</li>
 * </ul>
 * </li>
 * </ul>
 *
 * @author nm
 */
public class ObjectSegment<E> extends BaseSegment {
    final static byte SUBFORMAT_ID_RAW = 0;
    final static byte SUBFORMAT_ID_ENUM_RLE = 1;
    final static byte SUBFORMAT_ID_ENUM_VB = 2;
    final static byte SUBFORMAT_ID_ENUM_FPROF = 3;

    // this is set only during deserialisation.
    boolean runLengthEncoded = false;

    // one of the lists below is used depending whether runLengthEncoded is true or false
    List<E> objectList;
    List<E> rleObjectList;
    IntArray rleCounts;

    int size = 0;
    final ObjectSerializer<E> objSerializer;

    // temporary fields used during the construction before serialisation - could be probably refactored into some
    // builder which returns another object in the consolidate method
    List<HashableByteArray> serializedObjectList;
    Map<HashableByteArray, Integer> valuemap;
    IntArray rleValues;
    IntArray enumValues;
    List<HashableByteArray> unique;
    int rawSize;
    int enumRawSize;
    int enumRleSize;
    boolean consolidated = false;

    /**
     * Creates an empty segment.
     *
     * @param objSerializer
     *            converts the elements to and from their binary form; also provides the format id passed to the
     *            super class
     * @param buildForSerialisation
     *            true when the segment is going to be filled with {@link #add(Object)} and then serialised; false
     *            when it is going to be populated by {@link #parse(ByteBuffer)}
     */
    ObjectSegment(ObjectSerializer<E> objSerializer, boolean buildForSerialisation) {
        super(objSerializer.getFormatId());
        this.objSerializer = objSerializer;
        if (buildForSerialisation) {
            objectList = new ArrayList<E>();
            serializedObjectList = new ArrayList<HashableByteArray>();
            unique = new ArrayList<HashableByteArray>();
            valuemap = new HashMap<>();
            enumValues = new IntArray();
        } // else parse() will construct the necessary fields
    }

    /**
     * Serialises the element and returns its enum id, registering it as a new unique value if no equal binary has
     * been seen before.
     */
    private int internValue(E e) {
        HashableByteArray se = new HashableByteArray(objSerializer.serialize(e));
        Integer valueId = valuemap.get(se);
        if (valueId == null) {
            valueId = unique.size();
            valuemap.put(se, valueId);
            unique.add(se);
        }
        return valueId;
    }

    /**
     * Adds an element to the end of the segment.
     * <p>
     * The segment has to be consolidated (again) before it can be serialised.
     *
     * @param e
     *            the element to add
     */
    public void add(E e) {
        int valueId = internValue(e);
        enumValues.add(valueId);
        // always reference the instance stored in the unique list so that duplicates can be garbage collected
        serializedObjectList.add(unique.get(valueId));
        objectList.add(e);
        size++;
        // the sizes and the runs computed by consolidate() do not cover the new element
        consolidated = false;
    }

    /**
     * Inserts an element at the given position, shifting the subsequent elements.
     * <p>
     * The segment has to be consolidated (again) before it can be serialised.
     *
     * @param pos
     *            position between 0 and {@link #size()} inclusive
     * @param e
     *            the element to insert
     * @throws IndexOutOfBoundsException
     *             if the position is outside the range above
     */
    public void add(int pos, E e) {
        if (pos == size) {
            add(e);
            return;
        }
        // validate before touching any state, so that a bad index cannot leave the lists out of step
        if (pos < 0 || pos > size) {
            throw new IndexOutOfBoundsException("Index: " + pos + " size: " + size);
        }
        int valueId = internValue(e);
        enumValues.add(pos, valueId);
        serializedObjectList.add(pos, unique.get(valueId));
        objectList.add(pos, e);
        size++;
        consolidated = false;
    }

    /**
     * Writes the segment using the smallest of the encodings whose size has been computed by {@link #consolidate()}.
     * If the chosen enum encoding fails because it does not fit, the buffer is rewound and the raw encoding is used.
     *
     * @throws IllegalStateException
     *             if the segment has not been consolidated since the last modification
     */
    @Override
    public void writeTo(ByteBuffer bb) {
        if (!consolidated) {
            throw new IllegalStateException("The segment has to be consolidated before serialization can take place");
        }
        boolean encoded = false;
        int position = bb.position();
        try {
            if (enumRleSize <= enumRawSize && enumRleSize <= rawSize) {
                encoded = writeEnumRle(bb);
            } else if (enumRawSize < enumRleSize && enumRawSize <= rawSize) {
                encoded = writeEnumFprof(bb);
            }
        } catch (IndexOutOfBoundsException | BufferOverflowException e) {
            // the relative put methods of the ByteBuffer signal a full buffer with BufferOverflowException,
            // whereas array accesses and absolute puts throw IndexOutOfBoundsException
            encoded = false;
        }
        if (!encoded) {
            bb.position(position);
            writeRaw(bb);
        }
    }

    /**
     * Writes the segment in the raw format: sub-format id, number of elements and then each serialised element
     * preceded by its length.
     */
    public void writeRaw(ByteBuffer bb) {
        bb.put(SUBFORMAT_ID_RAW);
        // write the size
        VarIntUtil.writeVarInt32(bb, size);
        // then write the values
        for (int i = 0; i < size; i++) {
            byte[] b = serializedObjectList.get(i).b;
            VarIntUtil.writeVarInt32(bb, b.length);
            bb.put(b);
        }
    }

    /**
     * Writes the number of unique values followed by each unique value preceded by its length.
     */
    private void writeUniqueValues(ByteBuffer bb) {
        VarIntUtil.writeVarInt32(bb, unique.size());
        for (HashableByteArray u : unique) {
            VarIntUtil.writeVarInt32(bb, u.b.length);
            bb.put(u.b);
        }
    }

    /**
     * Writes the unique values followed by the enum ids compressed with FastPFOR. The ids which the codec did not
     * consume are appended varint encoded. If the codec produced no output at all, the sub-format id already written
     * is changed to {@link #SUBFORMAT_ID_ENUM_VB}.
     *
     * @return true
     */
    boolean writeEnumFprof(ByteBuffer bb) {
        int position = bb.position();
        bb.put(SUBFORMAT_ID_ENUM_FPROF);
        // first write the enum values
        writeUniqueValues(bb);

        // then write the enum ids
        VarIntUtil.writeVarInt32(bb, size);
        FastPFOR128 fastpfor = FastPFORFactory.get();
        IntWrapper inputoffset = new IntWrapper(0);
        IntWrapper outputoffset = new IntWrapper(0);
        int[] out = new int[size];
        int[] in = enumValues.array();
        fastpfor.compress(in, inputoffset, size, out, outputoffset);
        if (outputoffset.get() == 0) {
            // fastpfor didn't compress anything, probably there were too few datapoints
            bb.put(position, SUBFORMAT_ID_ENUM_VB);
        } else {
            // write the fastpfor output
            for (int i = 0; i < outputoffset.get(); i++) {
                bb.putInt(out[i]);
            }
        }
        // write the remaining ids varint compressed
        for (int i = inputoffset.get(); i < size; i++) {
            VarIntUtil.writeVarInt32(bb, in[i]);
        }
        return true;
    }

    /**
     * Writes the unique values followed by the number of runs, the length of each run and the enum id of each run.
     *
     * @return true
     */
    boolean writeEnumRle(ByteBuffer bb) {
        bb.put(SUBFORMAT_ID_ENUM_RLE);
        // first write the enum values
        writeUniqueValues(bb);

        // then write the rleCounts
        int runs = rleCounts.size();
        VarIntUtil.writeVarInt32(bb, runs);
        for (int i = 0; i < runs; i++) {
            VarIntUtil.writeVarInt32(bb, rleCounts.get(i));
        }
        // and write the rleValues
        for (int i = 0; i < runs; i++) {
            VarIntUtil.writeVarInt32(bb, rleValues.get(i));
        }
        return true;
    }

    /**
     * Populates the segment from the buffer, dispatching on the sub-format id found in the first byte.
     *
     * @throws DecodingException
     *             if the sub-format id is unknown or any exception is encountered while decoding
     */
    protected void parse(ByteBuffer bb) throws DecodingException {
        byte formatId = bb.get();
        try {
            switch (formatId) {
            case SUBFORMAT_ID_RAW:
                parseRaw(bb);
                break;
            case SUBFORMAT_ID_ENUM_VB: // intentional fall through
            case SUBFORMAT_ID_ENUM_FPROF: // intentional fall through
            case SUBFORMAT_ID_ENUM_RLE:
                parseEnum(formatId, bb);
                break;
            default:
                throw new DecodingException("Unknown subformatid: " + formatId);
            }
        } catch (DecodingException e) {
            throw e;
        } catch (Exception e) {
            throw new DecodingException("Cannot decode object segment subformatId " + formatId, e);
        }
    }

    /**
     * Reads one length-prefixed binary from the buffer and deserialises it.
     */
    private E readObject(ByteBuffer bb) throws DecodingException {
        int l = VarIntUtil.readVarInt32(bb);
        byte[] b = new byte[l];
        bb.get(b);
        return objSerializer.deserialize(b);
    }

    private void parseRaw(ByteBuffer bb) throws DecodingException {
        size = VarIntUtil.readVarInt32(bb);
        objectList = new ArrayList<E>(size);
        for (int i = 0; i < size; i++) {
            objectList.add(readObject(bb));
        }
    }

    /**
     * Reads the list of unique values and then the enum ids according to the sub-format.
     */
    void parseEnum(int formatId, ByteBuffer bb) throws DecodingException {
        int n = VarIntUtil.readVarInt32(bb);
        List<E> uniqueValues = new ArrayList<E>();
        for (int i = 0; i < n; i++) {
            uniqueValues.add(readObject(bb));
        }
        if (formatId == SUBFORMAT_ID_ENUM_RLE) {
            parseEnumRle(uniqueValues, bb);
        } else {
            parseEnumNonRle(formatId, uniqueValues, bb);
        }
    }

    private void parseEnumNonRle(int formatId, List<E> uniqueValues, ByteBuffer bb) throws DecodingException {
        size = VarIntUtil.readVarInt32(bb);
        int position = bb.position();
        int[] ids = new int[size];
        IntWrapper outputoffset = new IntWrapper(0);
        if (formatId == SUBFORMAT_ID_ENUM_FPROF) {
            // the length of the compressed part is not stored: hand all the remaining complete ints to the codec
            // and reposition the buffer according to what it has consumed
            int[] x = new int[(bb.limit() - position) / 4];
            for (int i = 0; i < x.length; i++) {
                x[i] = bb.getInt();
            }
            IntWrapper inputoffset = new IntWrapper(0);
            FastPFOR128 fastpfor = FastPFORFactory.get();
            fastpfor.uncompress(x, inputoffset, x.length, ids, outputoffset);
            bb.position(position + inputoffset.get() * 4);
        }
        // the ids not produced by the codec are varint encoded
        for (int i = outputoffset.get(); i < size; i++) {
            ids[i] = VarIntUtil.readVarInt32(bb);
        }
        objectList = new ArrayList<E>(size);
        for (int i = 0; i < size; i++) {
            objectList.add(uniqueValues.get(ids[i]));
        }
    }

    private void parseEnumRle(List<E> uniqueValues, ByteBuffer bb) throws DecodingException {
        int countNum = VarIntUtil.readVarInt32(bb);
        rleCounts = new IntArray(countNum);
        size = 0;
        for (int i = 0; i < countNum; i++) {
            int c = VarIntUtil.readVarInt32(bb);
            rleCounts.add(c);
            size += c;
        }
        rleObjectList = new ArrayList<>(countNum);
        for (int i = 0; i < countNum; i++) {
            int c = VarIntUtil.readVarInt32(bb);
            rleObjectList.add(uniqueValues.get(c));
        }
        runLengthEncoded = true;
    }

    /**
     * @return the size of the raw encoding as computed by {@link #consolidate()}; the enum encodings are only
     *         attempted by {@link #writeTo(ByteBuffer)} when their computed size is not larger
     */
    @Override
    public int getMaxSerializedSize() {
        return rawSize;
    }

    /**
     * Returns a range of elements.
     * <p>
     * In ascending mode the result contains the elements from posStart (inclusive) to posStop (exclusive), in that
     * order. In descending mode it contains the elements from posStop (inclusive) down to posStart (exclusive).
     *
     * @throws IllegalArgumentException
     *             if posStart is not smaller than posStop
     */
    @Override
    public E[] getRange(int posStart, int posStop, boolean ascending) {
        if (posStart >= posStop) {
            throw new IllegalArgumentException("posStart has to be smaller than posStop");
        }
        if (!runLengthEncoded) {
            return getNonRleRange(posStart, posStop, ascending);
        }
        return ascending ? getRleRangeAscending(posStart, posStop) : getRleRangeDescending(posStart, posStop);
    }

    E[] getNonRleRange(int posStart, int posStop, boolean ascending) {
        // the component type of the array is taken from the first element of the segment
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(objectList.get(0).getClass(), posStop - posStart);
        if (ascending) {
            for (int i = posStart; i < posStop; i++) {
                r[i - posStart] = objectList.get(i);
            }
        } else {
            for (int i = posStop; i > posStart; i--) {
                r[posStop - i] = objectList.get(i);
            }
        }
        return r;
    }

    E[] getRleRangeAscending(int posStart, int posStop) {
        int n = posStop - posStart;
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n);

        // find the run i containing posStart; k is the offset of posStart inside that run
        int k = posStart;
        int i = 0;
        while (k >= rleCounts.get(i)) {
            k -= rleCounts.get(i++);
        }
        int pos = 0;
        while (pos < n) {
            r[pos++] = rleObjectList.get(i);
            k++;
            if (k >= rleCounts.get(i)) {
                i++;
                k = 0;
            }
        }
        return r;
    }

    /**
     * Returns the elements from posStop (inclusive) down to posStart (exclusive) of a run length encoded segment.
     *
     * @throws IndexOutOfBoundsException
     *             if posStop is not smaller than the size of the segment
     */
    public E[] getRleRangeDescending(int posStart, int posStop) {
        if (posStop >= size) {
            throw new IndexOutOfBoundsException("Index: " + posStop + " size: " + size);
        }
        int n = posStop - posStart;
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n);

        // walk the runs from the end to find the run i containing posStop; k becomes the offset inside that run
        int k = size - posStop;
        int i = rleCounts.size() - 1;
        while (k > rleCounts.get(i)) {
            k -= rleCounts.get(i--);
        }
        k = rleCounts.get(i) - k;

        int pos = 0;
        while (true) {
            r[pos++] = rleObjectList.get(i);
            if (pos == n) {
                break;
            }
            k--;
            if (k < 0) {
                i--;
                k = rleCounts.get(i) - 1;
            }
        }
        return r;
    }

    /**
     * @return the element at the given position
     */
    public E get(int index) {
        if (!runLengthEncoded) {
            return objectList.get(index);
        }
        // accumulate the run lengths until they cover the index
        int k = 0;
        int i = 0;
        while (k <= index) {
            k += rleCounts.get(i);
            i++;
        }
        return rleObjectList.get(i - 1);
    }

    /**
     * @return the number of elements in this segment (not taking into account any compression due to run-length
     *         encoding)
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * Computes the runs for the run length encoding and the serialised size of the raw, enum and enum RLE encodings.
     * Has to be called after the last element has been added and before the segment is serialised.
     *
     * @return this segment
     */
    ObjectSegment<E> consolidate() {
        rleCounts = new IntArray();
        rleValues = new IntArray();

        rawSize = enumRawSize = enumRleSize = 1; // subFormatId byte
        rawSize += VarIntUtil.getEncodedSize(size);
        enumRawSize += VarIntUtil.getEncodedSize(size) + VarIntUtil.getEncodedSize(unique.size());
        enumRleSize += VarIntUtil.getEncodedSize(unique.size());

        for (int i = 0; i < size; i++) {
            byte[] b = serializedObjectList.get(i).b;
            int valueId = enumValues.get(i);
            rawSize += VarIntUtil.getEncodedSize(b.length) + b.length;
            enumRawSize += VarIntUtil.getEncodedSize(valueId);

            // extend the last run if it has the same value, otherwise start a new one
            int lastRun = rleValues.size() - 1;
            if (lastRun >= 0 && rleValues.get(lastRun) == valueId) {
                rleCounts.set(lastRun, rleCounts.get(lastRun) + 1);
            } else {
                rleCounts.add(1);
                rleValues.add(valueId);
            }
        }

        // both enum encodings start with the list of unique values
        for (HashableByteArray u : unique) {
            int s = VarIntUtil.getEncodedSize(u.b.length) + u.b.length;
            enumRawSize += s;
            enumRleSize += s;
        }

        enumRleSize += VarIntUtil.getEncodedSize(rleCounts.size());
        for (int i = 0; i < rleCounts.size(); i++) {
            enumRleSize += VarIntUtil.getEncodedSize(rleCounts.get(i))
                    + VarIntUtil.getEncodedSize(rleValues.get(i));
        }
        consolidated = true;
        return this;
    }

    /**
     * Consistent with {@link #equals(Object)}: based only on the list of serialised elements.
     */
    @Override
    public int hashCode() {
        return (serializedObjectList == null) ? 0 : serializedObjectList.hashCode();
    }

    /**
     * Two segments are equal if they are of the same class and their lists of serialised elements are equal. That
     * list is only created for segments built for serialisation.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        ObjectSegment<?> other = (ObjectSegment<?>) obj;
        if (serializedObjectList == null) {
            return other.serializedObjectList == null;
        }
        return serializedObjectList.equals(other.serializedObjectList);
    }
}

/**
 * wrapper around byte[] to allow it to be used in HashMaps
 */
class HashableByteArray {
    // cached hash code; 0 means not computed yet
    private int hash = 0;
    final byte[] b;

    public HashableByteArray(byte[] b) {
        this.b = b;
    }

    @Override
    public int hashCode() {
        if (hash == 0) {
            hash = Arrays.hashCode(b);
        }
        return hash;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        HashableByteArray other = (HashableByteArray) obj;
        // comparing the (cached) hash codes first is a cheap way to rule out most of the different arrays
        if (hashCode() != other.hashCode()) {
            return false;
        }
        return Arrays.equals(b, other.b);
    }
}

/**
 * Converts the elements of an {@link ObjectSegment} to and from their binary form.
 */
interface ObjectSerializer<E> {
    byte getFormatId();

    E deserialize(byte[] b) throws DecodingException;

    byte[] serialize(E e);
}