/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.solbase.lucenehbase;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Scorer;
import org.solbase.SolbaseUtil;
/**
 * Per-document metadata for one term: term frequency, optional norm byte,
 * optional position and offset vectors, and optional integer sort values.
 *
 * <p>Binary layout produced by {@link #serialize()} and consumed by the
 * {@code create(...)} methods:
 * <ol>
 *   <li>one flag byte: bit 0 = norm present, bit 1 = positions present,
 *       bit 2 = offsets present, bits 3-7 = presence of sort slots 0-4;</li>
 *   <li>{@code ceil((Scorer.numSort - initialSortSlots) / 8)} additional flag
 *       bytes when {@code Scorer.numSort > initialSortSlots};</li>
 *   <li>the term frequency as a VInt;</li>
 *   <li>the norm byte, if flagged;</li>
 *   <li>the positions as VInts, if flagged ({@code create} reads exactly
 *       {@code freq} of them);</li>
 *   <li>the offset count followed by the offsets, all VInts, if flagged;</li>
 *   <li>one VInt per present sort slot.</li>
 * </ol>
 *
 * <p>NOTE(review): the {@code create(...)} methods skip the extra flag bytes
 * and do not decode trailing sort values, so {@link #sortValues} is left
 * untouched by deserialization.
 */
public class TermDocMetadata implements Serializable {
    private static final long serialVersionUID = -7706773478353560562L;

    /** Map key selecting the position vector in the map-based constructor. */
    public static final ByteBuffer positionVectorKeyBytes = ByteBuffer.wrap(Bytes.toBytes("P"));
    /** Map key selecting the offset vector in the map-based constructor. */
    public static final ByteBuffer offsetVectorKeyBytes = ByteBuffer.wrap(Bytes.toBytes("O"));
    /** Map key selecting the term frequency in the map-based constructor. */
    public static final ByteBuffer termFrequencyKeyBytes = ByteBuffer.wrap(Bytes.toBytes("F"));
    /** Map key selecting the norm in the map-based constructor. */
    public static final ByteBuffer normsKeyBytes = ByteBuffer.wrap(Bytes.toBytes("N"));
    /** Map key selecting the sort values in the map-based constructor. */
    public static final ByteBuffer sortFieldKeyBytes = ByteBuffer.wrap(Bytes.toBytes("S"));

    /** Number of sort slots whose presence bits fit in the first flag byte. */
    public static final int initialSortSlots = 5;

    private static final int FLAG_NORM = 1;
    private static final int FLAG_POSITIONS = 2;
    private static final int FLAG_OFFSETS = 4;
    /** Bit index of sort slot 0; the three lower bits are the content flags. */
    private static final int SORT_FLAG_SHIFT = 3;
    /**
     * Worst-case encoded size budgeted per VInt when sizing the output buffer.
     * NOTE(review): assumes SolbaseUtil.writeVInt is a 7-bit variable-length
     * encoding (at most 5 bytes for a 32-bit int) - confirm against SolbaseUtil.
     */
    private static final int MAX_VINT_BYTES = 5;

    public int docId;
    /** Term frequency; -1 until assigned. */
    public int freq = -1;
    /** Norm byte, or {@code null} when none is stored. */
    public Byte norm = null;
    public int[] positions = null;
    public int[] offsets = null;
    /** Sort values indexed by sort slot; -1 marks an absent slot. May be {@code null}. */
    public int[] sortValues;
    private Term term;
    /** Not read or written by this class. */
    public TermDocMetadataVersionIdentifier versionIdentifier;
    private byte[] fieldTermKeyBytes;

    /**
     * Creates metadata holding only a document id and a term frequency.
     *
     * @param docId document id
     * @param freq  term frequency
     */
    public TermDocMetadata(int docId, int freq) {
        this.docId = docId;
        this.freq = freq;
    }

    /**
     * Decodes metadata from a serialized byte array.
     *
     * @param docId  document id to assign
     * @param aBytes serialized metadata
     */
    public TermDocMetadata(int docId, byte[] aBytes) {
        // wrap() already yields a fresh buffer view, so the array's owner is unaffected
        create(docId, ByteBuffer.wrap(aBytes));
    }

    /**
     * Decodes metadata from a buffer; the buffer's position is advanced.
     *
     * @param docId document id to assign
     * @param bytes serialized metadata, positioned at the first flag byte
     */
    public TermDocMetadata(int docId, ByteBuffer bytes) {
        create(docId, bytes);
    }

    /**
     * Decodes a VInt document id followed by serialized metadata from a stream.
     *
     * @param bb stream positioned at the document id
     * @throws IOException if reading from the stream fails
     */
    public TermDocMetadata(InputStream bb) throws IOException {
        int docId = SolbaseUtil.mreadVInt(bb);
        create(docId, bb);
    }

    /**
     * Decodes metadata from a stream.
     *
     * @param docId document id to assign
     * @param bytes stream positioned at the first flag byte
     * @throws IOException if reading from the stream fails
     */
    public TermDocMetadata(int docId, InputStream bytes) throws IOException {
        create(docId, bytes);
    }

    /**
     * Builds metadata from a map keyed by the {@code *KeyBytes} constants of this class.
     *
     * @param docId             document id
     * @param data              values per key: norm (first element), positions, offsets,
     *                          frequency (first element, 0 when the list is empty) and
     *                          sort values (the first {@code Scorer.numSort} elements)
     * @param fieldTermKeyBytes key bytes returned by {@link #getFieldTermKey()}
     * @param term              term returned by {@link #getTerm()}
     * @throws IllegalArgumentException if the map contains an unknown key, if no
     *                                  non-negative frequency results, or if positions
     *                                  are given and their count differs from the frequency
     */
    public TermDocMetadata(int docId, Map<ByteBuffer, List<Number>> data, byte[] fieldTermKeyBytes, Term term) {
        this.docId = docId;
        this.term = term;
        this.fieldTermKeyBytes = fieldTermKeyBytes;

        for (Map.Entry<ByteBuffer, List<Number>> entry : data.entrySet()) {
            ByteBuffer key = entry.getKey();
            assert key != null;
            List<Number> values = entry.getValue();

            if (key.equals(normsKeyBytes)) {
                norm = values.get(0).byteValue();
            } else if (key.equals(positionVectorKeyBytes)) {
                positions = toIntArray(values, values.size());
            } else if (key.equals(offsetVectorKeyBytes)) {
                offsets = toIntArray(values, values.size());
            } else if (key.equals(termFrequencyKeyBytes)) {
                freq = values.size() == 0 ? 0 : values.get(0).intValue();
            } else if (key.equals(sortFieldKeyBytes)) {
                // intValue() rather than an Integer cast, so any Number implementation is accepted
                sortValues = toIntArray(values, Scorer.numSort);
            } else {
                throw new IllegalArgumentException(Bytes.toString(key.array()));
            }
        }

        if (freq < 0)
            throw new IllegalArgumentException("term freq is < 0");
        if (positions != null && freq != positions.length)
            throw new IllegalArgumentException("freq != position count: " + freq + " vs " + positions.length);
    }

    /**
     * Populates this instance from a stream holding serialized metadata.
     *
     * @param docId document id to assign
     * @param bytes stream positioned at the first flag byte
     * @throws IOException if reading from the stream fails
     */
    public void create(int docId, InputStream bytes) throws IOException {
        // keep in step with the ByteBuffer overload, which also records the id
        this.docId = docId;

        byte flags = (byte) bytes.read();
        // extra flag bytes only carry sort-slot presence bits, which are not decoded here
        for (int remaining = extraFlagByteCount(); remaining > 0; remaining--) {
            bytes.read();
        }

        boolean normPresent = (flags & FLAG_NORM) != 0;
        boolean positionsPresent = (flags & FLAG_POSITIONS) != 0;
        boolean offsetsPresent = (flags & FLAG_OFFSETS) != 0;

        freq = SolbaseUtil.mreadVInt(bytes);
        norm = normPresent ? Byte.valueOf((byte) bytes.read()) : null;

        // there is one position per occurrence, so the count is the frequency
        positions = positionsPresent ? readVInts(bytes, freq) : null;

        if (offsetsPresent) {
            int count = SolbaseUtil.mreadVInt(bytes);
            offsets = readVInts(bytes, count);
        } else {
            offsets = null;
        }
    }

    /**
     * Populates this instance from a buffer holding serialized metadata.
     * The buffer's position is advanced past the bytes consumed.
     *
     * @param docId document id to assign
     * @param bytes buffer positioned at the first flag byte
     */
    public void create(int docId, ByteBuffer bytes) {
        this.docId = docId;

        byte flags = bytes.get();
        // extra flag bytes only carry sort-slot presence bits, which are not decoded here
        for (int remaining = extraFlagByteCount(); remaining > 0; remaining--) {
            bytes.get();
        }

        boolean normPresent = (flags & FLAG_NORM) != 0;
        boolean positionsPresent = (flags & FLAG_POSITIONS) != 0;
        boolean offsetsPresent = (flags & FLAG_OFFSETS) != 0;

        freq = SolbaseUtil.mreadVInt(bytes);
        norm = normPresent ? Byte.valueOf(bytes.get()) : null;

        // there is one position per occurrence, so the count is the frequency
        positions = positionsPresent ? readVInts(bytes, freq) : null;

        if (offsetsPresent) {
            int count = SolbaseUtil.mreadVInt(bytes);
            offsets = readVInts(bytes, count);
        } else {
            offsets = null;
        }
    }

    /** @return the document id */
    public int getDocId() {
        return this.docId;
    }

    /** @return {@code true} if a non-empty position vector is stored */
    protected boolean hasPositions() {
        return positions != null && positions.length != 0;
    }

    /** @return {@code true} if a non-empty offset vector is stored */
    public boolean hasOffsets() {
        return offsets != null && offsets.length != 0;
    }

    /** @return the stored position vector (not copied), or {@code null} if none */
    protected int[] getPositions() {
        return positions;
    }

    /** @return the stored offset vector (not copied), or {@code null} if none */
    protected int[] getOffsets() {
        return offsets;
    }

    /** @return {@code true} if a norm byte is stored */
    protected boolean hasNorm() {
        return norm != null;
    }

    /**
     * @return the norm byte
     * @throws NullPointerException if no norm is stored; check {@link #hasNorm()} first
     */
    public byte getNorm() {
        return norm;
    }

    /**
     * @param index sort slot
     * @return {@code true} if the slot exists and holds a value other than -1
     */
    protected boolean hasSortField(int index) {
        return sortValues != null
                && index >= 0
                && index < sortValues.length
                && sortValues[index] != -1;
    }

    /**
     * @param index sort slot
     * @return the raw value stored in the slot (-1 marks an absent slot)
     */
    public int getSortValue(int index) {
        return sortValues[index];
    }

    /** @return the key bytes passed to the map-based constructor, or {@code null} */
    public byte[] getFieldTermKey() {
        return fieldTermKeyBytes;
    }

    /**
     * Encodes this metadata using the layout described on the class.
     *
     * @return a buffer positioned at 0 whose limit is the number of bytes written;
     *         its capacity may be larger than its limit
     */
    public ByteBuffer serialize() {
        final boolean withNorm = hasNorm();
        final boolean withPositions = hasPositions();
        final boolean withOffsets = hasOffsets();
        final int[] positionValues = withPositions ? getPositions() : null;
        final int[] offsetValues = withOffsets ? getOffsets() : null;
        final int numSort = Scorer.numSort;

        // store the initial content flags in the initial byte; sort-slot presence
        // bits follow from bit 3 and spill into the additional flag bytes
        byte[] flagBytes = new byte[1 + extraFlagByteCount()];
        if (withNorm)
            flagBytes[0] |= FLAG_NORM;
        if (withPositions)
            flagBytes[0] |= FLAG_POSITIONS;
        if (withOffsets)
            flagBytes[0] |= FLAG_OFFSETS;

        int sortFieldCount = 0;
        for (int i = 0; i < numSort; i++) {
            if (hasSortField(i)) {
                int bit = i + SORT_FLAG_SHIFT;
                flagBytes[bit / 8] |= (byte) (1 << (bit % 8));
                sortFieldCount++;
            }
        }

        // freq, positions, offset count + offsets, sort values
        int vintCount = 1
                + (withPositions ? positionValues.length : 0)
                + (withOffsets ? offsetValues.length + 1 : 0)
                + sortFieldCount;
        // budget the worst case per VInt so large values cannot overflow the buffer;
        // flip() below trims the limit to what was actually written
        int capacity = flagBytes.length + (withNorm ? 1 : 0) + vintCount * MAX_VINT_BYTES;

        ByteBuffer out = ByteBuffer.allocate(capacity);
        out.put(flagBytes);
        out.put(SolbaseUtil.writeVInt(freq));
        if (withNorm)
            out.put(getNorm());
        if (withPositions) {
            for (int i = 0; i < positionValues.length; i++) {
                out.put(SolbaseUtil.writeVInt(positionValues[i]));
            }
        }
        if (withOffsets) {
            out.put(SolbaseUtil.writeVInt(offsetValues.length));
            for (int i = 0; i < offsetValues.length; i++) {
                out.put(SolbaseUtil.writeVInt(offsetValues[i]));
            }
        }
        for (int i = 0; i < numSort; i++) {
            if (hasSortField(i)) {
                out.put(SolbaseUtil.writeVInt(getSortValue(i)));
            }
        }
        out.flip();
        return out;
    }

    /** @return the term passed to the map-based constructor, or {@code null} */
    public Term getTerm() {
        return this.term;
    }

    /** Number of flag bytes following the first one, given the current Scorer.numSort. */
    private static int extraFlagByteCount() {
        int overflowSlots = Scorer.numSort - initialSortSlots;
        // integer ceil(overflowSlots / 8) for positive overflowSlots
        return overflowSlots > 0 ? (overflowSlots + 7) / 8 : 0;
    }

    /** Copies the first {@code count} elements of {@code values} into an int array. */
    private static int[] toIntArray(List<Number> values, int count) {
        int[] result = new int[count];
        for (int i = 0; i < count; i++) {
            result[i] = values.get(i).intValue();
        }
        return result;
    }

    /** Reads {@code count} consecutive VInts from a stream. */
    private static int[] readVInts(InputStream in, int count) throws IOException {
        int[] result = new int[count];
        for (int i = 0; i < count; i++) {
            result[i] = SolbaseUtil.mreadVInt(in);
        }
        return result;
    }

    /** Reads {@code count} consecutive VInts from a buffer. */
    private static int[] readVInts(ByteBuffer in, int count) {
        int[] result = new int[count];
        for (int i = 0; i < count; i++) {
            result[i] = SolbaseUtil.mreadVInt(in);
        }
        return result;
    }
}