/*
* ClueWeb Tools: Hadoop tools for manipulating ClueWeb collections
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.clueweb.data;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableUtils;
public class VByteDocVector implements DocVector {
private int[] termids;
public VByteDocVector() {}
public int[] getTermIds() {
return termids;
}
public int getLength() {
return termids.length;
}
public static void fromBytesWritable(BytesWritable bytes, VByteDocVector doc) {
try {
ByteArrayInputStream bytesIn = new ByteArrayInputStream(bytes.getBytes());
DataInputStream data = new DataInputStream(bytesIn);
int length = WritableUtils.readVInt(data);
doc.termids = new int[length];
for (int i = 0; i < length; i++) {
doc.termids[i] = WritableUtils.readVInt(data);
}
} catch (IOException e) {
doc.termids = new int[0];
}
}
public static void toBytesWritable(BytesWritable bytes, int[] termids, int length) {
try {
if (termids == null) {
termids = new int[] {};
length = 0;
}
ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
DataOutputStream dataOut = new DataOutputStream(bytesOut);
WritableUtils.writeVInt(dataOut, length);
for (int i = 0; i < length; i++) {
WritableUtils.writeVInt(dataOut, termids[i]);
}
byte[] raw = bytesOut.toByteArray();
bytes.set(raw, 0, raw.length);
} catch (IOException e) {
bytes.set(new byte[] {}, 0, 0);
}
}
}