package ivory.ffg.data;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import com.google.common.base.Preconditions;
import org.apache.hadoop.io.WritableUtils;
/**
 * Document vector representation: Flat array compressed using
 * Variable-Length Integers.
 *
 * <p>Serialized layout (see {@link #write(DataOutput)}): a 4-byte int holding the
 * number of entries in the original vector, a 4-byte int holding the number of
 * compressed bytes, followed by the compressed bytes themselves.
 *
 * <p>Instances are obtained through {@link #newInstance(int[])} or
 * {@link #readInstance(DataInput)}.
 *
 * @author Nima Asadi
 */
public class DocumentVectorVIntArray implements DocumentVector {
  private byte[] document; // Compressed document vector
  private int documentLength; // Length of the original document vector

  /**
   * Writes the original length, the compressed size and the compressed bytes.
   *
   * @param output destination; must not be null
   * @throws IOException if the underlying output fails
   */
  @Override public void write(DataOutput output) throws IOException {
    Preconditions.checkNotNull(output);
    output.writeInt(documentLength);
    output.writeInt(document.length);
    // Bulk write; emits exactly the same bytes as writing them one at a time.
    output.write(document);
  }

  /**
   * Replaces the state of this object with the data read from input, which must
   * be in the layout produced by {@link #write(DataOutput)}.
   *
   * @param input source; must not be null
   * @throws IOException if the input fails, ends prematurely, or declares a
   *         negative length
   */
  @Override public void readFields(DataInput input) throws IOException {
    Preconditions.checkNotNull(input);
    int length = input.readInt();
    int size = input.readInt();
    // Reject corrupt headers here rather than failing later with a
    // NegativeArraySizeException.
    if(length < 0 || size < 0) {
      throw new IOException("Corrupt document vector: documentLength=" + length
          + ", compressed size=" + size);
    }
    byte[] bytes = new byte[size];
    input.readFully(bytes);
    // Assign only after a complete read so a failure leaves this object unchanged.
    documentLength = length;
    document = bytes;
  }

  /**
   * Reads and returns an instance of this class from input
   *
   * @param input DataInput
   * @return A compressed document vector
   * @throws IOException if reading from input fails
   */
  public static DocumentVectorVIntArray readInstance(DataInput input) throws IOException {
    Preconditions.checkNotNull(input);
    DocumentVectorVIntArray document = new DocumentVectorVIntArray();
    document.readFields(input);
    return document;
  }

  // Instances are created via newInstance/readInstance only.
  private DocumentVectorVIntArray() {
  }

  /**
   * @return the number of entries in the original (uncompressed) vector
   */
  @Override public int getDocumentLength() {
    return documentLength;
  }

  /**
   * Decodes the compressed bytes back into the flat int array.
   *
   * @return a newly allocated array of {@link #getDocumentLength()} entries
   * @throws IOException if the compressed bytes cannot be decoded
   */
  @Override public int[] decompressDocument() throws IOException {
    int[] decomp = new int[documentLength];
    ByteArrayInputStream byteStream = new ByteArrayInputStream(document);
    DataInputStream dataStream = new DataInputStream(byteStream);
    for(int i = 0; i < decomp.length; i++) {
      decomp[i] = WritableUtils.readVInt(dataStream);
    }
    dataStream.close();
    return decomp;
  }

  /**
   * Decompresses the document and delegates to
   * {@link DocumentVectorUtility#getPositions} with the given terms.
   *
   * @param terms terms to look up; must not be null
   * @return the result of {@code DocumentVectorUtility.getPositions}
   *         (presumably one position array per term; verify against that helper)
   * @throws IOException if the document cannot be decompressed
   */
  @Override public int[][] decompressPositions(int[] terms) throws IOException {
    Preconditions.checkNotNull(terms);
    return DocumentVectorUtility.getPositions(decompressDocument(), terms);
  }

  /**
   * Returns the given array as is; this representation applies no term
   * transformation.
   *
   * @param terms terms to transform
   * @return the same array reference that was passed in
   */
  @Override public int[] transformTerms(int[] terms) {
    return terms;
  }

  /**
   * Constructs a document vector and compresses it using Variable-Length Integer coding.
   *
   * @param data Flat array representation of a document
   * @return A document vector.
   * @throws IOException if encoding fails
   */
  public static DocumentVectorVIntArray newInstance(int[] data) throws IOException {
    Preconditions.checkNotNull(data);
    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
    DataOutputStream dataStream = new DataOutputStream(byteStream);
    for(int value : data) {
      WritableUtils.writeVInt(dataStream, value);
    }
    dataStream.close();

    DocumentVectorVIntArray instance = new DocumentVectorVIntArray();
    instance.document = byteStream.toByteArray();
    instance.documentLength = data.length;
    return instance;
  }

  /**
   * Two vectors are equal when they have the same original length and identical
   * compressed bytes. Returns {@code false} (rather than throwing) for
   * {@code null} or an object of another type, as required by the
   * {@link Object#equals(Object)} contract.
   */
  @Override public boolean equals(Object o) {
    if(this == o) {
      return true;
    }
    if(!(o instanceof DocumentVectorVIntArray)) {
      return false; // also covers null
    }
    DocumentVectorVIntArray other = (DocumentVectorVIntArray) o;
    return this.documentLength == other.documentLength
        && Arrays.equals(this.document, other.document);
  }

  /**
   * Hash code consistent with {@link #equals(Object)}: derived from the original
   * length and the compressed bytes.
   */
  @Override public int hashCode() {
    return 31 * documentLength + Arrays.hashCode(document);
  }
}