package lucandra;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.thrift.ColumnPath;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.NotFoundException;
import org.apache.cassandra.thrift.TimedOutException;
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.thrift.TException;
public class TermFreqVector implements org.apache.lucene.index.TermFreqVector, org.apache.lucene.index.TermPositionVector {
private String field;
private String docId;
private String[] terms;
private int[] freqVec;
private int[][] termPositions;
private TermVectorOffsetInfo[][] termOffsets;
public TermFreqVector(String indexName, String field, String docId, Cassandra.Iface client) {
this.field = field;
this.docId = docId;
String key = indexName + CassandraUtils.delimeter + docId;
// Get all terms
ColumnOrSuperColumn column;
try {
column = client.get(CassandraUtils.keySpace, CassandraUtils.hashKey(key), CassandraUtils.metaColumnPath, ConsistencyLevel.ONE);
List<String> allTermList = (List<String>) CassandraUtils.fromBytes(column.column.value);
List<String> keys = new ArrayList<String>();
for (String termStr : allTermList) {
Term t = CassandraUtils.parseTerm(termStr);
// skip the ones not of this field
if (!t.field().equals(field))
continue;
// add to multiget params
keys.add(CassandraUtils.hashKey(
indexName+CassandraUtils.delimeter+termStr
));
}
//Fetch all term vectors in this field
Map<String, ColumnOrSuperColumn> allTermInfo = client.multiget(CassandraUtils.keySpace, keys, new ColumnPath(CassandraUtils.termVecColumnFamily).setSuper_column(docId.getBytes()), ConsistencyLevel.ONE);
terms = new String[allTermInfo.size()];
freqVec = new int[allTermInfo.size()];
termPositions = new int[allTermInfo.size()][];
termOffsets = new TermVectorOffsetInfo[allTermInfo.size()][];
int i = 0;
for(Map.Entry<String, ColumnOrSuperColumn> e : allTermInfo.entrySet()){
String termStr = e.getKey().substring(e.getKey().indexOf(CassandraUtils.delimeter) + CassandraUtils.delimeter.length());
Term t = CassandraUtils.parseTerm(termStr);
terms[i] = t.text();
//Find the offsets and positions
Column positionVector = null;
Column offsetVector = null;
List<Column> columns = e.getValue().getSuper_column().getColumns();
for(Column c : columns){
if(Arrays.equals(c.getName(), CassandraUtils.positionVectorKey.getBytes()))
positionVector = c;
if(Arrays.equals(c.getName(), CassandraUtils.offsetVectorKey.getBytes()))
offsetVector = c;
}
termPositions[i] = positionVector == null ? new int[]{} : CassandraUtils.byteArrayToIntArray(positionVector.value);
freqVec[i] = termPositions[i].length;
if(offsetVector == null){
termOffsets[i] = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
}else{
int[] offsets = CassandraUtils.byteArrayToIntArray(offsetVector.getValue());
termOffsets[i] = new TermVectorOffsetInfo[freqVec[i]];
for(int j=0,k=0; j<offsets.length; j+=2,k++){
termOffsets[i][k] = new TermVectorOffsetInfo(offsets[j] , offsets[j+1]);
}
}
i++;
}
} catch (InvalidRequestException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (NotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (UnavailableException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (TimedOutException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (TException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public String getField() {
return field;
}
public int[] getTermFrequencies() {
return freqVec;
}
public String[] getTerms() {
return terms;
}
public int indexOf(String term) {
return Arrays.binarySearch(terms, term);
}
public int[] indexesOf(String[] terms, int start, int len) {
int[] res = new int[terms.length];
for(int i=0; i<terms.length; i++){
res[i] = indexOf(terms[i]);
}
return res;
}
public int size() {
return terms.length;
}
public TermVectorOffsetInfo[] getOffsets(int index) {
return termOffsets[index];
}
public int[] getTermPositions(int index) {
return termPositions[index];
}
}