/**
 * Copyright 2009 T Jake Luciani
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package lucandra;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.log4j.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermPositions;

/**
 * {@link TermDocs} / {@link TermPositions} implementation that walks the
 * super columns returned by a {@link LucandraTermEnum}.
 *
 * <p>Each entry of {@code termDocs} is one posting: the super column name is
 * passed to {@code indexReader.getDocumentNumber(...)} to obtain the document
 * number, and its sub-columns carry the term frequency and (optionally) the
 * position vector.
 *
 * <p>After any {@code seek}, {@code docPosition} is {@code -1}; the first
 * successful {@link #next()} or {@link #read(int[], int[])} moves it onto the
 * first posting.
 */
public class LucandraTermDocs implements TermDocs, TermPositions {

    private final IndexReader indexReader;
    private LucandraTermEnum termEnum;
    private List<ColumnOrSuperColumn> termDocs;
    private int docPosition;
    private int[] termPositionArray;
    private int termPosition;
    private int doc;
    private int freq;

    private static final Logger logger = Logger.getLogger(LucandraTermDocs.class);

    /**
     * Creates a cursor bound to the given reader with a fresh term enum.
     *
     * @param indexReader reader used for term lookup and document numbering
     */
    public LucandraTermDocs(IndexReader indexReader) {
        this.indexReader = indexReader;
        termEnum = new LucandraTermEnum(indexReader);
    }

    /** No-op: this class holds no resources of its own. */
    public void close() throws IOException {
        // nothing to release
    }

    /** @return the document number set by the last successful advance, or -1 after a seek */
    public int doc() {
        return doc;
    }

    /**
     * Resolves the document number of the posting at the current position.
     * A negative {@code docPosition} (the state right after a seek) is first
     * normalized to 0. Callers must ensure a posting exists at that position.
     */
    private int getNextDoc() {
        if (docPosition < 0) {
            docPosition = 0;
        }

        byte[] docKey = termDocs.get(docPosition).getSuper_column().getName();
        return indexReader.getDocumentNumber(docKey);
    }

    /** @return the term frequency set by the last successful advance, or -1 after a seek */
    public int freq() {
        return freq;
    }

    /**
     * Decodes the term frequency of the posting at {@code docPosition} and, as
     * a side effect, loads its position vector (or {@code null} when the
     * posting has none) and rewinds the position cursor to 0.
     *
     * <p>Reads {@code termDocs.get(docPosition)} directly, so it must only be
     * called while {@code docPosition} addresses an existing posting.
     *
     * @return the term frequency stored in the posting
     * @throws RuntimeException if the posting has no term-frequency column
     */
    public int getNextFreq() {
        // Encode the lookup keys once instead of once per column.
        final byte[] termFrequencyName = CassandraUtils.termFrequencyKey.getBytes();
        final byte[] positionVectorName = CassandraUtils.positionVectorKey.getBytes();

        List<Column> columns = termDocs.get(docPosition).getSuper_column().getColumns();

        Column termFrequency = null;
        Column positionVector = null;

        // No early exit: if a name occurs more than once, the last occurrence wins.
        for (Column c : columns) {
            byte[] name = c.getName();
            if (Arrays.equals(termFrequencyName, name)) {
                termFrequency = c;
            }
            if (Arrays.equals(positionVectorName, name)) {
                positionVector = c;
            }
        }

        if (termFrequency == null) {
            throw new RuntimeException("termFrequency is missing from supercolumn");
        }

        termPositionArray = positionVector == null
                ? null
                : CassandraUtils.byteArrayToIntArray(positionVector.getValue());
        termPosition = 0;

        return CassandraUtils.byteArrayToInt(termFrequency.getValue());
    }

    /**
     * Tells whether a posting exists at the current position, treating the
     * post-seek position (-1) as "about to read index 0". This makes an empty
     * posting list report no data instead of failing on {@code get(0)}.
     */
    private boolean hasMoreDocs() {
        return termDocs != null && Math.max(docPosition, 0) < termDocs.size();
    }

    /** Rewinds the cursor to the "before first posting" state. */
    private void resetCursor() {
        docPosition = -1;
        doc = -1;
        freq = -1;
    }

    /**
     * Advances to the next posting, updating {@link #doc()} and {@link #freq()}.
     *
     * @return {@code true} if a posting was loaded, {@code false} when the
     *         list is absent, empty or exhausted
     */
    public boolean next() throws IOException {
        if (!hasMoreDocs()) {
            return false;
        }

        doc = getNextDoc();
        freq = getNextFreq();
        docPosition++;

        return true;
    }

    /**
     * Bulk-reads postings into the supplied arrays, starting at the current
     * position.
     *
     * @param docs  receives document numbers; its length bounds the read
     * @param freqs receives the matching term frequencies
     * @return the number of postings written
     */
    public int read(int[] docs, int[] freqs) throws IOException {
        int count = 0;

        while (count < docs.length && hasMoreDocs()) {
            doc = getNextDoc();
            freq = getNextFreq();

            docs[count] = doc;
            freqs[count] = freq;

            count++;
            docPosition++;
        }

        return count;
    }

    /**
     * Positions this cursor on the postings of {@code term}.
     *
     * <p>A term enum returned by {@code indexReader.checkTermCache(term)} is
     * adopted when available; otherwise the current enum is reused. If the
     * enum cannot be positioned exactly on {@code term}, the posting list is
     * cleared so that postings of a previously sought term are never served.
     *
     * @param term the term to look up
     */
    public void seek(Term term) throws IOException {
        // on a new term so check cached
        LucandraTermEnum cached = indexReader.checkTermCache(term);
        if (cached != null) {
            termEnum = cached;
        }

        boolean exactMatch = termEnum.skipTo(term) && termEnum.term().equals(term);
        termDocs = exactMatch ? termEnum.getTermDocFreq() : null;

        resetCursor();
    }

    /**
     * Positions this cursor on the current term of {@code termEnum}.
     *
     * <p>A {@link LucandraTermEnum} is adopted as-is; any other enum is
     * translated through {@code indexReader.terms(termEnum.term())}.
     *
     * @param termEnum the enum whose current term should be used
     */
    public void seek(TermEnum termEnum) throws IOException {
        if (termEnum instanceof LucandraTermEnum) {
            this.termEnum = (LucandraTermEnum) termEnum;
        } else {
            this.termEnum = (LucandraTermEnum) indexReader.terms(termEnum.term());
        }

        termDocs = this.termEnum.getTermDocFreq();
        resetCursor();
    }

    /**
     * Loads postings for {@code term} through
     * {@code termEnum.loadFilteredTerms(term, docNums)} and rewinds the cursor.
     *
     * @param term    the term to look up
     * @param docNums passed through to {@code loadFilteredTerms}
     * @return the posting list now backing this cursor, as returned by the term enum
     */
    public List<ColumnOrSuperColumn> filteredSeek(Term term, List<String> docNums) {
        termEnum.loadFilteredTerms(term, docNums);
        termDocs = termEnum.getTermDocFreq();
        resetCursor();

        return termDocs;
    }

    /**
     * Advances (always at least once) until the current document number is
     * greater than or equal to {@code target}.
     *
     * @param target the minimum document number to stop at
     * @return {@code false} if the postings ran out first
     */
    // this should be used to find an already loaded doc
    public boolean skipTo(int target) throws IOException {
        do {
            if (!next()) {
                return false;
            }
        } while (target > doc());

        return true;
    }

    /** Payloads are not supported; always returns {@code null}. */
    public byte[] getPayload(byte[] data, int offset) throws IOException {
        return null;
    }

    /** Payloads are not supported; always returns 0. */
    public int getPayloadLength() {
        return 0;
    }

    /** Payloads are not supported; always returns {@code false}. */
    public boolean isPayloadAvailable() {
        return false;
    }

    /**
     * Returns the next position of the term within the current posting.
     *
     * @return the next position, or -1 when the posting carries no position vector
     * @throws ArrayIndexOutOfBoundsException if called more often than the
     *         position vector has entries
     */
    public int nextPosition() throws IOException {
        if (termPositionArray == null) {
            return -1;
        }

        int pos = termPositionArray[termPosition];
        termPosition++;

        return pos;
    }
}