/**
* Copyright 2009 T Jake Luciani
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package lucandra;
import java.util.List;
import junit.framework.TestCase;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.KeySlice;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.util.Version;
/**
 * Integration tests for the Lucandra {@code IndexWriter} / {@code IndexReader}
 * pair (both resolved from this package) running against a live Cassandra
 * connection obtained from {@link CassandraUtils#createConnection()}.
 *
 * <p>All tests share one index whose name is derived from
 * {@link System#nanoTime()}, one shared writer and one shared Cassandra client.
 * The read-only tests query documents that {@link #testWriter()} indexes, so
 * they depend on it having run first.
 * NOTE(review): this relies on the test runner invoking the methods in
 * declaration order - confirm for the runner in use.
 */
public class LucandraTests extends TestCase {

    private static final String indexName = String.valueOf(System.nanoTime());
    private static final Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    private static final String text = "this is an example value foobar foobar";
    private static final String highlightedText = "this is an example value <B>foobar</B> <B>foobar</B>";

    private static final Cassandra.Iface client;

    static {
        try {
            client = CassandraUtils.createConnection();
        } catch (Exception e) {
            // Keep the original exception as the cause so the report shows
            // why the connection could not be created.
            throw new ExceptionInInitializerError(e);
        }
    }

    private static final IndexWriter indexWriter = new IndexWriter(indexName, client);

    /** Opens a new reader over the shared test index. */
    private static IndexReader openReader() {
        return new IndexReader(indexName, client);
    }

    /** Creates a query parser with the given default field and the shared analyzer. */
    private static QueryParser parserFor(String defaultField) {
        return new QueryParser(Version.LUCENE_CURRENT, defaultField, analyzer);
    }

    /**
     * Populates the shared index with 104 documents and checks both the rows
     * written to the term-vector column family and the writer's document count.
     */
    public void testWriter() throws Exception {
        Document doc1 = new Document();
        doc1.add(new Field("key", text, Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc1, analyzer);

        Document doc2 = new Document();
        doc2.add(new Field("key", "this is another example", Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.addDocument(doc2, analyzer);

        String start = CassandraUtils.hashKey(indexName + CassandraUtils.delimeter + "key" + CassandraUtils.delimeter);
        String finish = "";

        ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
        SlicePredicate slicePredicate = new SlicePredicate();

        // Get all columns
        SliceRange sliceRange = new SliceRange(new byte[] {}, new byte[] {}, true, Integer.MAX_VALUE);
        slicePredicate.setSlice_range(sliceRange);

        List<KeySlice> columns = client.get_range_slice(CassandraUtils.keySpace, columnParent, slicePredicate,
                start, finish, 5000, ConsistencyLevel.ONE);

        // Count the rows whose key is exactly the hashed key of a "key"-field
        // term belonging to this test's index.
        int matchingColumns = 0;
        for (KeySlice ks : columns) {
            String rowKey = ks.getKey();
            String termStr = rowKey.substring(rowKey.indexOf(CassandraUtils.delimeter) + CassandraUtils.delimeter.length());
            Term term = CassandraUtils.parseTerm(termStr);
            String expectedKey = CassandraUtils.hashKey(
                    indexName + CassandraUtils.delimeter + term.field() + CassandraUtils.delimeter + term.text());
            if (term.field().equals("key") && rowKey.equals(expectedKey)) {
                matchingColumns++;
            }
        }

        // Index 101 documents (date values test300 down to test200) to test ordering
        for (int i = 300; i >= 200; i--) {
            Document doc = new Document();
            doc.add(new Field("key", "sort this", Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("date", "test" + i, Field.Store.YES, Field.Index.NOT_ANALYZED));
            indexWriter.addDocument(doc, analyzer);
        }

        // Unicode doc, also carrying two values for the same field
        Document d3 = new Document();
        d3.add(new Field("key", "\u5639\u563b", Field.Store.YES, Field.Index.ANALYZED));
        d3.add(new Field("key", "samefield", Field.Store.YES, Field.Index.ANALYZED));
        d3.add(new Field("url", "http://www.google.com", Field.Store.YES, Field.Index.NOT_ANALYZED));
        indexWriter.addDocument(d3, analyzer);

        assertEquals(5, matchingColumns);
        // 2 plain docs + 101 sortable docs + 1 unicode doc
        assertEquals(104, indexWriter.docCount());
    }

    /** A non-ASCII term indexed by {@link #testWriter()} must be searchable. */
    public void testUnicode() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        Query q = parserFor("key").parse("+key:\u5639\u563b");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        Document doc = searcher.doc(docs.scoreDocs[0].doc);
        assertNotNull(doc.getField("key"));
    }

    /** A field added twice to one document must return both stored values, in insertion order. */
    public void testMultiValuedFields() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        Query q = parserFor("key").parse("+key:samefield");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        Document doc = searcher.doc(docs.scoreDocs[0].doc);
        Field[] fields = doc.getFields("key");
        String[] expectedValues = new String[] { "\u5639\u563b", "samefield" };

        assertEquals(2, fields.length);
        for (int i = 0; i < fields.length; i++) {
            assertEquals(expectedValues[i], fields[i].stringValue());
        }
    }

    /** A NOT_ANALYZED field must be matched by an exact {@link TermQuery}. */
    public void testKeywordField() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        TermQuery tq = new TermQuery(new Term("url", "http://www.google.com"));
        TopDocs topDocs = searcher.search(tq, 10);

        assertEquals(1, topDocs.totalHits);
    }

    /** Deleting by term must make the unicode document unsearchable. */
    public void testDelete() throws Exception {
        indexWriter.deleteDocuments(new Term("key", "\u5639\u563b"));

        // Reader is opened after the delete, as in the original test.
        IndexSearcher searcher = new IndexSearcher(openReader());

        Query q = parserFor("key").parse("+key:\u5639\u563b");
        TopDocs docs = searcher.search(q, 10);

        assertEquals(0, docs.totalHits);
    }

    /** A single-term query must find exactly the one document containing the term. */
    public void testSearch() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        Query q = parserFor("key").parse("+key:another");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        Document doc = searcher.doc(docs.scoreDocs[0].doc);
        assertNotNull(doc.getField("key"));
    }

    /** Of the two documents containing "example", the shorter one must rank first. */
    public void testScore() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        Query q = parserFor("key").parse("+key:example");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(2, docs.totalHits);

        Document doc = searcher.doc(docs.scoreDocs[0].doc);
        String fld = doc.getField("key").stringValue();

        // No explicit boost is set anywhere in this file.
        // NOTE(review): the shorter document presumably wins through length
        // normalization - confirm.
        assertEquals("this is another example", fld);
    }

    /** A term that was never indexed must yield no hits. */
    public void testMissingQuery() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        // check something that doesn't exist
        Query q = parserFor("key").parse("+key:bogus");
        TopDocs docs = searcher.search(q, 10);

        assertEquals(0, docs.totalHits);
    }

    /** Prefix wildcards must work on both analyzed and non-analyzed fields. */
    public void testWildcardQuery() throws Exception {
        IndexReader indexReader = openReader();
        IndexSearcher searcher = new IndexSearcher(indexReader);
        QueryParser qp = parserFor("key");

        // check wildcard on the analyzed field
        Query q = qp.parse("+key:anoth*");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        // Load the document that was actually hit rather than a fixed doc id,
        // as the other tests in this class do.
        Document d = indexReader.document(docs.scoreDocs[0].doc);
        assertEquals("this is another example", d.get("key"));

        // check wildcard on the non-analyzed field: all 101 dated documents
        q = qp.parse("+date:test*");
        docs = searcher.search(q, 10);
        assertEquals(101, docs.totalHits);
    }

    /** Sorting by the "date" string field must return test200..test209 as the first ten hits. */
    public void testSortQuery() throws Exception {
        IndexReader indexReader = openReader();
        IndexSearcher searcher = new IndexSearcher(indexReader);

        // check sort
        Sort sort = new Sort(new SortField("date", SortField.STRING));
        Query q = parserFor("key").parse("+key:sort");
        TopDocs docs = searcher.search(q, null, 10, sort);

        // Fail with a clear assertion instead of an ArrayIndexOutOfBoundsException
        // when fewer hits than expected come back.
        assertEquals(10, docs.scoreDocs.length);

        for (int i = 0; i < 10; i++) {
            Document d = indexReader.document(docs.scoreDocs[i].doc);
            assertEquals("test" + (i + 200), d.get("date"));
        }
    }

    /** An inclusive term range over the "key" field must match 103 documents. */
    public void testRangeQuery() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());

        // check range queries
        Query q = parserFor("key").parse("+key:[apple TO zoo]");
        TopDocs docs = searcher.search(q, 10);

        assertEquals(103, docs.totalHits);
    }

    /** Phrase queries must match only adjacent terms that are present in the index. */
    public void testExactQuery() throws Exception {
        IndexSearcher searcher = new IndexSearcher(openReader());
        QueryParser qp = parserFor("key");

        // check exact
        Query q = qp.parse("+key:\"foobar foobar\"");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        q = qp.parse("+key:\"not in index\"");
        docs = searcher.search(q, 10);
        assertEquals(0, docs.totalHits);

        q = qp.parse("+key:\"is an\"");
        docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);
    }

    /**
     * Writes one document into a separate "title" field and reads it back
     * through a reader opened after the write.
     */
    public void testSimpleAnalyzerWriteRead() throws Exception {
        Document doc = new Document();
        doc.add(new Field("title", text, Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.addDocument(doc, analyzer);

        IndexReader indexReader = openReader();
        IndexSearcher searcher = new IndexSearcher(indexReader);
        QueryParser qp = parserFor("title");

        Query q = qp.parse("foobar");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        q = qp.parse("\"not in index\"");
        docs = searcher.search(q, 10);
        assertEquals(0, docs.totalHits);

        indexReader.reopen();

        // The "title" field is indexed without term vectors here, unlike the
        // "key" field of the first document in testWriter.
        // NOTE(review): presumably that is why the phrase is expected not to
        // match - confirm against the reader implementation.
        q = qp.parse("\"foobar foobar\"");
        docs = searcher.search(q, 10);
        assertEquals(0, docs.totalHits);
    }

    /** Highlighting from stored term vectors must wrap both occurrences of the phrase terms. */
    public void testHighlight() throws Exception {
        // This tests the TermPositionVector classes
        IndexReader indexReader = openReader();
        IndexSearcher searcher = new IndexSearcher(indexReader);

        // check exact
        Query q = parserFor("key").parse("+key:\"foobar foobar\"");
        TopDocs docs = searcher.search(q, 10);
        assertEquals(1, docs.totalHits);

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        // NOTE(review): the third argument here is the document text; verify
        // against the QueryScorer constructor that this is what is intended.
        QueryScorer scorer = new QueryScorer(q, "key", text);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // One fragment spanning the whole text
        highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

        TokenStream tvStream = TokenSources.getTokenStream(indexReader, docs.scoreDocs[0].doc, "key");
        String rv = highlighter.getBestFragment(tvStream, text);

        assertNotNull(rv);
        assertEquals(highlightedText, rv);
    }

    /**
     * Indexes ten documents sharing one category and checks that
     * {@link LucandraFilter} restricts term, boolean and range queries to that
     * category, and excludes everything for a category that was never indexed.
     */
    public void testLucandraFilter() throws Exception {
        // Reader is opened before the documents are written, as in the original test.
        IndexReader indexReader = openReader();
        IndexSearcher searcher = new IndexSearcher(indexReader);

        for (int i = 0; i < 10; i++) {
            Document doc1 = new Document();
            doc1.add(new Field("aKey", "aKey" + i, Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            doc1.add(new Field("category", "category1", Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            indexWriter.addDocument(doc1, analyzer);
        }

        QueryParser qp = parserFor("aKey");
        Query q = qp.parse("aKey1");

        LucandraFilter filter = new LucandraFilter();
        filter.addTerm(new Term("category", "category1"));

        TopDocs docs = searcher.search(q, filter, 10);
        assertEquals(1, docs.totalHits);

        indexReader.reopen();
        q = qp.parse("aKey1 OR aKey2");
        docs = searcher.search(q, filter, 10);
        assertEquals(2, docs.totalHits);

        indexReader.reopen();
        q = qp.parse("[aKey0 TO aKey5]");
        docs = searcher.search(q, filter, 10);
        assertEquals(6, docs.totalHits);

        // Same range query, but filtered on a category no document has
        indexReader.reopen();
        filter = new LucandraFilter();
        filter.addTerm(new Term("category", "category0"));
        docs = searcher.search(q, filter, 10);
        assertEquals(0, docs.totalHits);
    }

    /**
     * Indexes 100 unstored, non-analyzed documents and checks that a
     * conjunction of two term queries matches all of them.
     */
    public void testLucandraTermDocs() throws Exception {
        // A separate writer instance on the same index and client, as in the
        // original test; named so it does not shadow the shared writer field.
        IndexWriter termDocsWriter = new IndexWriter(indexName, client);

        int docSize = 100;
        for (int i = 0; i < docSize; i++) {
            Document doc1 = new Document();
            doc1.add(new Field("UUID", "UUID" + i, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
            doc1.add(new Field("parent", "parenta", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
            doc1.add(new Field("nodeType", "item", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
            termDocsWriter.addDocument(doc1, analyzer);
        }

        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("parent", "parenta")), BooleanClause.Occur.MUST);
        query.add(new TermQuery(new Term("nodeType", "item")), BooleanClause.Occur.MUST);

        IndexSearcher searcher = new IndexSearcher(openReader());
        TopDocs topDocs = searcher.search(query, 1000);

        assertEquals(docSize, topDocs.totalHits);
    }
}