/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;
import java.util.HashSet;
import java.util.Set;
import junit.framework.TestCase;
import junit.framework.TestFerrari;
import com.bizosys.hsearch.filter.FilterIds;
import com.bizosys.hsearch.filter.Storable;
public class TermListTest extends TestCase {
/**
 * Command-line entry point: hands a fresh {@code TermListTest} instance to
 * {@code TestFerrari.testRandom}.
 *
 * @param args command-line arguments (not read)
 * @throws Exception if {@code TestFerrari.testRandom} throws
 */
public static void main(String[] args) throws Exception {
    TestFerrari.testRandom(new TermListTest());
}
/**
 * Adds one term to a {@code TermList}, converts the list to bytes, filters
 * those bytes by the keyword's hash code and loads the result into a second
 * list. Expects exactly one term to come back, carrying the original
 * document position and weight.
 *
 * @param keyword term text; its {@code hashCode()} is used as the filter key
 * @param termwt  not read by this test
 * @param docpos  document position given to the term
 * @param wt      weight given to the term
 * @throws Exception if any call made by the test throws
 */
public void testSingleTermDoc(String keyword, Byte termwt, Short docpos, Byte wt) throws Exception {
    Character marker = 'T';
    byte typeCode = 12;

    // Write side: a list holding a single term.
    TermList written = new TermList();
    written.add(new Term(keyword, marker, typeCode, 123, docpos, wt));
    byte[] serialized = written.toBytes();

    // Read side: keep only the bytes matching the keyword hash.
    byte[] keywordHash = Storable.putInt(keyword.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(serialized, keywordHash);
    TermList loaded = new TermList();
    loaded.loadTerms(matched);

    assertEquals(1, loaded.totalTerms);
    assertEquals(docpos.shortValue(), loaded.docPos[0]);
    assertEquals(wt.byteValue(), loaded.termWeight[0]);
}
/**
 * Adds two terms with the same keyword but separate position/weight pairs
 * to one {@code TermList}, converts it to bytes, filters by the keyword's
 * hash code and loads the result. Expects both terms back, in the order
 * they were added.
 *
 * @param keyword  term text shared by both terms; its hash is the filter key
 * @param termType type code passed to both terms
 * @param w1       weight of the first term
 * @param pos1     document position of the first term
 * @param w2       weight of the second term
 * @param pos2     document position of the second term
 * @throws Exception if any call made by the test throws
 */
public void testSingleTermDocs(String keyword, Byte termType,
        Byte w1, Short pos1, Byte w2, Short pos2) throws Exception {
    TermList written = new TermList();
    written.add(new Term(keyword, 'T', termType, 123, pos1, w1));
    written.add(new Term(keyword, 'T', termType, 123, pos2, w2));

    byte[] keywordHash = Storable.putInt(keyword.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(written.toBytes(), keywordHash);

    TermList loaded = new TermList();
    loaded.loadTerms(matched);

    assertEquals(2, loaded.totalTerms);

    // First term added.
    assertEquals(pos1.shortValue(), loaded.docPos[0]);
    assertEquals(w1.byteValue(), loaded.termWeight[0]);

    // Second term added.
    assertEquals(pos2.shortValue(), loaded.docPos[1]);
    assertEquals(w2.byteValue(), loaded.termWeight[1]);
}
/**
 * Converts a {@code TermList} holding one term to bytes, hands those bytes
 * to a second list through {@code setExistingBytes}, adds another term with
 * the same keyword to that second list and converts it to bytes again. The
 * result is filtered by the keyword's hash code and loaded back.
 *
 * Expects two terms: the newly added one at index 0 and the one that came
 * from the existing bytes at index 1, each with its own position, weight
 * and document type code.
 *
 * @param keyword term text shared by both terms; its hash is the filter key
 * @param tt1     type code passed to both terms
 * @param w1      weight of the first (pre-existing) term
 * @param pos1    document position of the first term
 * @param w2      weight of the second (newly added) term
 * @param pos2    document position of the second term; replaced by
 *                {@code pos1 + 1} when it has the same value as {@code pos1}
 * @param dt1     document type code set on both terms
 * @throws Exception if any call made by the test throws
 */
public void testSingleTermMergedDocs(String keyword,Byte tt1, Byte w1,
        Short pos1, Byte w2, Short pos2, Byte dt1) throws Exception {
    Character s = 'T';

    // The two terms must sit at distinct positions. Compare by value:
    // '==' on two boxed Short objects compares references and is false for
    // equal values that are not served from the Short cache.
    if (pos1.equals(pos2)) {
        pos2 = (short) (pos1 + 1);
    }

    Term aTerm = new Term(keyword,s,tt1,123,pos1,w1);
    aTerm.setDocumentTypeCode(dt1);
    Term bTerm = new Term(keyword,s,tt1,123,pos2,w2);
    bTerm.setDocumentTypeCode(dt1);

    TermList termList1 = new TermList();
    termList1.add(aTerm);
    byte[] termList1B = termList1.toBytes();

    TermList termList2 = new TermList();
    termList2.setExistingBytes(termList1B); // existing bytes
    termList2.add(bTerm); // new term
    byte[] termList2B = termList2.toBytes();

    TermList res = new TermList();
    byte[] hashcode = Storable.putInt(keyword.hashCode());
    byte[] onlyTerms = FilterIds.isMatchingColBytes(termList2B, hashcode);
    res.loadTerms(onlyTerms);
    System.out.println(res.toString());

    assertEquals(2, res.totalTerms);

    // Newly added term is expected first.
    assertEquals(pos2.shortValue(), res.docPos[0]);
    assertEquals(w2.byteValue(), res.termWeight[0]);
    assertEquals(dt1.byteValue(), res.docTypesCodes[0]);

    // Term carried over from the existing bytes is expected second.
    assertEquals(pos1.shortValue(), res.docPos[1]);
    assertEquals(w1.byteValue(), res.termWeight[1]);
    assertEquals(dt1.byteValue(), res.docTypesCodes[1]);
}
/**
 * Adds two terms with different keywords but the same position to one
 * {@code TermList}, converts it to bytes and filters by the hash code of
 * {@code keyword1}. Expects a single term back, carrying {@code pos1} and
 * {@code w1}.
 *
 * @param keyword1 text of the first term; its hash is the filter key
 * @param keyword2 text of the second term
 * @param pos1     document position given to both terms
 * @param w1       weight of the first term
 * @param w2       weight of the second term
 * @throws Exception if any call made by the test throws
 */
public void testMultipleTermDoc(String keyword1,
        String keyword2, Byte pos1, Byte w1, Byte w2) throws Exception {
    Character marker = 'T';
    byte typeCode = 12;

    TermList written = new TermList();
    written.add(new Term(keyword1, marker, typeCode, 123, pos1, w1));
    written.add(new Term(keyword2, marker, typeCode, 123, pos1, w2));
    byte[] serialized = written.toBytes();

    byte[] firstKeywordHash = Storable.putInt(keyword1.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(serialized, firstKeywordHash);
    TermList loaded = new TermList();
    loaded.loadTerms(matched);

    assertEquals(1, loaded.totalTerms);
    assertEquals(pos1.shortValue(), loaded.docPos[0]);
    assertEquals(w1.byteValue(), loaded.termWeight[0]);
}
/**
 * Adds four terms (one for {@code keyword1}, two for {@code keyword2}, one
 * for {@code keyword3}) with fixed positions 111/112 and fixed weights to
 * one {@code TermList}, converts it to bytes, then loads the bytes filtered
 * first by the hash of {@code keyword2} and afterwards by the hash of
 * {@code keyword3} into the same target list.
 *
 * Expects two terms for {@code keyword2} (111/79 then 112/23) and one term
 * for {@code keyword3} (112/45).
 *
 * @param keyword1 text of the first term
 * @param keyword2 text of the second and fourth terms
 * @param keyword3 text of the third term
 * @throws Exception if any call made by the test throws
 */
public void testMultipleTermDocs(String keyword1,
        String keyword2, String keyword3) throws Exception {
    TermList written = new TermList();
    Character marker = 'T';
    byte typeCode = 12;

    Term first = new Term(keyword1, marker, typeCode, 123);
    first.setDocumentPosition((short) 111);
    first.setTermWeight((byte) 79);
    written.add(first);

    Term second = new Term(keyword2, marker, typeCode, 123);
    second.setDocumentPosition((short) 111);
    second.setTermWeight((byte) 79);
    written.add(second);

    Term third = new Term(keyword3, marker, typeCode, 123);
    third.setDocumentPosition((short) 112);
    third.setTermWeight((byte) 45);
    written.add(third);

    Term fourth = new Term(keyword2, marker, typeCode, 123);
    fourth.setDocumentPosition((short) 112);
    fourth.setTermWeight((byte) 23);
    written.add(fourth);

    byte[] serialized = written.toBytes();
    TermList loaded = new TermList();

    // Pass 1: filter by keyword2.
    byte[] filterKey = Storable.putInt(keyword2.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(serialized, filterKey);
    loaded.loadTerms(matched);
    System.out.println(loaded.toString());
    assertEquals(2, loaded.totalTerms);
    assertEquals(111, loaded.docPos[0]);
    assertEquals(79, loaded.termWeight[0]);
    assertEquals(112, loaded.docPos[1]);
    assertEquals(23, loaded.termWeight[1]);

    // Pass 2: filter by keyword3, loading into the same list again.
    filterKey = Storable.putInt(keyword3.hashCode());
    matched = FilterIds.isMatchingColBytes(serialized, filterKey);
    loaded.loadTerms(matched);
    System.out.println(loaded.toString());
    assertEquals(1, loaded.totalTerms);
    assertEquals(112, loaded.docPos[0]);
    assertEquals(45, loaded.termWeight[0]);
}
/**
 * Adds four terms over three keywords and two document positions to one
 * {@code TermList}, converts it to bytes, and checks what loads back when
 * the bytes are filtered by the hash of {@code keyword2} and then by the
 * hash of {@code keyword3}.
 *
 * Expects two terms for {@code keyword2} ({@code pos1}/{@code w2} then
 * {@code pos2}/{@code w3}) and one term for {@code keyword3}
 * ({@code pos2}/{@code w3}).
 *
 * NOTE(review): the second list that receives {@code setExistingBytes} is
 * never read afterwards; all four terms are added to the first list. It
 * looks like the merge path may not be exercised here -- confirm intent.
 *
 * @param keyword1 text of the first term
 * @param keyword2 text of the second and fourth terms
 * @param keyword3 text of the third term
 * @param pos1     document position of the first and second terms
 * @param pos2     document position of the third and fourth terms
 * @param w1       weight of the first term
 * @param w2       weight of the second term
 * @param w3       weight of the third and fourth terms
 * @throws Exception if any call made by the test throws
 */
public void testMultipleTermMergedDocs(
        String keyword1, String keyword2, String keyword3,
        Short pos1, Short pos2, Byte w1, Byte w2, Byte w3) throws Exception {
    Character marker = 'T';
    byte typeCode = 12;

    Term first = new Term(keyword1, marker, typeCode, 123, pos1, w1);
    Term second = new Term(keyword2, marker, typeCode, 123, pos1, w2);
    Term third = new Term(keyword3, marker, typeCode, 123, pos2, w3);
    Term fourth = new Term(keyword2, marker, typeCode, 123, pos2, w3);

    TermList written = new TermList();
    written.add(first);
    written.add(second);

    // Receives the intermediate bytes but is not consulted again below.
    TermList seeded = new TermList();
    seeded.setExistingBytes(written.toBytes());

    written.add(third);
    written.add(fourth);
    byte[] serialized = written.toBytes();

    // Filter by keyword2.
    TermList secondKeywordTerms = new TermList();
    byte[] filterKey = Storable.putInt(keyword2.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(serialized, filterKey);
    secondKeywordTerms.loadTerms(matched);
    assertEquals(2, secondKeywordTerms.totalTerms);
    assertEquals(pos1.shortValue(), secondKeywordTerms.docPos[0]);
    assertEquals(w2.byteValue(), secondKeywordTerms.termWeight[0]);
    assertEquals(pos2.shortValue(), secondKeywordTerms.docPos[1]);
    assertEquals(w3.byteValue(), secondKeywordTerms.termWeight[1]);

    // Filter by keyword3.
    TermList thirdKeywordTerms = new TermList();
    filterKey = Storable.putInt(keyword3.hashCode());
    matched = FilterIds.isMatchingColBytes(serialized, filterKey);
    thirdKeywordTerms.loadTerms(matched);
    assertEquals(1, thirdKeywordTerms.totalTerms);
    assertEquals(pos2.shortValue(), thirdKeywordTerms.docPos[0]);
    assertEquals(w3.byteValue(), thirdKeywordTerms.termWeight[0]);
}
/**
 * Converts a {@code TermList} holding terms for {@code keyword1} (at
 * {@code pos1}) and {@code keyword2} (at {@code pos2}) to bytes, passes
 * those bytes to a second list through {@code setExistingBytes}, and adds
 * terms for {@code keyword3} and {@code keyword4} (both at {@code pos2}) to
 * that second list. The second list's bytes are then filtered three times.
 *
 * Expects no term for {@code keyword2}, one term for {@code keyword1}
 * ({@code pos1}/{@code w1}) and one term for {@code keyword4}
 * ({@code pos2}/{@code w3}).
 *
 * @param keyword1 text of the first term
 * @param keyword2 text of the second term
 * @param keyword3 text of the third term
 * @param keyword4 text of the fourth term
 * @param pos1     document position of the first term
 * @param w1       weight of the first term
 * @param pos2     document position of the second, third and fourth terms
 * @param w2       weight of the second term
 * @param w3       weight of the third and fourth terms
 * @throws Exception if any call made by the test throws
 */
public void testUpdatesDocs(String keyword1, String keyword2,
        String keyword3, String keyword4, Short pos1, Byte w1,
        Short pos2, Byte w2, Byte w3) throws Exception {
    Character marker = 'T';
    byte typeCode = 12;

    Term first = new Term(keyword1, marker, typeCode, 123, pos1, w1);
    Term second = new Term(keyword2, marker, typeCode, 123, pos2, w2);
    Term third = new Term(keyword3, marker, typeCode, 123, pos2, w3);
    Term fourth = new Term(keyword4, marker, typeCode, 123, pos2, w3);

    // Original content.
    TermList original = new TermList();
    original.add(first);
    original.add(second);
    byte[] originalBytes = original.toBytes();

    // Second list seeded with the original bytes, then given two new terms.
    TermList updated = new TermList();
    updated.setExistingBytes(originalBytes);
    updated.add(third);
    updated.add(fourth);

    // keyword2: nothing expected.
    byte[] filterKey = Storable.putInt(keyword2.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(updated.toBytes(), filterKey);
    TermList forKeyword2 = new TermList();
    forKeyword2.loadTerms(matched);
    assertEquals(0, forKeyword2.totalTerms);

    // keyword1: the term from the original bytes is expected.
    filterKey = Storable.putInt(keyword1.hashCode());
    matched = FilterIds.isMatchingColBytes(updated.toBytes(), filterKey);
    TermList forKeyword1 = new TermList();
    forKeyword1.loadTerms(matched);
    assertEquals(1, forKeyword1.totalTerms);
    assertEquals(pos1.shortValue(), forKeyword1.docPos[0]);
    assertEquals(w1.byteValue(), forKeyword1.termWeight[0]);

    // keyword4: the newly added term is expected.
    filterKey = Storable.putInt(keyword4.hashCode());
    matched = FilterIds.isMatchingColBytes(updated.toBytes(), filterKey);
    TermList forKeyword4 = new TermList();
    forKeyword4.loadTerms(matched);
    assertEquals(1, forKeyword4.totalTerms);
    assertEquals(pos2.shortValue(), forKeyword4.docPos[0]);
    assertEquals(w3.byteValue(), forKeyword4.termWeight[0]);
}
/**
 * Adds four terms to one {@code TermList} -- three created with
 * {@code TermType.NONE_TYPECODE} and one ({@code keyword3}) created with
 * type code 12 -- converts it to bytes, filters by the hash of
 * {@code keyword3}, and loads the result through the four-argument
 * {@code loadTerms} with an empty ignore set,
 * {@code DocumentType.NONE_TYPECODE} and type code 12.
 *
 * Expects a single term back, carrying {@code pos2} and {@code w3}.
 *
 * NOTE(review): the second list that receives {@code setExistingBytes} is
 * never read afterwards; all four terms are added to the first list --
 * confirm intent.
 *
 * @param keyword1 text of the first term
 * @param keyword2 text of the second and fourth terms
 * @param keyword3 text of the third term; its hash is the filter key
 * @param pos1     document position of the first and second terms
 * @param pos2     document position of the third and fourth terms
 * @param w1       weight of the first term
 * @param w2       weight of the second term
 * @param w3       weight of the third and fourth terms
 * @throws Exception if any call made by the test throws
 */
public void testFilterByWord(
        String keyword1, String keyword2, String keyword3,
        Short pos1, Short pos2, Byte w1, Byte w2, Byte w3) throws Exception {
    Character marker = 'T';
    byte typeCode = 12;

    Term first = new Term(keyword1, marker, TermType.NONE_TYPECODE, 123, pos1, w1);
    Term second = new Term(keyword2, marker, TermType.NONE_TYPECODE, 123, pos1, w2);
    Term third = new Term(keyword3, marker, typeCode, 123, pos2, w3);
    Term fourth = new Term(keyword2, marker, TermType.NONE_TYPECODE, 123, pos2, w3);

    TermList written = new TermList();
    written.add(first);
    written.add(second);

    // Receives the intermediate bytes but is not consulted again below.
    TermList seeded = new TermList();
    seeded.setExistingBytes(written.toBytes());

    written.add(third);
    written.add(fourth);
    byte[] serialized = written.toBytes();

    TermList loaded = new TermList();
    byte[] filterKey = Storable.putInt(keyword3.hashCode());
    byte[] matched = FilterIds.isMatchingColBytes(serialized, filterKey);
    Set<Integer> ignorePos = new HashSet<Integer>();
    loaded.loadTerms(matched, ignorePos, DocumentType.NONE_TYPECODE, typeCode);

    assertEquals(1, loaded.totalTerms);
    assertEquals(pos2.shortValue(), loaded.docPos[0]);
    assertEquals(w3.byteValue(), loaded.termWeight[0]);
}
}