/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;
import java.util.List;
import junit.framework.TestCase;
import junit.framework.TestFerrari;
public class InvertedIndexTest extends TestCase {
public static void main(String[] args) throws Exception {
InvertedIndexTest t = new InvertedIndexTest();
TestFerrari.testAll(t);
}
public void testSingleTermAtEnd() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
Term t2 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1002);
tl.add(t1);
tl.add(t2);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1002);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertNotNull(iiL);
assertEquals(1, iiL.size());
assertEquals("abinash".hashCode(), iiL.get(0).hash);
assertEquals(1, iiL.get(0).docPos.length );
assertEquals(1, iiL.get(0).dtc.length );
assertEquals(1, iiL.get(0).ttc.length );
assertEquals(1, iiL.get(0).tw.length );
assertEquals((short)1001, iiL.get(0).docPos[0]);
assertEquals((byte)44, iiL.get(0).dtc[0]);
assertEquals((byte)1, iiL.get(0).ttc[0]);
assertEquals((byte)91, iiL.get(0).tw[0]);
}
public void testMultiTermAtEnd() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
Term t2 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1002);
Term t3 = new Term("avinash",Term.TERMLOC_BODY,(byte)1,22);
t3.setDocumentTypeCode((byte) 45);
t3.setTermWeight((byte)92);
t3.setDocumentPosition((short)1002);
tl.add(t1);
tl.add(t2);
tl.add(t3);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1002);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertNotNull(iiL);
assertEquals(1, iiL.size());
assertEquals("abinash".hashCode(), iiL.get(0).hash);
assertEquals(1, iiL.get(0).docPos.length );
assertEquals(1, iiL.get(0).dtc.length );
assertEquals(1, iiL.get(0).ttc.length );
assertEquals(1, iiL.get(0).tw.length );
assertEquals((short)1001, iiL.get(0).docPos[0]);
assertEquals((byte)44, iiL.get(0).dtc[0]);
assertEquals((byte)1, iiL.get(0).ttc[0]);
assertEquals((byte)91, iiL.get(0).tw[0]);
}
public void testSingleTermAtBeginning() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
Term t2 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1002);
Term t3 = new Term("avinash",Term.TERMLOC_BODY,(byte)1,22);
t3.setDocumentTypeCode((byte) 45);
t3.setTermWeight((byte)92);
t3.setDocumentPosition((short)1002);
tl.add(t1);
tl.add(t2);
tl.add(t3);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1001);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertNotNull(iiL);
assertEquals(2, iiL.size());
assertEquals("abinash".hashCode(), iiL.get(0).hash);
assertEquals(1, iiL.get(0).docPos.length );
assertEquals(1, iiL.get(0).dtc.length );
assertEquals(1, iiL.get(0).ttc.length );
assertEquals(1, iiL.get(0).tw.length );
assertEquals((short)1002, iiL.get(0).docPos[0]);
assertEquals((byte)45, iiL.get(0).dtc[0]);
assertEquals((byte)1, iiL.get(0).ttc[0]);
assertEquals((byte)92, iiL.get(0).tw[0]);
assertEquals("avinash".hashCode(), iiL.get(1).hash);
assertEquals((short)1002, iiL.get(1).docPos[0]);
assertEquals((byte)45, iiL.get(1).dtc[0]);
assertEquals((byte)1, iiL.get(1).ttc[0]);
assertEquals((byte)92, iiL.get(1).tw[0]);
}
public void testSingleTermAtMiddle() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
Term t2 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1002);
Term t3 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t3.setDocumentTypeCode((byte) 46);
t3.setTermWeight((byte)93);
t3.setDocumentPosition((short)1003);
tl.add(t1);
tl.add(t2);
tl.add(t3);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1002);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertEquals(1, iiL.size());
assertEquals((short)1001, iiL.get(0).docPos[0]);
assertEquals((short)1003, iiL.get(0).docPos[1]);
assertEquals((byte)44, iiL.get(0).dtc[0]);
assertEquals((byte)46, iiL.get(0).dtc[1]);
}
public void testMultipleTermAtMiddle() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
Term t2 = new Term("avinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1001);
Term t3 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t3.setDocumentTypeCode((byte) 46);
t3.setTermWeight((byte)93);
t3.setDocumentPosition((short)1002);
Term t4 = new Term("alinash",Term.TERMLOC_BODY,(byte)1,22);
t4.setDocumentTypeCode((byte) 46);
t4.setTermWeight((byte)93);
t4.setDocumentPosition((short)1003);
Term t5 = new Term("akinash",Term.TERMLOC_BODY,(byte)1,22);
t5.setDocumentTypeCode((byte) 46);
t5.setTermWeight((byte)93);
t5.setDocumentPosition((short)1004);
tl.add(t1);
tl.add(t2);
tl.add(t3);
tl.add(t4);
tl.add(t5);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1002);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertEquals(4, iiL.size());
assertEquals("akinash".hashCode(), iiL.get(3).hash);
assertEquals((short)1004, iiL.get(3).docPos[0]);
assertEquals("alinash".hashCode(), iiL.get(0).hash);
assertEquals((short)1003, iiL.get(0).docPos[0]);
assertEquals("abinash".hashCode(), iiL.get(1).hash);
assertEquals((short)1001, iiL.get(1).docPos[0]);
assertEquals("avinash".hashCode(), iiL.get(2).hash);
assertEquals((short)1001, iiL.get(1).docPos[0]);
}
public void testTypeFiltering() {
TermList tl = new TermList();
Term t1 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,12);
t1.setDocumentPosition((short)1001);
t1.setDocumentTypeCode((byte) 44);
t1.setTermWeight((byte)91);
t1.setTermTypeCode((byte)12);
Term t2 = new Term("avinash",Term.TERMLOC_BODY,(byte)1,22);
t2.setDocumentTypeCode((byte) 45);
t2.setTermWeight((byte)92);
t2.setDocumentPosition((short)1001);
Term t3 = new Term("abinash",Term.TERMLOC_BODY,(byte)1,22);
t3.setDocumentTypeCode((byte) 46);
t3.setTermWeight((byte)93);
t3.setDocumentPosition((short)1002);
Term t4 = new Term("alinash",Term.TERMLOC_BODY,(byte)1,22);
t4.setDocumentTypeCode((byte) 46);
t4.setTermWeight((byte)93);
t4.setDocumentPosition((short)1003);
Term t5 = new Term("akinash",Term.TERMLOC_BODY,(byte)1,22);
t5.setDocumentTypeCode((byte) 46);
t5.setTermWeight((byte)93);
t5.setDocumentPosition((short)1004);
tl.add(t1);
tl.add(t2);
tl.add(t3);
tl.add(t4);
tl.add(t5);
byte[] deletedB = InvertedIndex.delete(tl.toBytes(), (short) 1002);
assertNotNull(deletedB);
List<InvertedIndex> iiL = InvertedIndex.read(deletedB);
assertEquals(4, iiL.size());
assertEquals("akinash".hashCode(), iiL.get(3).hash);
assertEquals((short)1004, iiL.get(3).docPos[0]);
assertEquals("alinash".hashCode(), iiL.get(0).hash);
assertEquals((short)1003, iiL.get(0).docPos[0]);
assertEquals("abinash".hashCode(), iiL.get(1).hash);
assertEquals((short)1001, iiL.get(1).docPos[0]);
assertEquals("avinash".hashCode(), iiL.get(2).hash);
assertEquals((short)1001, iiL.get(1).docPos[0]);
}
}