package ivory.ffg.data;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.Set;
import junit.framework.JUnit4TestAdapter;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import ivory.core.data.index.TermPositions;
/**
 * Tests for {@link CompressedPositionalPostings}.
 *
 * <p>Two fixtures (a small and a large one) are built once per class. Each
 * fixture pairs an {@code int[]} data set with one {@link TermPositions} per
 * array entry. The tests check that positions handed to
 * {@code CompressedPositionalPostings.newInstance} come back unchanged from
 * {@code decompressPositions}, and that an instance survives a
 * {@code write}/{@code readInstance} round trip.
 *
 * <p>Fixture positions are pseudo-random but generated from fixed seeds, so
 * every run uses the same data and a failure can be reproduced.
 */
public class CompressedPositionalPostingsTest {
  /** Seed for the positions of the small fixture. */
  private static final long SMALL_SEED = 42L;

  /** Seed for the positions of the large fixture. */
  private static final long LARGE_SEED = 4242L;

  /** Each data set entry receives between 1 and this many positions. */
  private static final int MAX_POSITIONS_PER_ENTRY = 3;

  /** Positions are drawn from {@code [0, entries * POSITION_RANGE_PER_ENTRY)}. */
  private static final int POSITION_RANGE_PER_ENTRY = 100;

  private static final int[] smallDataset = new int[] {
    10, 34, 36, 87, 436, 439, 783, 5643
  };
  private static final List<TermPositions> smallTermPositions =
      Lists.newArrayList();

  private static final int[] largeDataset = new int[200];
  private static final List<TermPositions> largeTermPositions =
      Lists.newArrayList();

  /** Generates the positions that accompany {@code smallDataset}. */
  @BeforeClass public static void setUpSmallDataset() {
    populateTermPositions(smallDataset, smallTermPositions, SMALL_SEED);
  }

  /** Fills {@code largeDataset} with 1, 4, 7, ... and generates its positions. */
  @BeforeClass public static void setUpLargeDataset() {
    for (int i = 0; i < largeDataset.length; i++) {
      largeDataset[i] = i * 3 + 1;
    }
    populateTermPositions(largeDataset, largeTermPositions, LARGE_SEED);
  }

  /**
   * Creates one {@link TermPositions} per entry of {@code dataset} and stores
   * them, in order, in {@code target}.
   *
   * <p>Every entry gets 1 to {@value #MAX_POSITIONS_PER_ENTRY} positions that
   * are sorted in ascending order and unique across the whole fixture.
   *
   * @param dataset the data set the positions belong to; only its length is used
   * @param target  the list that receives the generated positions
   * @param seed    seed of the pseudo-random generator
   */
  private static void populateTermPositions(int[] dataset,
      List<TermPositions> target, long seed) {
    // Start from an empty list so a repeated invocation cannot append a
    // second copy of the fixture.
    target.clear();

    Random random = new Random(seed);
    int positionBound = dataset.length * POSITION_RANGE_PER_ENTRY;
    Set<Integer> usedPositions = Sets.newHashSet();

    for (int entry = 0; entry < dataset.length; entry++) {
      int[] pos = new int[random.nextInt(MAX_POSITIONS_PER_ENTRY) + 1];
      for (int i = 0; i < pos.length; i++) {
        // Redraw until the value is unused. The range offers
        // POSITION_RANGE_PER_ENTRY candidates per entry while at most
        // MAX_POSITIONS_PER_ENTRY are consumed per entry, so a free value
        // always exists.
        int candidate;
        do {
          candidate = random.nextInt(positionBound);
        } while (!usedPositions.add(candidate));
        pos[i] = candidate;
      }
      // Sort afterwards instead of rejecting out-of-order draws: rejecting
      // can never succeed once the previous slot holds the largest value.
      Arrays.sort(pos);
      target.add(new TermPositions(pos, (short) pos.length));
    }
  }

  /**
   * Builds postings from the given fixture and asserts that decompressing
   * entry {@code i} yields exactly the positions of
   * {@code expectedPositions.get(i)}.
   */
  private static void assertPositionsRoundTrip(int[] dataset,
      List<TermPositions> expectedPositions) throws Exception {
    CompressedPositionalPostings postings =
        CompressedPositionalPostings.newInstance(dataset, expectedPositions);

    for (int i = 0; i < dataset.length; i++) {
      TermPositions expected = expectedPositions.get(i);
      int[] actual = postings.decompressPositions(i);

      assertEquals("number of positions for entry " + i,
          expected.getPositions().length, actual.length);
      for (int j = 0; j < actual.length; j++) {
        assertEquals("position " + j + " of entry " + i,
            expected.getPositions()[j], actual[j]);
      }
    }
  }

  @Test public void testSmallDataset() throws Exception {
    assertPositionsRoundTrip(smallDataset, smallTermPositions);
  }

  @Test public void testLargeDataset() throws Exception {
    assertPositionsRoundTrip(largeDataset, largeTermPositions);
  }

  /**
   * Writes postings to an in-memory stream, reads them back and expects the
   * copy to be equal to the original.
   */
  @Test public void testIO() throws Exception {
    CompressedPositionalPostings postings =
        CompressedPositionalPostings.newInstance(largeDataset, largeTermPositions);

    ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(byteOut);
    try {
      postings.write(dataOut);
    } finally {
      dataOut.close();
    }

    DataInputStream dataIn =
        new DataInputStream(new ByteArrayInputStream(byteOut.toByteArray()));
    CompressedPositionalPostings postingsCopy;
    try {
      postingsCopy = CompressedPositionalPostings.readInstance(dataIn);
    } finally {
      dataIn.close();
    }

    assertEquals(postings, postingsCopy);
  }

  /** Adapter that lets JUnit 3 style runners execute this JUnit 4 class. */
  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(CompressedPositionalPostingsTest.class);
  }
}