package ivory.ffg.data; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import junit.framework.JUnit4TestAdapter; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import ivory.core.data.document.IntDocVector; import ivory.core.data.document.LazyIntDocVector; public class DocumentVectorTest { private static final String[] documentVectorClass = new String[] { "ivory.ffg.data.DocumentVectorHashedArray", "ivory.ffg.data.DocumentVectorMiniInvertedIndex", "ivory.ffg.data.DocumentVectorPForDeltaArray", "ivory.ffg.data.DocumentVectorVIntArray" }; private static IntDocVector intDocVector; private static final int[] document = new int[500]; private static final SortedMap<Integer, int[]> indexedDocument = new TreeMap<Integer, int[]>(); private static int[] terms; @BeforeClass public static void setUp() throws Exception { Map<Integer, List<Integer>> map = Maps.newHashMap(); for(int i = 0; i < document.length; i++) { document[i] = (int) (Math.random() * 70000) + 1; if(!map.containsKey(document[i])) { List<Integer> list = Lists.newArrayList(); map.put(document[i], list); } map.get(document[i]).add(i + 1); } for(int key: map.keySet()) { int[] positions = new int[map.get(key).size()]; int i = 0; for(int pos: map.get(key)) { positions[i++] = pos; } indexedDocument.put(key, positions); } terms = new int[map.size()]; int i = 0; for(int key: map.keySet()) { terms[i++] = key; } intDocVector = new LazyIntDocVector(indexedDocument); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); DataOutputStream dataOut = new DataOutputStream(byteOut); intDocVector.write(dataOut); dataOut.close(); ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray()); DataInputStream dataIn = new DataInputStream(byteIn); intDocVector = new LazyIntDocVector(); intDocVector.readFields(dataIn); } @Test public void testDecompressDocuments() throws Exception { for(String dvclass: documentVectorClass) { DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector); assertEquals(document.length, dv.getDocumentLength()); try { int[] transformedDoc = dv.decompressDocument(); int[] transformedTerms = dv.transformTerms(terms); int[][] positions = DocumentVectorUtility. getPositions(transformedDoc, transformedTerms); for(int i = 0; i < positions.length; i++) { assertEquals(indexedDocument.get(terms[i]).length, positions[i].length); for(int j = 0; j < positions[i].length; j++) { assertEquals(terms[i], document[positions[i][j] - 1]); } } } catch(UnsupportedOperationException e) { continue; } } } @Test public void testDecompressPositions() throws Exception { for(String dvclass: documentVectorClass) { DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector); try { int[][] positions = dv.decompressPositions(terms); if(dvclass.contains("mini")) { for(int i = 0; i < positions.length; i++) { for(int j = 0; j < positions[i].length; j++) { System.out.print(positions[i][j] + " "); } System.out.println(); } } for(int i = 0; i < positions.length; i++) { assertEquals(indexedDocument.get(terms[i]).length, positions[i].length); for(int j = 0; j < positions[i].length; j++) { assertEquals(terms[i], document[positions[i][j] - 1]); } } } catch(UnsupportedOperationException e) { continue; } } } @Test public void testIO() throws Exception { for(String dvclass: documentVectorClass) { DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); DataOutputStream dataOut = new DataOutputStream(byteOut); dv.write(dataOut); dataOut.close(); ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray()); DataInputStream dataIn = new DataInputStream(byteIn); DocumentVector dvCopy = DocumentVectorUtility.readInstance(dvclass, dataIn); assertEquals(dv, dvCopy); } } public static junit.framework.Test suite() { return new JUnit4TestAdapter(DocumentVectorTest.class); } }