package ivory.ffg.feature; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import junit.framework.JUnit4TestAdapter; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import ivory.core.data.document.IntDocVector; import ivory.core.data.document.LazyIntDocVector; import ivory.ffg.data.DocumentVector; import ivory.ffg.data.DocumentVectorUtility; import ivory.ffg.score.TfScoringFunction; public class FeatureTest { private static final String[] documentVectorClass = new String[] { "ivory.ffg.data.DocumentVectorHashedArray", "ivory.ffg.data.DocumentVectorMiniInvertedIndex", "ivory.ffg.data.DocumentVectorPForDeltaArray", "ivory.ffg.data.DocumentVectorVIntArray" }; private static final Map<Feature, Map<int[], Float>> features = Maps.newHashMap(); private static IntDocVector intDocVector; private static int[] document = null; private static final SortedMap<Integer, int[]> indexedDocument = new TreeMap<Integer, int[]>(); @BeforeClass public static void setUpFeatures() { Map<int[], Float> termFeatures = Maps.newHashMap(); termFeatures.put(new int[] {100}, 4.0f); termFeatures.put(new int[] {101}, 3.0f); termFeatures.put(new int[] {429}, 1.0f); features.put(new TermFeature(), termFeatures); Map<int[], Float> od1SdFeatures = Maps.newHashMap(); od1SdFeatures.put(new int[] {100, 101}, 1f); od1SdFeatures.put(new int[] {32, 100}, 1f); od1SdFeatures.put(new int[] {15, 380}, 0f); od1SdFeatures.put(new int[] {101, 100}, 2f); od1SdFeatures.put(new int[] {100, 4}, 1f); od1SdFeatures.put(new int[] {100, 4, 43}, 2f); features.put(new OrderedWindowSequentialDependenceFeature(1), od1SdFeatures); Map<int[], Float> od8SdFeatures = Maps.newHashMap(); od8SdFeatures.put(new int[] {100, 101}, 4f); od8SdFeatures.put(new int[] {32, 100}, 1f); od8SdFeatures.put(new int[] {15, 380}, 1f); od8SdFeatures.put(new int[] {101, 100}, 2f); od8SdFeatures.put(new int[] {100, 4}, 3f); od8SdFeatures.put(new int[] {100, 4, 43}, 4f); features.put(new OrderedWindowSequentialDependenceFeature(8), od8SdFeatures); Map<int[], Float> uw1SdFeatures = Maps.newHashMap(); uw1SdFeatures.put(new int[] {100, 101}, 3f); uw1SdFeatures.put(new int[] {32, 100}, 1f); uw1SdFeatures.put(new int[] {15, 380}, 0f); uw1SdFeatures.put(new int[] {101, 100}, 3f); uw1SdFeatures.put(new int[] {100, 4}, 0f); uw1SdFeatures.put(new int[] {100, 4, 43}, 0f); features.put(new UnorderedWindowSequentialDependenceFeature(1), uw1SdFeatures); Map<int[], Float> uw8SdFeatures = Maps.newHashMap(); uw8SdFeatures.put(new int[] {100, 101}, 6f); uw8SdFeatures.put(new int[] {32, 100}, 3f); uw8SdFeatures.put(new int[] {15, 380}, 1f); uw8SdFeatures.put(new int[] {101, 100}, 6f); uw8SdFeatures.put(new int[] {100, 4}, 4f); uw8SdFeatures.put(new int[] {100, 4, 43}, 5f); features.put(new UnorderedWindowSequentialDependenceFeature(8), uw8SdFeatures); } @BeforeClass public static void setUpIntDocVector() throws Exception { document = new int[] { 100, 73500, 429, 101, 100, 32, 48, 100, 101, 100, 7300, 4, 11, 43, 101, 15, 1, 12, 380, 400 }; Map<Integer, List<Integer>> map = Maps.newHashMap(); for(int i = 0; i < document.length; i++) { if(!map.containsKey(document[i])) { List<Integer> list = Lists.newArrayList(); map.put(document[i], list); } map.get(document[i]).add(i + 1); } for(int key: map.keySet()) { int[] positions = new int[map.get(key).size()]; int i = 0; for(int pos: map.get(key)) { positions[i++] = pos; } indexedDocument.put(key, positions); } intDocVector = new LazyIntDocVector(indexedDocument); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); DataOutputStream dataOut = new DataOutputStream(byteOut); intDocVector.write(dataOut); dataOut.close(); ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray()); DataInputStream dataIn = new DataInputStream(byteIn); intDocVector = new LazyIntDocVector(); intDocVector.readFields(dataIn); } @Test public void testFeaturesSlidingWindow() throws Exception { for(String dvclass: documentVectorClass) { DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector); try { int[] transformedDoc = dv.decompressDocument(); for(Feature f: features.keySet()) { f.initialize(new TfScoringFunction()); for(int[] query: features.get(f).keySet()) { int[] transformedTerms = dv.transformTerms(query); float fValue = f. computeScoreWithSlidingWindow(transformedDoc, query, transformedTerms, null); assertEquals(features.get(f).get(query), fValue, 1e-10); } } } catch(UnsupportedOperationException e) { continue; } } } @Test public void testFeaturesWithMiniIndexing() throws Exception { for(String dvclass: documentVectorClass) { DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector); try { for(Feature f: features.keySet()) { f.initialize(new TfScoringFunction()); for(int[] query: features.get(f).keySet()) { int[][] positions = dv.decompressPositions(query); float fValue = f. computeScoreWithMiniIndexes(positions, query, dv.getDocumentLength(), null); assertEquals(features.get(f).get(query), fValue, 1e-10); } } } catch(UnsupportedOperationException e) { continue; } } } public static junit.framework.Test suite() { return new JUnit4TestAdapter(FeatureTest.class); } }