/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.imhotep.local; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.indeed.flamdex.api.FlamdexReader; import com.indeed.flamdex.query.Query; import com.indeed.flamdex.query.Term; import com.indeed.flamdex.reader.MockFlamdexReader; import com.indeed.imhotep.BucketStats; import com.indeed.imhotep.GroupMultiRemapRule; import com.indeed.imhotep.GroupRemapRule; import com.indeed.imhotep.ImhotepMemoryPool; import com.indeed.imhotep.MemoryReservationContext; import com.indeed.imhotep.QueryRemapRule; import com.indeed.imhotep.RegroupCondition; import com.indeed.imhotep.api.ImhotepOutOfMemoryException; import com.indeed.flamdex.MakeAFlamdex; import com.indeed.imhotep.group.ImhotepChooser; import org.junit.Assert; import org.junit.Test; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.fail; import static org.junit.Assert.*; /** * @author jsgroth */ public class TestImhotepLocalSession { @Test public void testPushPopGetDepth() throws ImhotepOutOfMemoryException { // This test doesn't really specifically need the 2d test setup, // but that setup is good enoguh for this too. 
FlamdexReader r = new2DMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); assertEquals(0, session.getNumStats()); int numStats = session.pushStat("if1"); assertEquals(1, numStats); assertEquals(1, session.getNumStats()); numStats = session.pushStat("if2"); assertEquals(2, numStats); assertEquals(2, session.getNumStats()); numStats = session.pushStat("if3"); assertEquals(3, numStats); assertEquals(3, session.getNumStats()); numStats = session.popStat(); // should pop "if3" assertEquals(2, numStats); assertEquals(2, session.getNumStats()); numStats = session.pushStat("*"); // should reduce to if1 * if2 assertEquals(1, numStats); assertEquals(1, session.getNumStats()); numStats = session.pushStats(Arrays.asList("if1", "if2", "if3")); assertEquals(4, numStats); assertEquals(4, session.getNumStats()); } @Test public void testMoreConditionsThanTargetGroups() throws ImhotepOutOfMemoryException { final MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> asList(), 16); r.addIntTerm("if1", 1, 1, 3, 5, 7, 9, 11, 13, 15); // 0th bit r.addIntTerm("if1", 2, 2, 3, 6, 7, 10, 11, 14, 15); // 1st bit r.addIntTerm("if1", 4, 4, 5, 6, 7, 12, 13, 14, 15); // 2nd bit r.addIntTerm("if1", 8, 8, 9, 10, 11, 12, 13, 14, 15); // 2nd bit // 0000, 0001, 0010, 0011, 0100, 0101, 0110, 0111, 1000, 1001, ... 
ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 0, new int[] { 1, 1, 1, 1 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, false), new RegroupCondition( "if1", true, 2, null, false), new RegroupCondition( "if1", true, 4, null, false), new RegroupCondition( "if1", true, 8, null, false), }) }); int[] arr = new int[16]; session.exportDocIdToGroupId(arr); System.out.println(Arrays.toString(arr)); assertArrayEquals(new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, arr); } @Test public void testResetThenRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Collections.<String> emptyList(), Arrays.asList("if1"), 10); r.addIntTerm("if1", 0, 1, 3, 5, 7, 9); r.addIntTerm("if1", 5, 0, 2, 4, 6, 8); ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new GroupRemapRule[] { new GroupRemapRule( 1, new RegroupCondition("if1", true, 0, "", false), 0, 1) }); session.resetGroups(); session.pushStat("count()"); int numGroups = session.regroup(new GroupRemapRule[] { new GroupRemapRule( 99999, new RegroupCondition( "if1", true, 1, "", false), 1, 1) }); assertEquals(1, numGroups); } @Test public void testMetricRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = newMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); session.pushStat("if1"); int numGroups = session.metricRegroup(0, 0, 20, 5); assertEquals(7, numGroups); // 4 buckets, 2 gutters, group 0 int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); assertEquals(Arrays.asList(2, 3, 2, 4, 2, 4, 2, 3, 2, 6), Ints.asList(docIdToGroup)); } @Test public void testMetricRegroup2() throws ImhotepOutOfMemoryException { MockFlamdexReader r = newMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); assertEquals(2, session.regroup(new 
GroupRemapRule[] { new GroupRemapRule( 1, new RegroupCondition( "sf1", false, 0, "☃", false), 1, 0) })); session.pushStat("if1"); int numGroups = session.metricRegroup(0, 9, 17, 4); assertEquals(5, numGroups); // 2 buckets, 2 gutters, group 0 int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); assertEquals(Arrays.asList(3, 1, 3, 0, 0, 2, 3, 1, 0, 4), Ints.asList(docIdToGroup)); } private static MockFlamdexReader newMetricRegroupTestReader() { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.asList("sf1"), Arrays.asList("if1"), 10); r.addIntTerm("if1", 5, Arrays.asList(0, 2, 4, 6, 8)); r.addIntTerm("if1", 10, Arrays.asList(1, 7)); r.addIntTerm("if1", 15, Arrays.asList(3, 5)); r.addIntTerm("if1", 20, Arrays.asList(9)); r.addStringTerm("sf1", "☃", Arrays.asList(3, 4, 8)); return r; } @Test public void test2DMetricRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new2DMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); session.pushStat("if1"); session.pushStat("if2"); session.metricRegroup2D(0, 1, 8, 3, 1, 4, 12, 2); int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); assertEquals(Arrays.asList(1, 2, 7, 7, 13, 13, 18, 19, 25, 25), Ints.asList(docIdToGroup)); session.pushStat("if3"); long[] if3 = Arrays.copyOf(session.getGroupStats(2), 31); long[] expected = { 0, 1, 4, 0, 0, 0, // 1-5 0, 25, 0, 0, 0, // 6-10 0, 0, 61, 0, 0, // 11-15 0, 0, 49, 64, 0, // 16-20 0, 0, 0, 0, 181, // 21-25 0, 0, 0, 0, 0, // 26-30 }; assertEquals(Longs.asList(expected), Longs.asList(if3)); BucketStats bs = new BucketStats(if3, 5, 6); assertEquals(25, bs.get(0, 0)); assertEquals(1, bs.getXYUnderflow()); assertEquals(4, bs.getYUnderflow(0)); assertEquals(61, bs.get(1, 1)); assertEquals(49, bs.get(1, 2)); assertEquals(64, bs.get(2, 2)); assertEquals(181, bs.getXOverflow(3)); for (int y = 0; y < 4; ++y) { assertEquals(0, bs.getXUnderflow(y)); } for (int x = 0; x < 3; ++x) 
{ assertEquals(0, bs.getYOverflow(x)); } assertEquals(0, bs.getXYOverflow()); assertEquals(0, bs.getXUnderflowYOverflow()); assertEquals(0, bs.getXOverflowYUnderflow()); assertEquals(0, bs.getYUnderflow(1)); assertEquals(0, bs.getYUnderflow(2)); assertEquals(0, bs.get(1, 0)); assertEquals(0, bs.get(2, 0)); assertEquals(0, bs.get(0, 1)); assertEquals(0, bs.get(0, 2)); assertEquals(0, bs.get(0, 3)); assertEquals(0, bs.get(2, 1)); assertEquals(0, bs.get(2, 3)); assertEquals(0, bs.get(1, 3)); for (int y = 0; y < 3; ++y) { assertEquals(0, bs.getXOverflow(y)); } } @Test public void test2DMetricRegroup2() throws ImhotepOutOfMemoryException { FlamdexReader r = new2DMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); session.pushStat("if1"); session.pushStat("if2"); session.metricRegroup2D(0, 4, 8, 2, 1, 2, 4, 2); int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); assertEquals(Ints.asList(5, 5, 9, 9, 10, 10, 11, 11, 12, 12), Ints.asList(docIdToGroup)); session.pushStat("if3"); long[] if3 = Arrays.copyOf(session.getGroupStats(2), 13); long[] expected = { 0, 0, 0, 0, 0, // 1-4 5, 0, 0, 0, // 5-8 25, 61, 113, 181, // 9-12 }; assertEquals(Longs.asList(expected), Longs.asList(if3)); BucketStats bs = new BucketStats(if3, 4, 3); assertEquals(5, bs.getXUnderflow(0)); assertEquals(25, bs.getXUnderflowYOverflow()); assertEquals(61, bs.getYOverflow(0)); assertEquals(113, bs.getYOverflow(1)); assertEquals(181, bs.getXYOverflow()); assertEquals(0, bs.getXYUnderflow()); assertEquals(0, bs.getYUnderflow(0)); assertEquals(0, bs.getYUnderflow(1)); assertEquals(0, bs.getXOverflowYUnderflow()); assertEquals(0, bs.get(0, 0)); assertEquals(0, bs.get(1, 0)); } @Test public void test2DMetricRegroup3() throws ImhotepOutOfMemoryException { FlamdexReader r = new2DMetricRegroupTestReader(); ImhotepLocalSession session = new ImhotepLocalSession(r); session.pushStat("if1"); session.pushStat("if2"); session.regroup(new QueryRemapRule(1, 
Query.newTermQuery(new Term("sf1", false, 0, "☃")), 1, 0)); session.metricRegroup2D(0, 0, 1, 90000, 1, 8, 10, 2); int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); assertEquals(Ints.asList(2, 0, 3, 3, 0, 0, 0, 0, 9, 9), Ints.asList(docIdToGroup)); session.pushStat("if3"); long[] if3 = Arrays.copyOf(session.getGroupStats(2), 10); long[] expected = { 0, 0, 1, 25, 0, 0, 0, 0, 0, 181, }; assertEquals(Longs.asList(expected).subList(1, expected.length), Longs.asList(if3) .subList(1, if3.length)); BucketStats bs = new BucketStats(if3, 3, 3); assertEquals(0, bs.getXYUnderflow()); assertEquals(1, bs.getYUnderflow(0)); assertEquals(25, bs.getXOverflowYUnderflow()); assertEquals(0, bs.getXUnderflow(0)); assertEquals(0, bs.get(0, 0)); assertEquals(0, bs.getXOverflow(0)); assertEquals(0, bs.getXUnderflowYOverflow()); assertEquals(0, bs.getYOverflow(0)); assertEquals(181, bs.getXYOverflow()); } private static MockFlamdexReader new2DMetricRegroupTestReader() { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1", "if2", "if3"), Arrays.asList("sf1"), Arrays.asList("if1", "if2", "if3"), 10); r.addIntTerm("if1", 0, 0); r.addIntTerm("if1", 1, 1); r.addIntTerm("if1", 2, 2); r.addIntTerm("if1", 3, 3); r.addIntTerm("if1", 4, 4); r.addIntTerm("if1", 5, 5); r.addIntTerm("if1", 6, 6); r.addIntTerm("if1", 7, 7); r.addIntTerm("if1", 8, 8); r.addIntTerm("if1", 9, 9); r.addIntTerm("if2", 2, 0, 1); r.addIntTerm("if2", 4, 2, 3); r.addIntTerm("if2", 6, 4, 5); r.addIntTerm("if2", 8, 6, 7); r.addIntTerm("if2", 10, 8, 9); r.addIntTerm("if3", 1, 0); r.addIntTerm("if3", 4, 1); r.addIntTerm("if3", 9, 2); r.addIntTerm("if3", 16, 3); r.addIntTerm("if3", 25, 4); r.addIntTerm("if3", 36, 5); r.addIntTerm("if3", 49, 6); r.addIntTerm("if3", 64, 7); r.addIntTerm("if3", 81, 8); r.addIntTerm("if3", 100, 9); r.addStringTerm("sf1", "☃", Arrays.asList(1, 4, 5, 6, 7)); return r; } @Test public void testOrRegroup() throws ImhotepOutOfMemoryException { final FlamdexReader r = 
MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); session.stringOrRegroup("sf4", new String[] { "asdf", "cdef" }, (char) 1, (char) 0, (char) 1); session.pushStat("count()"); long[] stats = session.getGroupStats(0); assertEquals(6, stats[1]); session.close(); } @Test public void testStuff() throws ImhotepOutOfMemoryException { final FlamdexReader r = MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); session.pushStat("count()"); session.regroup(new GroupRemapRule[] { new GroupRemapRule(1, new RegroupCondition("if3", true, 9999, null, false), 1, 2) }); session.regroup(new GroupRemapRule[] { new GroupRemapRule(1, new RegroupCondition("if3", true, 19, null, false), 1, 2), new GroupRemapRule( 2, new RegroupCondition("sf2", false, 0, "b", false), 3, 4) }); long[] stats = session.getGroupStats(0); assertEquals(10, stats[1]); assertEquals(5, stats[2]); assertEquals(4, stats[3]); assertEquals(1, stats[4]); session.close(); } @Test public void testDynamicMetric() throws ImhotepOutOfMemoryException { final FlamdexReader r = MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); session.createDynamicMetric("foo"); session.pushStat("dynamic foo"); assertEquals(Longs.asList(0, 0), Longs.asList(session.getGroupStats(0))); session.updateDynamicMetric("foo", new int[] { 0, 1 }); assertEquals(Longs.asList(0, 20), Longs.asList(session.getGroupStats(0))); session.regroup(new GroupRemapRule[] { new GroupRemapRule(1, new RegroupCondition("if2", true, 0, null, false), 1, 2) }); assertEquals(Longs.asList(0, 15, 5), Longs.asList(session.getGroupStats(0))); session.updateDynamicMetric("foo", new int[] { 0, 0, -2 }); assertEquals(Longs.asList(0, 15, -5), Longs.asList(session.getGroupStats(0))); // reset all to group 1 session.regroup(new GroupRemapRule[] { new GroupRemapRule(1, new RegroupCondition("if2", true, 0, null, false), 1, 1), new GroupRemapRule(2, new RegroupCondition("if2", true, 0, 
null, false), 1, 1) }); assertEquals(Longs.asList(0, 10), Longs.asList(session.getGroupStats(0)).subList(0, 2)); } @Test public void testRandomMultiRegroup_firstIndexLessThan() throws ImhotepOutOfMemoryException { final FlamdexReader r = MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); // normal case -- 0.5 falls in the [0.4, 0.7) bucket, which is the // fourth (index == 3) in the list of: // [0.0, 0.1) // [0.1, 0.3) // [0.3, 0.4) // [0.4, 0.7) // [0.7, 0.9) // [0.9, 1.0] assertEquals(3, session.indexOfFirstLessThan(0.5, new double[] { 0.1, 0.3, 0.4, 0.7, 0.9 })); // less than all assertEquals(0, session.indexOfFirstLessThan(0.0, new double[] { 0.1, 0.2, 0.3 })); // empty array assertEquals(0, session.indexOfFirstLessThan(0.5, new double[] {})); // less than last element assertEquals(3, session.indexOfFirstLessThan(0.8, new double[] { 0.1, 0.4, 0.5, 0.9 })); // greater than all assertEquals(4, session.indexOfFirstLessThan(0.95, new double[] { 0.1, 0.4, 0.5, 0.9 })); // equal to last (at index 4) assertEquals(4, session.indexOfFirstLessThan(0.9, new double[] { 0.1, 0.4, 0.5, 0.9 })); } @Test public void testRandomMultiRegroup_ensureValidMultiRegroupArrays() throws ImhotepOutOfMemoryException { final FlamdexReader r = MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); // ******************************** Stuff that's OK: // normal case session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.5, 0.7, 0.9 }, new int[] { 1, 2, 3, 4, 5 }); // ******************************** Stuff that's not OK: // Bad lengths try { session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.2, 0.3 }, new int[] { 1, 2, 3 }); fail("ensureValidMultiRegroupArrays didn't throw IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected try { session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.5, 0.7 }, new int[] { 1, 2, 3, 4, 5, 6 }); fail("ensureValidMultiRegroupArrays didn't throw 
IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected try { session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.3, 0.5, 0.7, 0.9 }, new int[] { 1, 2, 3 }); fail("ensureValidMultiRegroupArrays didn't throw IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected // Percentages not in order try { session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.5, 0.3 }, new int[] { 1, 2, 3, 4 }); fail("ensureValidMultiRegroupArrays didn't throw IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected // Percentages out of bounds try { session.ensureValidMultiRegroupArrays(new double[] { -0.001, 0.1, 0.5, 0.7 }, new int[] { 1, 2, 3, 4, 5 }); fail("ensureValidMultiRegroupArrays didn't throw IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected try { session.ensureValidMultiRegroupArrays(new double[] { 0.1, 0.5, 0.7, 1.00001 }, new int[] { 1, 2, 3, 4, 5 }); fail("ensureValidMultiRegroupArrays didn't throw IllegalArgumentException"); } catch (IllegalArgumentException e) { } // expected } @Test public void testRandomMultiRegroup() throws ImhotepOutOfMemoryException { final FlamdexReader r = MakeAFlamdex.make(); final ImhotepLocalSession session = new ImhotepLocalSession(r); // Expected // ( @see MakeAFlamdex.make() ) final String regroupField = "sf1"; final ImhotepChooser chooser = new ImhotepChooser("salt", -1.0); final HashSet<Integer> noTerm = new HashSet<Integer>(Arrays.asList(2, 4, 7, 10, 11, 12, 13, 14, 15, 17, 18)); final double[] percentages = new double[] { 0.10, 0.50 }; final int[] resultGroups = new int[] { 5, 6, 7 }; String[] docIdToTerm = new String[] { "", // 0 "a", // 1 null, // 2 "hello world", // 3 null, // 4 "", // 5 "a", // 6 null, // 7 "a", // 8 "hello world", // 9 null, // 10 null, // 11 null, // 12 null, // 13 null, // 14 null, // 15 "hello world", // 16 null, // 17 null, // 18 "a" // 19 }; final Map<String, Integer> termToGroup = 
Maps.newHashMap(); for (String term : Arrays.asList("", "a", "hello world")) { double hashValue = chooser.getValue(term); if (hashValue < 0.10) { termToGroup.put(term, 5); } else if (hashValue < 0.50) { termToGroup.put(term, 6); } else { termToGroup.put(term, 7); } } termToGroup.put(null, 1); // Actual session.randomMultiRegroup(regroupField, false, "salt", 1, percentages, resultGroups); // Make sure they're in the correct groups int[] docIdToGroup = new int[20]; session.exportDocIdToGroupId(docIdToGroup); for (int docId = 0; docId < 20; docId++) { final int actualGroup = docIdToGroup[docId]; final int expectedGroup = termToGroup.get(docIdToTerm[docId]); assertEquals("doc id #" + docId + " was misgrouped", expectedGroup, actualGroup); } } @Test public void testSingleMultisplitIntRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> asList(), 11); for (int i = 1; i <= 10; i++) { List<Integer> l = Lists.newArrayList(); for (int j = 1; j <= i; j++) { l.add(j - 1); } r.addIntTerm("if1", i, l); } ImhotepLocalSession session = new ImhotepLocalSession(r); final RegroupCondition[] conditions = new RegroupCondition[10]; final int[] positiveGroups = new int[10]; for (int i = 1; i <= 10; i++) { conditions[i - 1] = new RegroupCondition("if1", true, i, null, false); positiveGroups[i - 1] = i; } session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule(1, 0, positiveGroups, conditions) }); int[] docIdToGroup = new int[11]; session.exportDocIdToGroupId(docIdToGroup); for (int docId = 0; docId < 10; docId++) { final int actualGroup = docIdToGroup[docId]; final int expectedGroup = docId + 1; assertEquals("doc id #" + docId + " was misgrouped;", expectedGroup, actualGroup); } assertEquals("doc id #10 should be in no group", 0, docIdToGroup[10]); } @Test public void testSingleMultisplitStringRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new 
MockFlamdexReader(Arrays.<String> asList(), Arrays.asList("sf1"), Arrays.<String> asList(), 11); for (int i = 1; i <= 10; i++) { List<Integer> l = Lists.newArrayList(); for (int j = 1; j <= i; j++) { l.add(j - 1); } r.addStringTerm("sf1", "" + i, l); } ImhotepLocalSession session = new ImhotepLocalSession(r); final RegroupCondition[] conditions = new RegroupCondition[10]; final int[] positiveGroups = new int[10]; for (int i = 1; i <= 10; i++) { conditions[i - 1] = new RegroupCondition("sf1", false, 0, "" + i, false); positiveGroups[i - 1] = i; } session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule(1, 0, positiveGroups, conditions) }); int[] docIdToGroup = new int[11]; session.exportDocIdToGroupId(docIdToGroup); for (int docId = 0; docId < 10; docId++) { final int actualGroup = docIdToGroup[docId]; final int expectedGroup = docId + 1; assertEquals("doc id #" + docId + " was misgrouped;", expectedGroup, actualGroup); } assertEquals("doc id #10 should be in no group", 0, docIdToGroup[10]); } @Test public void testParallelMultisplitIntRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1", "if2"), Arrays.<String> asList(), Arrays.<String> asList(), 22); for (int i = 1; i <= 10; i++) { List<Integer> l = Lists.newArrayList(); for (int j = 1; j <= i; j++) { l.add(j - 1); l.add(10 + (j - 1)); } r.addIntTerm("if1", i, l); } // Add 0-9 to if2 so we can split it out List<Integer> l = Lists.newArrayList(); for (int i = 0; i < 11; i++) { l.add(10 + i); } r.addIntTerm("if2", 0, l); ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new QueryRemapRule(1, Query.newTermQuery(new Term("if2", true, 0, null)), 1, 2)); final int[] positiveGroups = new int[10]; final RegroupCondition[] conditions = new RegroupCondition[10]; for (int i = 1; i <= 10; i++) { positiveGroups[i - 1] = i; conditions[i - 1] = new RegroupCondition("if1", true, i, null, false); } session.regroup(new GroupMultiRemapRule[] 
{ new GroupMultiRemapRule(1, 0, positiveGroups, conditions), new GroupMultiRemapRule(2, 0, positiveGroups, conditions) }); int[] docIdToGroup = new int[22]; session.exportDocIdToGroupId(docIdToGroup); for (int docId = 0; docId < 10; docId++) { final int actualGroup = docIdToGroup[docId]; final int expectedGroup = docId + 1; assertEquals("doc id #" + docId + " was misgrouped;", expectedGroup, actualGroup); assertEquals("doc id #" + (10 + docId) + " was misgrouped;", expectedGroup, actualGroup); } assertEquals("doc id #20 should be in no group", 0, docIdToGroup[20]); assertEquals("doc id #21 should be in no group", 0, docIdToGroup[21]); } @Test public void testParallelMultisplitStringRegroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.<String> asList(), Arrays.asList("sf1", "sf2"), Arrays.<String> asList(), 22); for (int i = 1; i <= 10; i++) { List<Integer> l = Lists.newArrayList(); for (int j = 1; j <= i; j++) { l.add(j - 1); l.add(10 + (j - 1)); } r.addStringTerm("sf1", "" + i, l); } // Add 0-9 to if2 so we can split it out List<Integer> l = Lists.newArrayList(); for (int i = 0; i < 11; i++) { l.add(10 + i); } r.addStringTerm("sf2", "0", l); ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new QueryRemapRule(1, Query.newTermQuery(new Term("sf2", false, 0, "0")), 1, 2)); final int[] positiveGroups = new int[10]; final RegroupCondition[] conditions = new RegroupCondition[10]; for (int i = 1; i <= 10; i++) { positiveGroups[i - 1] = i; conditions[i - 1] = new RegroupCondition("sf1", false, 0, "" + i, false); } session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule(1, 0, positiveGroups, conditions), new GroupMultiRemapRule(2, 0, positiveGroups, conditions) }); int[] docIdToGroup = new int[22]; session.exportDocIdToGroupId(docIdToGroup); for (int docId = 0; docId < 10; docId++) { final int actualGroup = docIdToGroup[docId]; final int expectedGroup = docId + 1; assertEquals("doc id #" + 
docId + " was misgrouped;", expectedGroup, actualGroup); assertEquals("doc id #" + (10 + docId) + " was misgrouped;", expectedGroup, actualGroup); } assertEquals("doc id #20 should be in no group", 0, docIdToGroup[20]); assertEquals("doc id #21 should be in no group", 0, docIdToGroup[21]); } @Test public void testMultisplitTargetingNonexistentGroup() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> asList(), 11); ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1000, 1234, new int[] { 1 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, false) }) }); } @Test public void testIntMultiInequalitySplit() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1", "if2"), Arrays.<String> asList(), Arrays.<String> asList(), 10); for (int i = 0; i < 10; i++) { r.addIntTerm("if1", i, i); r.addIntTerm("if2", i, i); } ImhotepLocalSession session = new ImhotepLocalSession(r); session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2, 3 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 5, null, true), new RegroupCondition( "if2", true, 7, null, true), new RegroupCondition( "if1", true, 9, null, true), // new // RegroupCondition("if2",true,4,null,true), }) }); final int[] docIdToGroup = new int[10]; session.exportDocIdToGroupId(docIdToGroup); for (int i = 0; i < 10; i++) { if (i <= 5) { assertEquals(1, docIdToGroup[i]); } else if (i <= 7) { assertEquals(2, docIdToGroup[i]); } else if (i <= 9) { assertEquals(3, docIdToGroup[i]); } else { assertEquals(5, docIdToGroup[i]); } } } @Test public void testMultisplitGeneralInputValidation() throws ImhotepOutOfMemoryException { // count mismatch #1 { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> 
asList(), 10); ImhotepLocalSession session = new ImhotepLocalSession(r); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 0, new int[] { 1 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, true), new RegroupCondition( "if1", true, 1, null, true), }) }); fail("Improperly handles having more conditions than positive groups"); } catch (IllegalArgumentException e) { } } // count mismatch #2 { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> asList(), 10); ImhotepLocalSession session = new ImhotepLocalSession(r); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 0, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, true), }) }); fail("Improperly handles having fewer conditions than positive groups"); } catch (IllegalArgumentException e) { } } } @Test public void testIntMultisplitInequalityInputValidation() throws ImhotepOutOfMemoryException { MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"), Arrays.<String> asList(), Arrays.<String> asList(), 10); ImhotepLocalSession session = new ImhotepLocalSession(r); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 7, null, true), new RegroupCondition( "if1", true, 4, null, true), }) }); fail("Improperly handles unreachable inequality splits"); } catch (IllegalArgumentException e) { } } @Test public void testStringMultisplitInequalityInputValidation() throws ImhotepOutOfMemoryException { final List<String> fields = Arrays.asList("sf1"); final List<String> emptyList = Arrays.<String> asList(); MockFlamdexReader r = new MockFlamdexReader(emptyList, fields, emptyList, 10); ImhotepLocalSession session = new ImhotepLocalSession(r); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new 
RegroupCondition[] { new RegroupCondition( "sf1", false, 0, "7", true), new RegroupCondition( "sf1", false, 0, "4", true), }) }); fail("Improperly handles unreachable inequality splits"); } catch (IllegalArgumentException e) { } } @Test public void testStringMultisplitEqualityInputValidation() throws ImhotepOutOfMemoryException { final List<String> fields = Arrays.asList("sf1"); final List<String> emptyList = Arrays.<String> asList(); MockFlamdexReader r = new MockFlamdexReader(emptyList, fields, emptyList, 10); ImhotepLocalSession session = new ImhotepLocalSession(r); // verify doesn't fail session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "sf1", false, 0, "a", false), new RegroupCondition( "sf1", false, 0, "a", true) }) }); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "sf1", false, 0, "a", false), new RegroupCondition( "sf1", false, 0, "a", false) }) }); fail("Improperly handles unreachable equality splits"); } catch (IllegalArgumentException e) { } } @Test public void testIntMultisplitEqualityInputValidation() throws ImhotepOutOfMemoryException { final List<String> fields = Arrays.asList("if1"); final List<String> emptyList = Arrays.<String> asList(); MockFlamdexReader r = new MockFlamdexReader(fields, emptyList, emptyList, 10); ImhotepLocalSession session = new ImhotepLocalSession(r); // verify doesn't fail session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, false), new RegroupCondition( "if1", true, 1, null, true) }) }); try { session.regroup(new GroupMultiRemapRule[] { new GroupMultiRemapRule( 1, 5, new int[] { 1, 2 }, new RegroupCondition[] { new RegroupCondition( "if1", true, 1, null, false), new RegroupCondition( "if1", true, 1, null, false) }) }); 
fail("Improperly handles unreachable equality splits"); } catch (IllegalArgumentException e) { } }

    /**
     * Applies the same three integer inequality conditions to two source groups in a single
     * regroup call and checks that each source group ends up in its own set of target groups.
     */
    @Test
    public void testIntMultiParallelInequalitySplit() throws ImhotepOutOfMemoryException {
        MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1", "if2"),
                                                    Arrays.<String> asList(),
                                                    Arrays.<String> asList(),
                                                    20);
        for (int i = 0; i < 10; i++) {
            r.addIntTerm("if1", i, i, i + 10);
        }
        // NOTE(review): every iteration registers the same term with the same doc list, so this
        // loop looks redundant -- confirm against MockFlamdexReader before collapsing it.
        for (int i = 0; i < 10; i++) {
            r.addIntTerm("if2", 1, Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
        }

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            // First regroup: docs carrying if2 = 1 (docs 10-19) are expected to end up in group 2.
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 1, new int[] { 2 }, new RegroupCondition[] {
                            new RegroupCondition("if2", true, 1, null, false)
                    })
            });
            // Second regroup: identical conditions for groups 1 and 2, different target groups.
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 5, new int[] { 1, 2, 3 }, new RegroupCondition[] {
                            new RegroupCondition("if1", true, 5, null, true),
                            new RegroupCondition("if1", true, 7, null, true),
                            new RegroupCondition("if1", true, 9, null, true)
                    }),
                    new GroupMultiRemapRule(2, 10, new int[] { 6, 7, 8 }, new RegroupCondition[] {
                            new RegroupCondition("if1", true, 5, null, true),
                            new RegroupCondition("if1", true, 7, null, true),
                            new RegroupCondition("if1", true, 9, null, true)
                    })
            });

            final int[] docIdToGroup = new int[20];
            session.exportDocIdToGroupId(docIdToGroup);
            for (int i = 0; i < 10; i++) {
                if (i <= 5) {
                    assertEquals(1, docIdToGroup[i]);
                    assertEquals(6, docIdToGroup[i + 10]);
                } else if (i <= 7) {
                    assertEquals(2, docIdToGroup[i]);
                    assertEquals(7, docIdToGroup[i + 10]);
                } else if (i <= 9) {
                    assertEquals(3, docIdToGroup[i]);
                    assertEquals(8, docIdToGroup[i + 10]);
                } else {
                    // Not reachable for i in [0, 10).
                    assertEquals(5, docIdToGroup[i]);
                    assertEquals(10, docIdToGroup[i + 10]);
                }
            }
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * Splits a single group with three string inequality conditions that alternate between two
     * string fields and checks which target group every document lands in.
     */
    @Test
    public void testStringMultiInequalitySplit() throws ImhotepOutOfMemoryException {
        final List<String> fields = Arrays.asList("sf1", "sf2");
        final List<String> emptyList = Arrays.<String> asList();
        MockFlamdexReader r = new MockFlamdexReader(emptyList, fields, emptyList, 10);
        for (int i = 0; i < 10; i++) {
            r.addStringTerm("sf1", "" + i, i);
            r.addStringTerm("sf2", "" + i, i);
        }

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 5, new int[] { 1, 2, 3 }, new RegroupCondition[] {
                            new RegroupCondition("sf1", false, 0, "5", true),
                            new RegroupCondition("sf2", false, 0, "7", true),
                            new RegroupCondition("sf1", false, 0, "9", true)
                    })
            });

            final int[] docIdToGroup = new int[10];
            session.exportDocIdToGroupId(docIdToGroup);
            for (int i = 0; i < 10; i++) {
                if (i <= 5) {
                    assertEquals(1, docIdToGroup[i]);
                } else if (i <= 7) {
                    assertEquals(2, docIdToGroup[i]);
                } else if (i <= 9) {
                    assertEquals(3, docIdToGroup[i]);
                } else {
                    // Not reachable for i in [0, 10).
                    assertEquals(5, docIdToGroup[i]);
                }
            }
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * String-field counterpart of {@link #testIntMultiParallelInequalitySplit()}: the same three
     * string inequality conditions are applied to two source groups in one regroup call.
     */
    @Test
    public void testStringMultiParallelInequalitySplit() throws ImhotepOutOfMemoryException {
        final List<String> fields = Arrays.asList("sf1", "sf2");
        final List<String> empty = Arrays.<String> asList();
        MockFlamdexReader r = new MockFlamdexReader(empty, fields, empty, 20);
        for (int i = 0; i < 10; i++) {
            r.addStringTerm("sf1", "" + i, i, i + 10);
        }
        // NOTE(review): every iteration registers the same term with the same doc list, so this
        // loop looks redundant -- confirm against MockFlamdexReader before collapsing it.
        for (int i = 0; i < 10; i++) {
            r.addStringTerm("sf2", "1", Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
        }

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            // First regroup: docs carrying sf2 = "1" (docs 10-19) are expected to end up in group 2.
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 1, new int[] { 2 }, new RegroupCondition[] {
                            new RegroupCondition("sf2", false, 0, "1", false)
                    })
            });
            // Second regroup: identical conditions for groups 1 and 2, different target groups.
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 5, new int[] { 1, 2, 3 }, new RegroupCondition[] {
                            new RegroupCondition("sf1", false, 0, "5", true),
                            new RegroupCondition("sf1", false, 0, "7", true),
                            new RegroupCondition("sf1", false, 0, "9", true)
                    }),
                    new GroupMultiRemapRule(2, 10, new int[] { 6, 7, 8 }, new RegroupCondition[] {
                            new RegroupCondition("sf1", false, 0, "5", true),
                            new RegroupCondition("sf1", false, 0, "7", true),
                            new RegroupCondition("sf1", false, 0, "9", true)
                    })
            });

            final int[] docIdToGroup = new int[20];
            session.exportDocIdToGroupId(docIdToGroup);
            for (int i = 0; i < 10; i++) {
                if (i <= 5) {
                    assertEquals(1, docIdToGroup[i]);
                    assertEquals(6, docIdToGroup[i + 10]);
                } else if (i <= 7) {
                    assertEquals(2, docIdToGroup[i]);
                    assertEquals(7, docIdToGroup[i + 10]);
                } else if (i <= 9) {
                    assertEquals(3, docIdToGroup[i]);
                    assertEquals(8, docIdToGroup[i + 10]);
                } else {
                    // Not reachable for i in [0, 10).
                    assertEquals(5, docIdToGroup[i]);
                    assertEquals(10, docIdToGroup[i + 10]);
                }
            }
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * Runs every single-condition inequality split over two int and two string fields, then
     * checks that with two conditions in one rule the earlier condition takes priority.
     * The reader has one more document (7) than there are terms (6), so the last document
     * carries no term in any field.
     */
    @Test
    public void testManyGroupMultiRemapRuleThings() throws ImhotepOutOfMemoryException {
        final List<String> intFields = Arrays.asList("if1", "if2");
        final List<String> stringFields = Arrays.asList("sf1", "sf2");
        final List<String> emptyList = Arrays.<String> asList();
        final int numDocs = 7;
        MockFlamdexReader r = new MockFlamdexReader(intFields, stringFields, emptyList, numDocs);

        int[] i1terms = new int[] { 1, 2, 3, 4, 5, 6 };
        int[] i2terms = new int[] { 5, 1, 2, 3, 8, 7 };
        String[] s1terms = new String[] { "e", "d", "c", "bc", "b", "a" };
        String[] s2terms = new String[] { "foo", "bar", "baz", "foo", "bar", "baz" };
        addIntField(r, "if1", i1terms);
        addIntField(r, "if2", i2terms);
        addStringField(r, "sf1", s1terms);
        addStringField(r, "sf2", s2terms);

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            testAllInequalitySplits(numDocs, "if1", i1terms, session);
            testAllInequalitySplits(numDocs, "if2", i2terms, session);
            testAllInequalitySplits(numDocs, "sf1", s1terms, session);
            testAllInequalitySplits(numDocs, "sf2", s2terms, session);

            final int[] docIdToGroup = new int[numDocs];

            // Try parallel inequality regroups, verify that later ones do not
            // override earlier ones.
            session.resetGroups();
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 2, new int[] { 1, 3 }, new RegroupCondition[] {
                            new RegroupCondition("if1", true, 3, null, true),
                            new RegroupCondition("sf1", false, 0, "bc", true)
                    })
            });
            session.exportDocIdToGroupId(docIdToGroup);
            for (int i = 0; i < i1terms.length; i++) {
                if (i1terms[i] <= 3) {
                    assertEquals(1, docIdToGroup[i]);
                } else {
                    assertEquals(3, docIdToGroup[i]);
                }
            }
            for (int i = i1terms.length; i < numDocs; i++) {
                assertEquals(2, docIdToGroup[i]);
            }

            // Try the opposite ordering of priority
            session.resetGroups();
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 2, new int[] { 1, 3 }, new RegroupCondition[] {
                            new RegroupCondition("sf1", false, 0, "bc", true),
                            new RegroupCondition("if1", true, 3, null, true)
                    })
            });
            session.exportDocIdToGroupId(docIdToGroup);
            for (int i = 0; i < s1terms.length; i++) {
                if (s1terms[i].compareTo("bc") <= 0) {
                    assertEquals(1, docIdToGroup[i]);
                } else {
                    assertEquals(3, docIdToGroup[i]);
                }
            }
            for (int i = s1terms.length; i < numDocs; i++) {
                assertEquals(2, docIdToGroup[i]);
            }
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * Checks that a regroup call with an empty rule array leaves every document in group 0.
     */
    @Test
    public void testEmptyMultisplit() throws ImhotepOutOfMemoryException {
        MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"),
                                                    Arrays.<String> asList(),
                                                    Arrays.<String> asList(),
                                                    10);
        for (int i = 0; i < 10; i++) {
            r.addIntTerm("if1", i, i);
        }

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(1, 2, new int[] { 1 }, new RegroupCondition[] {
                            new RegroupCondition("if1", true, 5, null, true)
                    })
            });
            session.regroup(new GroupMultiRemapRule[] {});

            final int[] docIdToGroup = new int[10];
            session.exportDocIdToGroupId(docIdToGroup);
            for (int group : docIdToGroup) {
                assertEquals(0, group);
            }
        } finally {
            // Previously only reached on success; now the session is closed on failure too.
            session.close();
        }
    }

    /**
     * Checks that a regroup whose only rule targets group 2 (which holds no documents in a fresh
     * session) leaves every document in group 0.
     */
    @Test
    public void testUntargetedGroup() throws ImhotepOutOfMemoryException {
        MockFlamdexReader r = new MockFlamdexReader(Arrays.asList("if1"),
                                                    Arrays.<String> asList(),
                                                    Arrays.<String> asList(),
                                                    10);
        for (int i = 0; i < 10; i++) {
            r.addIntTerm("if1", i, i);
        }

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            session.regroup(new GroupMultiRemapRule[] {
                    new GroupMultiRemapRule(2, 3, new int[] { 1 }, new RegroupCondition[] {
                            new RegroupCondition("if1", true, 1, null, false)
                    })
            });

            final int[] docIdToGroup = new int[10];
            session.exportDocIdToGroupId(docIdToGroup);
            for (int group : docIdToGroup) {
                assertEquals(0, group);
            }
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * Runs {@link #testTermInequalitySplit(int, String, int[], ImhotepLocalSession, int)} for the
     * two int extremes and, for every term, for the term itself and both of its neighbours.
     *
     * @param numDocs number of documents in the session's reader
     * @param field   int field to split on
     * @param terms   {@code terms[docId]} is the term of that document in {@code field}
     * @param session session to regroup; its groups are reset before every split
     */
    private void testAllInequalitySplits(int numDocs,
                                         String field,
                                         int[] terms,
                                         ImhotepLocalSession session) throws ImhotepOutOfMemoryException {
        testTermInequalitySplit(numDocs, field, terms, session, Integer.MIN_VALUE);
        testTermInequalitySplit(numDocs, field, terms, session, Integer.MAX_VALUE);
        for (int term : terms) {
            testTermInequalitySplit(numDocs, field, terms, session, term);
            testTermInequalitySplit(numDocs, field, terms, session, term - 1);
            testTermInequalitySplit(numDocs, field, terms, session, term + 1);
        }
    }

    /**
     * Resets the groups, applies one int inequality condition on {@code field} and asserts that
     * exactly the documents whose term is {@code <= term} are in group 1, all others in group 0.
     *
     * @param numDocs number of documents in the session's reader
     * @param field   int field to split on
     * @param terms   {@code terms[docId]} is the term of that document in {@code field}
     * @param session session to regroup
     * @param term    threshold passed to the regroup condition
     */
    private void testTermInequalitySplit(int numDocs,
                                         String field,
                                         int[] terms,
                                         ImhotepLocalSession session,
                                         int term) throws ImhotepOutOfMemoryException {
        session.resetGroups();
        session.regroup(new GroupMultiRemapRule[] {
                new GroupMultiRemapRule(1, 0, new int[] { 1 }, new RegroupCondition[] {
                        new RegroupCondition(field, true, term, null, true)
                })
        });

        final int[] docIdToGroup = new int[numDocs];
        session.exportDocIdToGroupId(docIdToGroup);
        for (int docid = 0; docid < terms.length; docid++) {
            if (terms[docid] <= term) {
                assertEquals(1, docIdToGroup[docid]);
            } else {
                assertEquals(0, docIdToGroup[docid]);
            }
        }
        // Documents beyond terms.length have no term in this field.
        for (int docid = terms.length; docid < numDocs; docid++) {
            assertEquals(0, docIdToGroup[docid]);
        }
    }

    /**
     * Runs {@link #testTermInequalitySplit(int, String, String[], ImhotepLocalSession, String)}
     * for the empty string and, for every term, for the term itself, the term without its last
     * character and the term with {@code "a"} appended.
     *
     * @param numDocs number of documents in the session's reader
     * @param field   string field to split on
     * @param terms   {@code terms[docId]} is the term of that document in {@code field}
     * @param session session to regroup; its groups are reset before every split
     */
    private void testAllInequalitySplits(int numDocs,
                                         String field,
                                         String[] terms,
                                         ImhotepLocalSession session) throws ImhotepOutOfMemoryException {
        testTermInequalitySplit(numDocs, field, terms, session, "");
        for (String term : terms) {
            if (term.length() >= 1) {
                testTermInequalitySplit(numDocs, field, terms, session,
                                        term.substring(0, term.length() - 1));
            }
            testTermInequalitySplit(numDocs, field, terms, session, term);
            testTermInequalitySplit(numDocs, field, terms, session, term + "a");
        }
    }

    /**
     * Resets the groups, applies one string inequality condition on {@code field} and asserts
     * that exactly the documents whose term compares {@code <= term} (per
     * {@link String#compareTo(String)}) are in group 1, all others in group 0.
     *
     * @param numDocs number of documents in the session's reader
     * @param field   string field to split on
     * @param terms   {@code terms[docId]} is the term of that document in {@code field}
     * @param session session to regroup
     * @param term    threshold passed to the regroup condition
     */
    private void testTermInequalitySplit(int numDocs,
                                         String field,
                                         String[] terms,
                                         ImhotepLocalSession session,
                                         String term) throws ImhotepOutOfMemoryException {
        session.resetGroups();
        session.regroup(new GroupMultiRemapRule[] {
                new GroupMultiRemapRule(1, 0, new int[] { 1 }, new RegroupCondition[] {
                        new RegroupCondition(field, false, 0, term, true)
                })
        });

        final int[] docIdToGroup = new int[numDocs];
        session.exportDocIdToGroupId(docIdToGroup);
        for (int docid = 0; docid < terms.length; docid++) {
            if (terms[docid].compareTo(term) <= 0) {
                assertEquals(1, docIdToGroup[docid]);
            } else {
                assertEquals(0, docIdToGroup[docid]);
            }
        }
        // Documents beyond terms.length have no term in this field.
        for (int docid = terms.length; docid < numDocs; docid++) {
            assertEquals(0, docIdToGroup[docid]);
        }
    }

    /**
     * Registers an int field on the mock reader where document {@code ix} carries the term
     * {@code terms[ix]}; documents sharing a term are grouped into one doc list.
     *
     * @param r         reader to populate
     * @param fieldName int field name
     * @param terms     term per document, indexed by doc id
     */
    private void addIntField(MockFlamdexReader r, String fieldName, int[] terms) {
        final Map<Integer, List<Integer>> docsByTerm = Maps.newHashMap();
        for (int ix = 0; ix < terms.length; ix++) {
            List<Integer> docs = docsByTerm.get(terms[ix]);
            if (docs == null) {
                docs = Lists.newArrayList();
                docsByTerm.put(terms[ix], docs);
            }
            docs.add(ix);
        }
        // entrySet() avoids a second hash lookup per term.
        for (final Map.Entry<Integer, List<Integer>> entry : docsByTerm.entrySet()) {
            r.addIntTerm(fieldName, entry.getKey(), entry.getValue());
        }
    }

    /**
     * Registers a string field on the mock reader where document {@code ix} carries the term
     * {@code terms[ix]}; documents sharing a term are grouped into one doc list.
     *
     * @param r         reader to populate
     * @param fieldName string field name
     * @param terms     term per document, indexed by doc id
     */
    private void addStringField(MockFlamdexReader r, String fieldName, String[] terms) {
        final Map<String, List<Integer>> docsByTerm = Maps.newHashMap();
        for (int ix = 0; ix < terms.length; ix++) {
            List<Integer> docs = docsByTerm.get(terms[ix]);
            if (docs == null) {
                docs = Lists.newArrayList();
                docsByTerm.put(terms[ix], docs);
            }
            docs.add(ix);
        }
        // entrySet() avoids a second hash lookup per term.
        for (final Map.Entry<String, List<Integer>> entry : docsByTerm.entrySet()) {
            r.addStringTerm(fieldName, entry.getKey(), entry.getValue());
        }
    }

    /**
     * Checks conditionalUpdateDynamicMetric: an update whose two conditions carry deltas of
     * +100 and -100 must leave the metric at zero, and an update conditioned on if1 = 1 must
     * add 10 to the odd-numbered documents only.
     */
    @Test
    public void testConditionalUpdateDynamicMetric() throws ImhotepOutOfMemoryException {
        // Identity doc-id array used to look up the metric for docs 0-9.
        final int[] iCanCount = new int[10];
        for (int i = 0; i < iCanCount.length; i++) {
            iCanCount[i] = i;
        }

        final MockFlamdexReader r = new MockFlamdexReader();
        r.addIntTerm("if1", 0, 0, 2, 4, 6, 8);
        r.addIntTerm("if1", 1, 1, 3, 5, 7, 9);
        r.addStringTerm("sf1", "even", 0, 2, 4, 6, 8);
        r.addStringTerm("sf1", "odd", 1, 3, 5, 7, 9);

        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            final String metricName = "test metric!";
            session.createDynamicMetric(metricName);
            final long[] exported = new long[10];

            // Should be a no-op
            session.conditionalUpdateDynamicMetric(metricName,
                    new RegroupCondition[] {
                            new RegroupCondition("if1", true, 0, null, false),
                            new RegroupCondition("sf1", false, 0, "even", false)
                    },
                    new int[] { 100, -100 });
            final DynamicMetric metric = session.getDynamicMetrics().get(metricName);
            metric.lookup(iCanCount, exported, 10);
            Assert.assertArrayEquals(new long[10], exported);

            // Should increase odd terms by 10
            session.conditionalUpdateDynamicMetric(metricName,
                    new RegroupCondition[] {
                            new RegroupCondition("if1", true, 1, null, false)
                    },
                    new int[] { 10 });
            metric.lookup(iCanCount, exported, 10);
            Assert.assertArrayEquals(new long[] { 0, 10, 0, 10, 0, 10, 0, 10, 0, 10 }, exported);
        } finally {
            // Release the session even when an assertion above fails.
            session.close();
        }
    }

    /**
     * Pushes a {@code floatscale} stat and compares the group-1 sum with the value computed by
     * hand from the float values in the MakeAFlamdex fixture.
     */
    @Test
    public void testPushStatFloatScale() throws ImhotepOutOfMemoryException {
        final FlamdexReader r = MakeAFlamdex.make();
        final ImhotepLocalSession session = new ImhotepLocalSession(r);
        try {
            // like iplat
            session.pushStat("floatscale floatfield*100+9000");
            long[] stats = session.getGroupStats(0);
            long scaledSum = stats[1];

            // we have 5 documents for each of 4 values: 1.5, 2.5, 0 and 18000
            long expectedSum = (long) ((1.5 * 100 + 9000) +
                                       (2.5 * 100 + 9000) +
                                       (0 * 100 + 9000) +
                                       (18000 * 100 + 9000)) * 5;
            // Both values are longs, so compare them exactly instead of as doubles with a delta.
            Assert.assertEquals("Sum of scaled values", expectedSum, scaledSum);
        } finally {
            // Previously only reached on success; now the session is closed on failure too.
            session.close();
        }
    }

    /**
     * Exercises rebuildAndFilterIndexes after regroups that send documents to group 0, and
     * verifies group stats, per-document group assignment and dynamic metric values after each
     * rebuild.
     */
    @Test
    public void testGroup0Filtering() throws ImhotepOutOfMemoryException, IOException {
        /* make session 1 */
        final FlamdexReader r1 = MakeAFlamdex.make();
        final ImhotepLocalSession session1 =
                new ImhotepLocalSession(r1,
                                        "/tmp/imhotep.test",
                                        new MemoryReservationContext(
                                                new ImhotepMemoryPool(Long.MAX_VALUE)),
                                        false,
                                        null);
        try {
            session1.pushStat("count()");
            session1.createDynamicMetric("foo");
            session1.createDynamicMetric("bar");
            int[] bar = { 0, 13 };
            session1.updateDynamicMetric("bar", bar);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(1, new RegroupCondition("if3", true, 9999, null, false), 1, 2)
            });
            int[] foo = { 0, 1, 2 };
            session1.updateDynamicMetric("foo", foo);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(1, new RegroupCondition("if3", true, 19, null, false), 0, 2),
                    new GroupRemapRule(2, new RegroupCondition("sf2", false, 0, "b", false), 3, 4)
            });
            int[] fo = { 0, 0, 0, 0, 1 };
            session1.updateDynamicMetric("foo", fo);

            long[] stats1 = session1.getGroupStats(0);
            assertEquals(10, stats1[0]);
            assertEquals(0, stats1[1]);
            assertEquals(5, stats1[2]);
            assertEquals(4, stats1[3]);
            assertEquals(1, stats1[4]);

            /* optimize session */
            session1.rebuildAndFilterIndexes(Arrays.asList("if1", "if3"),
                                             Arrays.asList("sf1", "sf3", "sf4"));

            GroupLookup gl = session1.docIdToGroup;
            assertEquals(5, gl.getNumGroups());
            assertEquals(10, gl.size());
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " in wrong group", 2, gl.get(i));
                }
                if (i >= 5 && i < 7) {
                    assertEquals(Integer.toString(i) + " in wrong group", 3, gl.get(i));
                }
                if (i >= 7 && i < 8) {
                    assertEquals(Integer.toString(i) + " in wrong group", 4, gl.get(i));
                }
                if (i >= 8 && i < 10) {
                    assertEquals(Integer.toString(i) + " in wrong group", 3, gl.get(i));
                }
            }

            /* check the dynamic metric */
            Map<String, DynamicMetric> dynamicMetrics = session1.getDynamicMetrics();

            /* check all the groups are there */
            // Expected value goes first so a failure message reads correctly.
            assertEquals(2, dynamicMetrics.size());
            assertNotNull(dynamicMetrics.get("foo"));
            assertNotNull(dynamicMetrics.get("bar"));

            /* check dynamic metrics per group */
            DynamicMetric dm = dynamicMetrics.get("foo");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 1, dm.lookupSingleVal(i));
                }
                if (i >= 5 && i < 7) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 2, dm.lookupSingleVal(i));
                }
                if (i >= 7 && i < 8) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 3, dm.lookupSingleVal(i));
                }
                if (i >= 8 && i < 10) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 2, dm.lookupSingleVal(i));
                }
            }

            dm = dynamicMetrics.get("bar");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 10) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 13, dm.lookupSingleVal(i));
                }
            }

            /* try another regroup */
            session1.createDynamicMetric("cat");
            int[] cat = { 0, 17 };
            session1.updateDynamicMetric("cat", cat);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(2, new RegroupCondition("if3", true, 5, null, false), 1, 6),
                    new GroupRemapRule(3, new RegroupCondition("if3", true, 10000, null, false), 2, 7)
            });
            stats1 = session1.getGroupStats(0);
            assertEquals(1, stats1[0]);
            assertEquals(5, stats1[1]);
            assertEquals(4, stats1[2]);
            int[] foo2 = { 0, 7, 11 };
            session1.updateDynamicMetric("foo", foo2);

            /* optimize session */
            session1.rebuildAndFilterIndexes(Arrays.asList("if1", "if3"),
                                             Arrays.asList("sf1", "sf3", "sf4"));

            stats1 = session1.getGroupStats(0);
            assertEquals(0, stats1[0]);
            assertEquals(5, stats1[1]);
            assertEquals(4, stats1[2]);

            gl = session1.docIdToGroup;
            assertEquals(3, gl.getNumGroups());
            assertEquals(9, gl.size());
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " in wrong group", 1, gl.get(i));
                }
                if (i >= 5 && i < 9) {
                    assertEquals(Integer.toString(i) + " in wrong group", 2, gl.get(i));
                }
            }

            /* check dynamic metrics per group */
            dynamicMetrics = session1.getDynamicMetrics();
            dm = dynamicMetrics.get("foo");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 8, dm.lookupSingleVal(i));
                }
                if (i >= 5 && i < 9) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 13, dm.lookupSingleVal(i));
                }
            }

            dm = dynamicMetrics.get("bar");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 9) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 13, dm.lookupSingleVal(i));
                }
            }
        } finally {
            // Previously only reached on success; now the session is closed on failure too.
            session1.close();
        }
    }

    /**
     * Interleaves regroups, dynamic metric updates and two rebuildAndFilterIndexes calls, then
     * calls resetGroups and verifies that the group lookup covers 20 documents and that every
     * dynamic metric holds the value accumulated for each document.
     */
    @Test
    public void testOptimizeThenReset() throws ImhotepOutOfMemoryException, IOException {
        /* make session 1 */
        final FlamdexReader r1 = MakeAFlamdex.make();
        final ImhotepLocalSession session1 =
                new ImhotepLocalSession(r1,
                                        "/tmp/imhotep.test",
                                        new MemoryReservationContext(
                                                new ImhotepMemoryPool(Long.MAX_VALUE)),
                                        false,
                                        null);
        try {
            session1.pushStat("count()");
            session1.createDynamicMetric("foo");
            session1.createDynamicMetric("bar");
            int[] bar = { 0, 13 };
            session1.updateDynamicMetric("bar", bar);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(1, new RegroupCondition("if3", true, 9999, null, false), 1, 2)
            });
            int[] foo = { 0, 1, 2 };
            session1.updateDynamicMetric("foo", foo);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(1, new RegroupCondition("if3", true, 19, null, false), 0, 2),
                    new GroupRemapRule(2, new RegroupCondition("sf2", false, 0, "b", false), 3, 4)
            });
            int[] fo = { 0, 0, 0, 0, 1 };
            session1.updateDynamicMetric("foo", fo);

            /* optimize session */
            session1.rebuildAndFilterIndexes(Arrays.asList("if1", "if3"),
                                             Arrays.asList("sf1", "sf3", "sf4"));

            /* try another regroup */
            session1.createDynamicMetric("cat");
            int[] cat = { 0, 3, 3, 3, 3 };
            session1.updateDynamicMetric("cat", cat);
            int[] fo2 = { 0, 0, 0, 0, 2 };
            session1.updateDynamicMetric("foo", fo2);
            session1.regroup(new GroupRemapRule[] {
                    new GroupRemapRule(2, new RegroupCondition("if3", true, 5, null, false), 1, 6),
                    new GroupRemapRule(3, new RegroupCondition("if3", true, 10000, null, false), 2, 7)
            });
            int[] foo2 = { 0, 7, 11 };
            session1.updateDynamicMetric("foo", foo2);

            /* optimize session */
            session1.rebuildAndFilterIndexes(Arrays.asList("if1", "if3"),
                                             Arrays.asList("sf1", "sf3", "sf4"));

            int[] foo3 = { 0, 0, 1 };
            session1.updateDynamicMetric("foo", foo3);

            /* reset */
            session1.resetGroups();

            /* check groups */
            GroupLookup gl = session1.docIdToGroup;
            assertEquals(2, gl.getNumGroups());
            assertEquals(20, gl.size());

            /* check the dynamic metric */
            Map<String, DynamicMetric> dynamicMetrics = session1.getDynamicMetrics();

            /* check all the groups are there */
            // Expected value goes first so a failure message reads correctly.
            assertEquals(3, dynamicMetrics.size());
            assertNotNull(dynamicMetrics.get("foo"));
            assertNotNull(dynamicMetrics.get("bar"));
            assertNotNull(dynamicMetrics.get("cat"));

            /* check dynamic metrics per group */
            DynamicMetric dm = dynamicMetrics.get("bar");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 20) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 13, dm.lookupSingleVal(i));
                }
            }

            dm = dynamicMetrics.get("cat");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 0, dm.lookupSingleVal(i));
                }
                if (i >= 5 && i < 15) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 3, dm.lookupSingleVal(i));
                }
                if (i >= 15 && i < 20) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 0, dm.lookupSingleVal(i));
                }
            }

            dm = dynamicMetrics.get("foo");
            for (int i = 0; i < gl.size(); i++) {
                if (i >= 0 && i < 5) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 1, dm.lookupSingleVal(i));
                }
                if (i >= 5 && i < 10) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 8, dm.lookupSingleVal(i));
                }
                if (i >= 10 && i < 12) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 14, dm.lookupSingleVal(i));
                }
                if (i >= 12 && i < 13) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 5, dm.lookupSingleVal(i));
                }
                if (i >= 13 && i < 15) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 14, dm.lookupSingleVal(i));
                }
                if (i >= 15 && i < 20) {
                    assertEquals(Integer.toString(i) + " has wrong dynamic metric", 1, dm.lookupSingleVal(i));
                }
            }
        } finally {
            // Previously only reached on success; now the session is closed on failure too.
            session1.close();
        }
    }

    /**
     * Pushes a series of {@code regex field:pattern} stats against the MakeAFlamdex fixture and
     * checks the resulting group stats for int, string, float and nonexistent fields.
     */
    @Test
    public void testRegexMetric() throws ImhotepOutOfMemoryException {
        final FlamdexReader r = MakeAFlamdex.make();
        final ImhotepLocalSession session =
                new ImhotepLocalSession(r,
                                        "/tmp/imhotep.test",
                                        new MemoryReservationContext(
                                                new ImhotepMemoryPool(Long.MAX_VALUE)),
                                        false,
                                        null);
        try {
            session.pushStat("regex if1:9000");
            Assert.assertArrayEquals(new long[]{0, 3}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex if3:.*9");
            Assert.assertArrayEquals(new long[]{0, 10}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex if3:notaninteger");
            Assert.assertArrayEquals(new long[]{0, 0}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex sf1:");
            Assert.assertArrayEquals(new long[]{0, 2}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex sf2:b");
            Assert.assertArrayEquals(new long[]{0, 4}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex floatfield:[0-9]*\\.[0-9]*");
            Assert.assertArrayEquals(new long[]{0, 10}, session.getGroupStats(0));
            session.popStat();

            session.pushStat("regex nonexistent:anything");
            Assert.assertArrayEquals(new long[]{0, 0}, session.getGroupStats(0));
            session.popStat();
        } finally {
            // The session was never closed here, unlike the other MakeAFlamdex-based tests.
            session.close();
        }
    }
}