/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.imhotep.local; import com.indeed.util.core.Pair; import com.indeed.flamdex.reader.MockFlamdexReader; import com.indeed.imhotep.GroupRemapRule; import com.indeed.imhotep.RegroupCondition; import com.indeed.imhotep.api.FTGSIterator; import com.indeed.imhotep.api.ImhotepOutOfMemoryException; import org.junit.Test; import java.util.Arrays; import java.util.List; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; /** * @author jwolfe */ public class TestFlamdexFTGSIterator { private static final String INT_ITERATION_FIELD = "iterationField"; private static final String STRING_ITERATION_FIELD = "stringIterationField"; private static final String METRIC_FIELD = "metricField"; // this field is silly and exists for regrouping purposes. private static final String DOCID_FIELD = "docIdField"; enum BitsetOptimizationLevel { DONT_OPTIMIZE, OPTIMIZE, } @Test public void testSimpleIteration() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { ImhotepLocalSession session = makeTestSession(level); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{INT_ITERATION_FIELD}, new String[]{STRING_ITERATION_FIELD}); try { testExpectedIntField(ftgsIterator); testExpectedStringField(ftgsIterator); assertEquals(false, ftgsIterator.nextField()); } finally { ftgsIterator.close(); session.close(); } } } @Test public void testSkippingField() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { ImhotepLocalSession session = makeTestSession(level); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{INT_ITERATION_FIELD}, new String[]{STRING_ITERATION_FIELD}); try { assertEquals(true, ftgsIterator.nextField()); testExpectedStringField(ftgsIterator); assertEquals(false, ftgsIterator.nextField()); } finally { ftgsIterator.close(); session.close(); } } } @Test public void testSkippingTerm() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { MockFlamdexReader r = new MockFlamdexReader(); r.addIntTerm("if1", 0, 1, 2); r.addIntTerm("if1", 1, 3, 4); ImhotepLocalSession session = new ImhotepLocalSession(r, level == BitsetOptimizationLevel.OPTIMIZE); session.pushStat("count()"); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{"if1"}, new String[]{}); try { final long[] stats = new long[1]; ftgsIterator.nextField(); ftgsIterator.nextTerm(); ftgsIterator.nextTerm(); assertEquals(1, ftgsIterator.termIntVal()); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(1, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{2}, stats); assertEquals(false, ftgsIterator.nextTerm()); assertEquals(false, ftgsIterator.nextField()); } finally { ftgsIterator.close(); session.close(); } } } @Test public void testEmptyField() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { MockFlamdexReader r = new MockFlamdexReader(); ImhotepLocalSession session = new ImhotepLocalSession(r, level == BitsetOptimizationLevel.OPTIMIZE); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{"if1"}, new String[]{"sf1"}); try { assertEquals(true, ftgsIterator.nextField()); assertEquals("if1", ftgsIterator.fieldName()); assertEquals(false, ftgsIterator.nextTerm()); assertEquals(true, ftgsIterator.nextField()); assertEquals("sf1", ftgsIterator.fieldName()); assertEquals(false, ftgsIterator.nextTerm()); } finally { ftgsIterator.close(); session.close(); } } } @Test public void testZeroStats() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { MockFlamdexReader r = new MockFlamdexReader(); r.addIntTerm("if1", 1, 0, 1, 2); ImhotepLocalSession session = new ImhotepLocalSession(r, level == BitsetOptimizationLevel.OPTIMIZE); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{"if1"}, new String[]{}); try { final long[] emptyBuff = new long[0]; assertEquals(true, ftgsIterator.nextField()); ftgsIterator.nextTerm(); ftgsIterator.group(); ftgsIterator.groupStats(emptyBuff); } finally { ftgsIterator.close(); session.close(); } // Just making sure nothing goes catastrophically wrong } } @Test public void testMultipleStats() throws ImhotepOutOfMemoryException { for (BitsetOptimizationLevel level : BitsetOptimizationLevel.values()) { ImhotepLocalSession session = makeTestSession(level); session.pushStat("count()"); FTGSIterator ftgsIterator = session.getFTGSIterator(new String[]{INT_ITERATION_FIELD}, new String[]{}); try { ftgsIterator.nextField(); expectTerms(Arrays.asList( new IntTerm(Integer.MIN_VALUE, Arrays.asList(Pair.of(2, new long[]{1, 3}))), new IntTerm(-1, Arrays.asList(Pair.of(1, new long[]{11, 3}))), new IntTerm(0, Arrays.asList(Pair.of(1, new long[]{0, 1}), Pair.of(2, new long[]{0, 2}))), new IntTerm(1, Arrays.asList(Pair.of(1, new long[]{11, 3}))), new IntTerm(Integer.MAX_VALUE, Arrays.asList(Pair.of(2, new long[]{1, 3}))) ), ftgsIterator); } finally { ftgsIterator.close(); session.close(); } } } private ImhotepLocalSession makeTestSession(BitsetOptimizationLevel level) throws ImhotepOutOfMemoryException { MockFlamdexReader r = makeTestFlamdexReader(); ImhotepLocalSession session = new ImhotepLocalSession(r, level == BitsetOptimizationLevel.OPTIMIZE); session.regroup(new GroupRemapRule[]{new GroupRemapRule(1, new RegroupCondition(DOCID_FIELD, true, 4, null, true), 2, 1)}); session.pushStat(METRIC_FIELD); return session; } private MockFlamdexReader makeTestFlamdexReader() { MockFlamdexReader r = new MockFlamdexReader( Arrays.asList(INT_ITERATION_FIELD, METRIC_FIELD, DOCID_FIELD), Arrays.<String>asList(STRING_ITERATION_FIELD), Arrays.asList(INT_ITERATION_FIELD, METRIC_FIELD, DOCID_FIELD), 10 ); r.addIntTerm(INT_ITERATION_FIELD, Integer.MIN_VALUE, 5, 7, 8); r.addIntTerm(INT_ITERATION_FIELD, -1, 1, 2, 3); r.addIntTerm(INT_ITERATION_FIELD, 0, 4, 8, 9); r.addIntTerm(INT_ITERATION_FIELD, 1, 0, 1, 2); r.addIntTerm(INT_ITERATION_FIELD, Integer.MAX_VALUE, 5, 7, 8); r.addStringTerm(STRING_ITERATION_FIELD, "", 1, 4, 9); r.addStringTerm(STRING_ITERATION_FIELD, "english", 1, 2, 3); r.addStringTerm(STRING_ITERATION_FIELD, "日本語", 4, 5, 6); r.addIntTerm(DOCID_FIELD, 0, 0); r.addIntTerm(DOCID_FIELD, 1, 1); r.addIntTerm(DOCID_FIELD, 2, 2); r.addIntTerm(DOCID_FIELD, 3, 3); r.addIntTerm(DOCID_FIELD, 4, 4); r.addIntTerm(DOCID_FIELD, 5, 5); r.addIntTerm(DOCID_FIELD, 6, 6); r.addIntTerm(DOCID_FIELD, 7, 7); r.addIntTerm(DOCID_FIELD, 8, 8); r.addIntTerm(DOCID_FIELD, 9, 9); r.addIntTerm(METRIC_FIELD, 0, 4, 7, 8, 9); r.addIntTerm(METRIC_FIELD, 1, 2, 5, 6); r.addIntTerm(METRIC_FIELD, 5, 0, 1, 3); return r; } private static class IntTerm { int term; List<Pair<Integer, long[]>> groupStats; private IntTerm(int term, List<Pair<Integer, long[]>> groupStats) { this.term = term; this.groupStats = groupStats; } } private void expectTerms(List<IntTerm> terms, FTGSIterator ftgsIterator) { long[] stats = new long[terms.get(0).groupStats.get(0).getSecond().length]; for (IntTerm term : terms) { assertEquals(true, ftgsIterator.nextTerm()); assertEquals(term.term, ftgsIterator.termIntVal()); for (Pair<Integer, long[]> group : term.groupStats) { assertEquals(true, ftgsIterator.nextGroup()); assertEquals((int)group.getFirst(), ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(group.getSecond(), stats); } } } private void testExpectedIntField(FTGSIterator ftgsIterator) { assertEquals(true, ftgsIterator.nextField()); assertEquals(INT_ITERATION_FIELD, ftgsIterator.fieldName()); assertEquals(true, ftgsIterator.fieldIsIntType()); expectTerms(Arrays.asList( new IntTerm(Integer.MIN_VALUE, Arrays.asList(Pair.of(2, new long[]{1}))), new IntTerm(-1, Arrays.asList(Pair.of(1, new long[]{11}))), new IntTerm(0, Arrays.asList(Pair.of(1, new long[]{0}), Pair.of(2, new long[]{0}))), new IntTerm(1, Arrays.asList(Pair.of(1, new long[]{11}))), new IntTerm(Integer.MAX_VALUE, Arrays.asList(Pair.of(2, new long[]{1}))) ), ftgsIterator); assertEquals(false, ftgsIterator.nextGroup()); assertEquals(false, ftgsIterator.nextTerm()); } private void testExpectedStringField(FTGSIterator ftgsIterator) { long[] stats = new long[1]; assertEquals(true, ftgsIterator.nextField()); assertEquals(STRING_ITERATION_FIELD, ftgsIterator.fieldName()); assertEquals(false, ftgsIterator.fieldIsIntType()); assertEquals(true, ftgsIterator.nextTerm()); assertEquals("", ftgsIterator.termStringVal()); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(1, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{5}, stats); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(2, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{0}, stats); assertEquals(false, ftgsIterator.nextGroup()); assertEquals(true, ftgsIterator.nextTerm()); assertEquals("english", ftgsIterator.termStringVal()); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(1, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{11}, stats); assertEquals(false, ftgsIterator.nextGroup()); assertEquals(true, ftgsIterator.nextTerm()); assertEquals("日本語", ftgsIterator.termStringVal()); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(1, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{0}, stats); assertEquals(true, ftgsIterator.nextGroup()); assertEquals(2, ftgsIterator.group()); ftgsIterator.groupStats(stats); assertArrayEquals(new long[]{2}, stats); assertEquals(false, ftgsIterator.nextGroup()); assertEquals(false, ftgsIterator.nextTerm()); } }