/* * Copyright (c) 2011 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.flaptor.indextank.index; import static com.flaptor.util.TestInfo.TestType.SYSTEM; import java.io.File; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; import com.flaptor.indextank.BoostingIndexer; import com.flaptor.indextank.IndexTankTestCase; import com.flaptor.indextank.index.scorer.VariablesRangeFilter; import com.flaptor.indextank.query.ParseException; import com.flaptor.indextank.query.Query; import com.flaptor.indextank.query.TermQuery; import com.flaptor.indextank.search.DocumentSearcher; import com.flaptor.indextank.search.SearchResult; import com.flaptor.indextank.search.SearchResults; import com.flaptor.util.FileUtil; import com.flaptor.util.TestInfo; import com.google.common.collect.HashMultimap; import com.google.common.collect.HashMultiset; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Multiset; import com.google.common.collect.Sets; import java.util.Arrays; public class IndexEngineTest extends IndexTankTestCase { private IndexEngine indexEngine; private File tempDir; private static final int DOCS = 12; @Override protected void setUp() throws Exception { super.setUp(); this.tempDir = FileUtil.createTempDir("indextank","testcase"); this.indexEngine = new IndexEngine(this.tempDir, 11234, 5, false, 5, IndexEngine.SuggestValues.DOCUMENTS, IndexEngine.StorageValues.NO, 0, null, true, "dummyCode", "TEST-environment"); } private void indexTwelveDocs(BoostingIndexer indexer) { long timestamp = System.currentTimeMillis()/ 1000L; for (int i = 0; i < DOCS; i++){ Document doc = new Document(); doc.setField("text","term"+i+" fixed"); doc.setField("timestamp", String.valueOf(timestamp)); indexer.add("doc_"+i,doc, (int)timestamp, Maps.<Integer, Double>newHashMap()); timestamp -= 1000; } } private List<String[]> getFacetingDocumentsData() { List<String[]> result = Lists.newArrayList(); result.add(new String[] {"D1", "A", "BAJO", "NAH"}); result.add(new String[] {"D2", "A", "BAJO", "SI"}); result.add(new String[] {"D3", "A", "ALTO", "NAH"}); result.add(new String[] {"D4", "A B", "ALTO", "SI"}); result.add(new String[] {"D5", "A B", "BAJO", "NAH"}); result.add(new String[] {"D6", "B", "BAJO", "SI"}); result.add(new String[] {"D7", "B", null, "NAH"}); result.add(new String[] {"D8", "B", null, "TERCERO"}); return result; } private void indexForwardFacetedToDocuments(BoostingIndexer indexer) { List<String[]> facetingDocumentsData = getFacetingDocumentsData(); indexFacetedDocuments(facetingDocumentsData, indexer); } private void indexBackwardsFacetedToDocuments(BoostingIndexer indexer) { List<String[]> facetingDocumentsData = getFacetingDocumentsData(); indexFacetedDocuments(Lists.newArrayList(Iterables.reverse(facetingDocumentsData)), indexer); } private void indexFacetedDocuments(List<String[]> data, BoostingIndexer indexer) { long timestamp = System.currentTimeMillis()/ 1000L; for (String[] datum : data) { Document doc = new Document(); doc.setField("text", datum[1]); doc.setField("timestamp", String.valueOf(timestamp)); indexer.add(datum[0],doc, (int)timestamp, Maps.<Integer, Double>newHashMap()); timestamp -= 1000; Map<String, String> categories = Maps.newHashMap(); if (datum[2] != null) { categories.put("PRECIO", datum[2]); } if (datum[3] != null) { categories.put("TIPO", datum[3]); } indexer.updateCategories(datum[0], categories); } } private void indexLengthDifferentDocs(BoostingIndexer indexer, int count, int k) { long timestamp = System.currentTimeMillis()/ 1000L; for (int i = 0; i < count; i++){ Document doc = new Document(); String text = "foo"; for (int j = 0; j < i*k; j++) { text += " bar"; } doc.setField("text", text); doc.setField("timestamp", String.valueOf(timestamp)); indexer.add("http://"+i,doc, (int)timestamp, Maps.<Integer, Double>newHashMap()); timestamp -= 1000; } } @Override protected void tearDown() throws Exception { super.tearDown(); indexEngine = null; } @TestInfo(testType=SYSTEM) public void testSwitch() throws IOException, ParseException, InterruptedException { indexTwelveDocs(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); SearchResults srs = searcher.search(new Query(new TermQuery("text","term1"),null,null),0,10, 0); assertEquals("Number of historic results doesn't match", 1, srs.getMatches()); srs = searcher.search(new Query(new TermQuery("text","term11"),null,null),0,10, 0); assertEquals("Number of real time results doesn't match", 1, srs.getMatches()); Query query = new Query(this.indexEngine.getParser().parseQuery("term1 OR term2 OR term3 OR term4 OR term5"),null,null); srs = searcher.search(query,0,10, 0); assertEquals("Number of real time results doesn't match", 5, srs.getMatches()); } @TestInfo(testType=SYSTEM) public void testSwitchedDuplicates() throws IOException, ParseException, InterruptedException { indexLengthDifferentDocs(this.indexEngine.getIndexer(), 33, 1); indexLengthDifferentDocs(this.indexEngine.getIndexer(), 33, 2); Query query = new Query(this.indexEngine.getParser().parseQuery("foo"),null,null); SearchResults srs = this.indexEngine.getSearcher().search(query,0,100, 0); assertEquals("Number of results doesn't match", 33, srs.getMatches()); Multiset<String> ids = HashMultiset.create(); for (SearchResult r : srs.getResults()) { ids.add(r.getDocId()); } assertEquals("Number of actual results doesn't match", 33, ids.size()); assertEquals("Number of different results doesn't match", 33, ids.elementSet().size()); } private void checkResults(DocumentSearcher searcher, int start, int len, int[] expectedIds) throws InterruptedException { SearchResults srs = searcher.search(new Query(new TermQuery("text","fixed"),"fixed",null),start,len, 0); Set<Integer> expIds = Sets.newHashSet(); for (int i : expectedIds) { expIds.add(i); } int n = 0; Set<Integer> actIds = Sets.newHashSet(); for (SearchResult r : srs.getResults()) { String docid = r.getDocId(); int i = Integer.parseInt(docid.substring(docid.indexOf('_')+1)); actIds.add(i); n++; } assertEquals("Results page doesn't contain the expected docids", expIds, actIds); } @TestInfo(testType=SYSTEM) public void testPagination() throws IOException, ParseException, InterruptedException { indexTwelveDocs(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); checkResults(searcher,0,5,new int[]{0,1,2,3,4}); checkResults(searcher,5,5,new int[]{5,6,7,8,9}); checkResults(searcher,10,5,new int[]{10,11}); } @TestInfo(testType=SYSTEM) public void testPromotion() throws IOException, ParseException,InterruptedException { indexTwelveDocs(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); BoostingIndexer indexer = this.indexEngine.getIndexer(); indexer.promoteResult("doc_6", "fixed"); checkResults(searcher,0,1,new int[]{6}); checkResults(searcher,0,5,new int[]{0,1,2,3,6}); checkResults(searcher,5,5,new int[]{4,5,7,8,9}); checkResults(searcher,10,5,new int[]{10,11}); indexer.promoteResult("doc_10", "fixed"); checkResults(searcher,10,5,new int[]{9,11}); } @TestInfo(testType=SYSTEM) public void testFacetedSearch() throws InterruptedException { indexForwardFacetedToDocuments(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); SearchResults searchResults = searcher.search(new Query(new TermQuery("text","a"),"a",null), 0, 10, 0); Map<String, Multiset<String>> facets = searchResults.getFacets(); //System.out.println("Matches: " + searchResults.getMatches()); //System.out.println(facets); assertEquals(2, facets.keySet().size()); Multiset<String> precioFacet = facets.get("PRECIO"); assertEquals(2, precioFacet.elementSet().size()); assertEquals(2, precioFacet.count("ALTO")); assertEquals(3, precioFacet.count("BAJO")); Multiset<String> tipoFacet = facets.get("TIPO"); assertEquals(2, tipoFacet.elementSet().size()); assertEquals(3, tipoFacet.count("NAH")); assertEquals(2, tipoFacet.count("SI")); searchResults = searcher.search(new Query(new TermQuery("text","b"),"b",null), 0, 10, 0); facets = searchResults.getFacets(); //System.out.println("Matches: " + searchResults.getMatches()); //System.out.println(facets); assertEquals(2, facets.keySet().size()); precioFacet = facets.get("PRECIO"); assertEquals(2, precioFacet.elementSet().size()); assertEquals(1, precioFacet.count("ALTO")); assertEquals(2, precioFacet.count("BAJO")); tipoFacet = facets.get("TIPO"); assertEquals(3, tipoFacet.elementSet().size()); assertEquals(2, tipoFacet.count("NAH")); assertEquals(2, tipoFacet.count("SI")); assertEquals(1, tipoFacet.count("TERCERO")); } @TestInfo(testType=SYSTEM) public void testBackwardsFacetedSearch() throws InterruptedException { indexBackwardsFacetedToDocuments(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); SearchResults searchResults = searcher.search(new Query(new TermQuery("text","a"),"a",null), 0, 10, 0); Map<String, Multiset<String>> facets = searchResults.getFacets(); //System.out.println("Matches: " + searchResults.getMatches()); //System.out.println(facets); assertEquals(2, facets.keySet().size()); Multiset<String> precioFacet = facets.get("PRECIO"); assertEquals(2, precioFacet.elementSet().size()); assertEquals(2, precioFacet.count("ALTO")); assertEquals(3, precioFacet.count("BAJO")); Multiset<String> tipoFacet = facets.get("TIPO"); assertEquals(2, tipoFacet.elementSet().size()); assertEquals(3, tipoFacet.count("NAH")); assertEquals(2, tipoFacet.count("SI")); searchResults = searcher.search(new Query(new TermQuery("text","b"),"b",null), 0, 10, 0); facets = searchResults.getFacets(); //System.out.println("Matches: " + searchResults.getMatches()); //System.out.println(facets); assertEquals(2, facets.keySet().size()); precioFacet = facets.get("PRECIO"); assertEquals(2, precioFacet.elementSet().size()); assertEquals(1, precioFacet.count("ALTO")); assertEquals(2, precioFacet.count("BAJO")); tipoFacet = facets.get("TIPO"); assertEquals(3, tipoFacet.elementSet().size()); assertEquals(2, tipoFacet.count("NAH")); assertEquals(2, tipoFacet.count("SI")); assertEquals(1, tipoFacet.count("TERCERO")); } private void checkSearchResults(Iterable<SearchResult> results, String[] strings) { int i = 0; for (SearchResult searchResult : results) { assertEquals("Wrong results", strings[i], searchResult.getDocId().toString()); i++; } } @TestInfo(testType=SYSTEM) public void testFacetFiltering() throws InterruptedException, ParseException { indexBackwardsFacetedToDocuments(this.indexEngine.getIndexer()); DocumentSearcher searcher = this.indexEngine.getSearcher(); Multimap<String, String> categoriesFilter = HashMultimap.create(); categoriesFilter.put("PRECIO", "BAJO"); SearchResults searchResults = searcher.search(new Query(indexEngine.getParser().parseQuery("a OR b"),"a OR b", null, categoriesFilter, VariablesRangeFilter.NO_FILTER), 0, 10, 0); int matches = searchResults.getMatches(); Map<String, Multiset<String>> facets = searchResults.getFacets(); //System.out.println(matches); //System.out.println(searchResults.getResults()); //System.out.println(searchResults.getFacets()); assertEquals(4, matches); checkSearchResults(searchResults.getResults(), new String[] {"D6", "D5", "D2", "D1"}); categoriesFilter = HashMultimap.create(); categoriesFilter.put("TIPO", "NAH"); categoriesFilter.put("TIPO", "TERCERO"); searchResults = searcher.search(new Query(indexEngine.getParser().parseQuery("a OR b"),"a OR b", null, categoriesFilter, VariablesRangeFilter.NO_FILTER), 0, 10, 0); matches = searchResults.getMatches(); //System.out.println(matches); //System.out.println(searchResults.getResults()); //System.out.println(searchResults.getFacets()); assertEquals(5, matches); checkSearchResults(searchResults.getResults(), new String[] {"D8", "D7", "D5", "D3", "D1"}); categoriesFilter = HashMultimap.create(); categoriesFilter.put("TIPO", "NAH"); categoriesFilter.put("TIPO", "TERCERO"); categoriesFilter.put("PRECIO", "BAJO"); searchResults = searcher.search(new Query(indexEngine.getParser().parseQuery("a OR b"),"a OR b", null, categoriesFilter, VariablesRangeFilter.NO_FILTER), 0, 10, 0); matches = searchResults.getMatches(); //System.out.println(matches); //System.out.println(searchResults.getResults()); //System.out.println(searchResults.getFacets()); assertEquals(2, matches); checkSearchResults(searchResults.getResults(), new String[] {"D5", "D1"}); } }