/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.client.solrj.io.stream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.io.ClassificationEvaluation;
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator;
import org.apache.solr.client.solrj.io.eval.AddEvaluator;
import org.apache.solr.client.solrj.io.eval.AndEvaluator;
import org.apache.solr.client.solrj.io.eval.EqualsEvaluator;
import org.apache.solr.client.solrj.io.eval.GreaterThanEqualToEvaluator;
import org.apache.solr.client.solrj.io.eval.GreaterThanEvaluator;
import org.apache.solr.client.solrj.io.eval.IfThenElseEvaluator;
import org.apache.solr.client.solrj.io.eval.LessThanEqualToEvaluator;
import org.apache.solr.client.solrj.io.eval.LessThanEvaluator;
import org.apache.solr.client.solrj.io.eval.NotEvaluator;
import org.apache.solr.client.solrj.io.eval.OrEvaluator;
import org.apache.solr.client.solrj.io.eval.RawValueEvaluator;
import org.apache.solr.client.solrj.io.ops.ConcatOperation;
import org.apache.solr.client.solrj.io.ops.GroupOperation;
import org.apache.solr.client.solrj.io.ops.ReplaceOperation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.CountMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MeanMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MinMetric;
import org.apache.solr.client.solrj.io.stream.metrics.SumMetric;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Assume;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * All base tests will be done with CloudSolrStream. Under the covers CloudSolrStream uses SolrStream so
 * SolrStream will get fully exercised through these tests.
 **/
@Slow
@LuceneTestCase.SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45"})
public class StreamExpressionTest extends SolrCloudTestCase {

  private static final String COLLECTIONORALIAS = "collection1";
  private static final int TIMEOUT = DEFAULT_TIMEOUT;
  private static final String id = "id";

  private static boolean useAlias;

  @BeforeClass
  public static void setupCluster() throws Exception {
    configureCluster(4)
        .addConfig("conf", getFile("solrj").toPath().resolve("solr").resolve("configsets").resolve("streaming").resolve("conf"))
        .addConfig("ml", getFile("solrj").toPath().resolve("solr").resolve("configsets").resolve("ml").resolve("conf"))
        .configure();

    String collection;
    useAlias = random().nextBoolean();
    if (useAlias) {
      collection = COLLECTIONORALIAS + "_collection";
    } else {
      collection = COLLECTIONORALIAS;
    }

    CollectionAdminRequest.createCollection(collection, "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, cluster.getSolrClient().getZkStateReader(),
        false, true, TIMEOUT);
    if (useAlias) {
      CollectionAdminRequest.createAlias(COLLECTIONORALIAS, collection).process(cluster.getSolrClient());
    }
  }

  @Before
  public void cleanIndex() throws Exception {
    new UpdateRequest()
        .deleteByQuery("*:*")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  }
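  // The tests below share one pattern: index a few documents, build a streaming expression
  // (either parsed with StreamExpressionParser or constructed through a StreamFactory), attach
  // a StreamContext holding a SolrClientCache, drain the stream with the getTuples() helper
  // used throughout this class, and assert on the tuples that come back.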
  @Test
  public void testCloudSolrStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress());
    StreamExpression expression;
    CloudSolrStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    try {
      // Basic test
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "a_i", 0);

      // Basic w/aliases
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", aliases=\"a_i=alias.a_i, a_s=name\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "alias.a_i", 0);
      assertString(tuples.get(0), "name", "hello0");

      // Basic filtered test
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);
      assertOrder(tuples, 0, 3, 4);
      assertLong(tuples.get(1), "a_i", 3);

      // Missing q param should fail
      try {
        expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
        stream = new CloudSolrStream(expression, factory);
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        throw new Exception("Should be an exception here");
      } catch (Exception e) {
        assertTrue(e.getMessage().contains("q param expected for search function"));
      }

      // Missing fl param should fail
      try {
        expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", sort=\"a_f asc, a_i asc\")");
        stream = new CloudSolrStream(expression, factory);
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        throw new Exception("Should be an exception here");
      } catch (Exception e) {
        assertTrue(e.getMessage().contains("fl param expected for search function"));
      }

      // Invalid sort spec should fail
      try {
        expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", fl=\"id, a_f\", sort=\"a_f\")");
        stream = new CloudSolrStream(expression, factory);
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        throw new Exception("Should be an exception here");
      } catch (Exception e) {
        assertTrue(e.getMessage().contains("Invalid sort spec"));
      }

      // Test with shards param
      List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext);

      Map<String, List<String>> shardsMap = new HashMap<>();
      shardsMap.put("myCollection", shardUrls);
      StreamContext context = new StreamContext();
      context.put("shards", shardsMap);
      context.setSolrClientCache(solrClientCache);

      // Basic test
      expression = StreamExpressionParser.parse("search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(context);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "a_i", 0);

      // Exercise the /stream handler

      // Add the shards http parameter for myCollection
      StringBuilder buf = new StringBuilder();
      for (String shardUrl : shardUrls) {
        if (buf.length() > 0) {
          buf.append(",");
        }
        buf.append(shardUrl);
      }

      ModifiableSolrParams solrParams = new ModifiableSolrParams();
      solrParams.add("qt", "/stream");
      solrParams.add("expr", "search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      solrParams.add("myCollection.shards", buf.toString());
      SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams);
      solrStream.setStreamContext(context);
      tuples = getTuples(solrStream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "a_i", 0);
    } finally {
      solrClientCache.close();
    }
  }
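  // sql() runs a SQL statement against the collection and streams the result rows back as
  // tuples; here the whole expression is posted to the /stream handler of a single shard via
  // the 'expr' http parameter.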
  @Test
  public void testSqlStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext);

    try {
      StringBuilder buf = new StringBuilder();
      for (String shardUrl : shardUrls) {
        if (buf.length() > 0) {
          buf.append(",");
        }
        buf.append(shardUrl);
      }

      ModifiableSolrParams solrParams = new ModifiableSolrParams();
      solrParams.add("qt", "/stream");
      solrParams.add("expr", "sql(" + COLLECTIONORALIAS + ", stmt=\"select id from collection1 order by a_i asc\")");
      SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams);
      solrStream.setStreamContext(streamContext);
      tuples = getTuples(solrStream);
      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 1, 2, 3, 4);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testCloudSolrStreamWithZkHost() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory();
    StreamExpression expression;
    CloudSolrStream stream;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    List<Tuple> tuples;

    try {
      // Basic test
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", zkHost=" + cluster.getZkServer().getZkAddress() + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "a_i", 0);

      // Basic w/aliases
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", aliases=\"a_i=alias.a_i, a_s=name\", zkHost=" + cluster.getZkServer().getZkAddress() + ")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
      assertLong(tuples.get(0), "alias.a_i", 0);
      assertString(tuples.get(0), "name", "hello0");

      // Basic filtered test
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", zkHost=" + cluster.getZkServer().getZkAddress() + ", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);
      assertOrder(tuples, 0, 3, 4);
      assertLong(tuples.get(1), "a_i", 3);

      // Test a couple of multiple-fq clauses.
      expression = StreamExpressionParser.parse("search(collection1, fq=\"a_s:hello0\", fq=\"a_s:hello1\", q=\"id:(*)\", "
          + "zkHost=" + cluster.getZkServer().getZkAddress() + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertEquals("fq clauses should have prevented any docs from coming back", 0, tuples.size());

      expression = StreamExpressionParser.parse("search(collection1, fq=\"a_s:(hello0 OR hello1)\", q=\"id:(*)\", "
          + "zkHost=" + cluster.getZkServer().getZkAddress() + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertEquals("Combining the fq clauses with OR should show us 2 docs", 2, tuples.size());
    } finally {
      solrClientCache.close();
    }
  }
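  // The /stream handler applies Solr's parameter macro expansion: ${name} placeholders in the
  // 'expr' parameter are replaced with the value of the matching http parameter (q1, q2 and
  // mySort below) before the expression is parsed.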
  @Test
  public void testParameterSubstitution() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/" + COLLECTIONORALIAS;
    List<Tuple> tuples;
    TupleStream stream;

    // Basic test
    ModifiableSolrParams sParams = new ModifiableSolrParams();
    sParams.set("expr", "merge("
        + "${q1},"
        + "${q2},"
        + "on=${mySort})");
    sParams.set(CommonParams.QT, "/stream");
    sParams.set("q1", "search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=${mySort})");
    sParams.set("q2", "search(" + COLLECTIONORALIAS + ", q=\"id:(1)\", fl=\"id,a_s,a_i,a_f\", sort=${mySort})");
    sParams.set("mySort", "a_f asc");
    stream = new SolrStream(url, sParams);
    tuples = getTuples(stream);

    assertEquals(4, tuples.size());
    assertOrder(tuples, 0, 1, 3, 4);

    // Basic test desc
    sParams.set("mySort", "a_f desc");
    stream = new SolrStream(url, sParams);
    tuples = getTuples(stream);

    assertEquals(4, tuples.size());
    assertOrder(tuples, 4, 3, 1, 0);

    // Basic w/multi comp
    sParams.set("q2", "search(" + COLLECTIONORALIAS + ", q=\"id:(1 2)\", fl=\"id,a_s,a_i,a_f\", sort=${mySort})");
    sParams.set("mySort", "\"a_f asc, a_s asc\"");
    stream = new SolrStream(url, sParams);
    tuples = getTuples(stream);

    assertEquals(5, tuples.size());
    assertOrder(tuples, 0, 2, 1, 3, 4);
  }
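  // unique() assumes its inner stream is sorted on the 'over' fields and emits only the first
  // tuple it sees for each distinct value of those fields.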
  @Test
  public void testUniqueStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class);

    try {
      // Basic test
      expression = StreamExpressionParser.parse("unique(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"), over=\"a_f\")");
      stream = new UniqueStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 0, 1, 3, 4);

      // Basic test desc
      expression = StreamExpressionParser.parse("unique(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f desc, a_i desc\"), over=\"a_f\")");
      stream = new UniqueStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 4, 3, 1, 2);

      // Basic w/multi comp
      expression = StreamExpressionParser.parse("unique(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"), over=\"a_f, a_i\")");
      stream = new UniqueStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);

      // full factory w/multi comp
      stream = factory.constructStream("unique(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"), over=\"a_f, a_i\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testSortStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      StreamFactory factory = new StreamFactory()
          .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
          .withFunctionName("search", CloudSolrStream.class)
          .withFunctionName("sort", SortStream.class);

      // Basic test
      stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assert (tuples.size() == 6);
      assertOrder(tuples, 0, 1, 5, 2, 3, 4);

      // Basic test desc
      stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i desc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assert (tuples.size() == 6);
      assertOrder(tuples, 4, 3, 2, 1, 5, 0);

      // Basic w/multi comp
      stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc, a_f desc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assert (tuples.size() == 6);
      assertOrder(tuples, 0, 5, 1, 2, 3, 4);
    } finally {
      solrClientCache.close();
    }
  }
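  // null() is a sink used for performance testing: it reads and discards every tuple from the
  // inner stream, then emits a single tuple whose "nullCount" field holds the number of tuples
  // consumed.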
  @Test
  public void testNullStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("null", NullStream.class);

    try {
      // Basic test
      stream = factory.constructStream("null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertTrue(tuples.size() == 1);
      assertTrue(tuples.get(0).getLong("nullCount") == 6);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelNullStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("null", NullStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    try {
      // Basic test
      stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"nullCount desc\", null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), by=\"a_i asc\"))");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertTrue(tuples.size() == 2);
      long nullCount = 0;
      for (Tuple t : tuples) {
        nullCount += t.getLong("nullCount");
      }
      assertEquals(nullCount, 6L);
    } finally {
      solrClientCache.close();
    }
  }
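  // The next test checks how missing field values travel through the /export handler: absent
  // fields come back as nulls, which sort to the top on an ascending sort and to the bottom on
  // a descending sort.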
  @Test
  public void testNulls() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_i", "1", "a_f", "0", "s_multi", "aaa", "s_multi", "bbb", "i_multi", "100", "i_multi", "200")
        .add(id, "2", "a_s", "hello2", "a_i", "3", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "4", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "2", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    Tuple tuple;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class);
    try {
      // Basic test
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 4, 0, 1, 2, 3);

      tuple = tuples.get(0);
      assertTrue("hello4".equals(tuple.getString("a_s")));
      assertNull(tuple.get("s_multi"));
      assertNull(tuple.get("i_multi"));
      assertNull(tuple.getLong("a_i"));

      tuple = tuples.get(1);
      assertNull(tuple.get("a_s"));
      List<String> strings = tuple.getStrings("s_multi");
      assertNotNull(strings);
      assertEquals("aaa", strings.get(0));
      assertEquals("bbb", strings.get(1));
      List<Long> longs = tuple.getLongs("i_multi");
      assertNotNull(longs);

      // Test sort (asc) with null string field. Null should sort to the top.
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_s asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 1, 2, 3, 4);

      // Test sort (desc) with null string field. Null should sort to the bottom.
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_s desc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 4, 3, 2, 1, 0);
    } finally {
      solrClientCache.close();
    }
  }
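  // merge() performs an ordered union of streams that are already sorted by the 'on' criteria,
  // repeatedly emitting whichever head tuple sorts first.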
  @Test
  public void testMergeStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("merge", MergeStream.class);

    // Basic test
    expression = StreamExpressionParser.parse("merge("
        + "search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"),"
        + "search(" + COLLECTIONORALIAS + ", q=\"id:(1)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"),"
        + "on=\"a_f asc\")");
    stream = new MergeStream(expression, factory);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 0, 1, 3, 4);

      // Basic test desc
      expression = StreamExpressionParser.parse("merge("
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f desc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(1)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f desc\"),"
          + "on=\"a_f desc\")");
      stream = new MergeStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 4, 3, 1, 0);

      // Basic w/multi comp
      expression = StreamExpressionParser.parse("merge("
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(1 2)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "on=\"a_f asc, a_s asc\")");
      stream = new MergeStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);

      // full factory w/multi comp
      stream = factory.constructStream("merge("
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(1 2)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "on=\"a_f asc, a_s asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 2, 1, 3, 4);

      // full factory w/multi streams
      stream = factory.constructStream("merge("
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(0 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(1)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"id:(2)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
          + "on=\"a_f asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 0, 2, 1, 4);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testRankStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("top", RankStream.class);
    try {
      // Basic test
      expression = StreamExpressionParser.parse("top("
          + "n=3,"
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"),"
          + "sort=\"a_f asc, a_i asc\")");
      stream = new RankStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);
      assertOrder(tuples, 0, 2, 1);

      // Basic test desc
      expression = StreamExpressionParser.parse("top("
          + "n=2,"
          + "unique("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f desc\"),"
          + "over=\"a_f\"),"
          + "sort=\"a_f desc\")");
      stream = new RankStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 2);
      assertOrder(tuples, 4, 3);

      // full factory
      stream = factory.constructStream("top("
          + "n=4,"
          + "unique("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"),"
          + "over=\"a_f\"),"
          + "sort=\"a_f asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 0, 1, 3, 4);

      // full factory, switch order
      stream = factory.constructStream("top("
          + "n=4,"
          + "unique("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f desc, a_i desc\"),"
          + "over=\"a_f\"),"
          + "sort=\"a_f asc\")");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 4);
      assertOrder(tuples, 2, 1, 3, 4);
    } finally {
      solrClientCache.close();
    }
  }
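  // random() returns a pseudo-random sample of matching documents, so two identical requests
  // are expected to come back in different orders; the test below relies on exactly that.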
  @Test
  public void testRandomStream() throws Exception {

    UpdateRequest update = new UpdateRequest();
    for (int idx = 0; idx < 1000; ++idx) {
      String idxString = Integer.toString(idx);
      update.add(id, idxString, "a_s", "hello" + idxString, "a_i", idxString, "a_f", idxString);
    }
    update.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("random", RandomStream.class);

    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    try {
      context.setSolrClientCache(cache);

      expression = StreamExpressionParser.parse("random(" + COLLECTIONORALIAS + ", q=\"*:*\", rows=\"1000\", fl=\"id, a_i\")");
      stream = factory.constructStream(expression);
      stream.setStreamContext(context);
      List<Tuple> tuples1 = getTuples(stream);
      assert (tuples1.size() == 1000);

      expression = StreamExpressionParser.parse("random(" + COLLECTIONORALIAS + ", q=\"*:*\", rows=\"1000\", fl=\"id, a_i\")");
      stream = factory.constructStream(expression);
      stream.setStreamContext(context);
      List<Tuple> tuples2 = getTuples(stream);
      assert (tuples2.size() == 1000);

      boolean different = false;
      for (int i = 0; i < tuples1.size(); i++) {
        Tuple tuple1 = tuples1.get(i);
        Tuple tuple2 = tuples2.get(i);
        if (!tuple1.get("id").equals(tuple2.get(id))) {
          different = true;
          break;
        }
      }
      assertTrue(different);

      Collections.sort(tuples1, new FieldComparator("id", ComparatorOrder.ASCENDING));
      Collections.sort(tuples2, new FieldComparator("id", ComparatorOrder.ASCENDING));

      for (int i = 0; i < tuples1.size(); i++) {
        Tuple tuple1 = tuples1.get(i);
        Tuple tuple2 = tuples2.get(i);
        if (!tuple1.get("id").equals(tuple2.get(id))) {
          assert (tuple1.getLong("id").equals(tuple2.get("a_i")));
        }
      }

      expression = StreamExpressionParser.parse("random(" + COLLECTIONORALIAS + ", q=\"*:*\", rows=\"1\", fl=\"id, a_i\")");
      stream = factory.constructStream(expression);
      stream.setStreamContext(context);
      List<Tuple> tuples3 = getTuples(stream);
      assert (tuples3.size() == 1);

      // Exercise the /stream handler
      ModifiableSolrParams sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream"));
      sParams.add("expr", "random(" + COLLECTIONORALIAS + ", q=\"*:*\", rows=\"1\", fl=\"id, a_i\")");
      JettySolrRunner jetty = cluster.getJettySolrRunner(0);
      SolrStream solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams);
      List<Tuple> tuples4 = getTuples(solrStream);
      assert (tuples4.size() == 1);
    } finally {
      cache.close();
    }
  }
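  // reduce() walks a stream that is sorted on the 'by' fields and applies the group() operation
  // to each bucket; group() collects up to n tuples per bucket (ordered by its own sort) into
  // the "group" field of the emitted tuple.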
  @Test
  public void testReducerStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    Tuple t0, t1, t2;
    List<Map> maps0, maps1, maps2;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("reduce", ReducerStream.class)
        .withFunctionName("group", GroupOperation.class);

    try {
      // basic
      expression = StreamExpressionParser.parse("reduce("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_s asc, a_f asc\"),"
          + "by=\"a_s\","
          + "group(sort=\"a_f desc\", n=\"4\"))");
      stream = factory.constructStream(expression);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);

      t0 = tuples.get(0);
      maps0 = t0.getMaps("group");
      assertMaps(maps0, 9, 1, 2, 0);

      t1 = tuples.get(1);
      maps1 = t1.getMaps("group");
      assertMaps(maps1, 8, 7, 5, 3);

      t2 = tuples.get(2);
      maps2 = t2.getMaps("group");
      assertMaps(maps2, 6, 4);

      // basic w/spaces
      expression = StreamExpressionParser.parse("reduce("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_s asc, a_f asc\"),"
          + "by=\"a_s\","
          + "group(sort=\"a_i asc\", n=\"2\"))");
      stream = factory.constructStream(expression);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);

      t0 = tuples.get(0);
      maps0 = t0.getMaps("group");
      assert (maps0.size() == 2);
      assertMaps(maps0, 0, 1);

      t1 = tuples.get(1);
      maps1 = t1.getMaps("group");
      assertMaps(maps1, 3, 5);

      t2 = tuples.get(2);
      maps2 = t2.getMaps("group");
      assertMaps(maps2, 4, 6);
    } finally {
      solrClientCache.close();
    }
  }
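  // having() filters tuples with a boolean evaluator tree (eq, lt, gteq, and, or, not, ...);
  // only tuples for which the evaluator returns true pass through.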
  @Test
  public void testHavingStream() throws Exception {

    SolrClientCache solrClientCache = new SolrClientCache();

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1", "subject", "blah blah blah 0")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2", "subject", "blah blah blah 2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "subject", "blah blah blah 3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "subject", "blah blah blah 4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5", "subject", "blah blah blah 1")
        .add(id, "5", "a_s", "hello3", "a_i", "5", "a_f", "6", "subject", "blah blah blah 5")
        .add(id, "6", "a_s", "hello4", "a_i", "6", "a_f", "7", "subject", "blah blah blah 6")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "8", "subject", "blah blah blah 7")
        .add(id, "8", "a_s", "hello3", "a_i", "8", "a_f", "9", "subject", "blah blah blah 8")
        .add(id, "9", "a_s", "hello0", "a_i", "9", "a_f", "10", "subject", "blah blah blah 9")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("having", HavingStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("and", AndEvaluator.class)
        .withFunctionName("or", OrEvaluator.class)
        .withFunctionName("not", NotEvaluator.class)
        .withFunctionName("gt", GreaterThanEvaluator.class)
        .withFunctionName("lt", LessThanEvaluator.class)
        .withFunctionName("eq", EqualsEvaluator.class)
        .withFunctionName("lteq", LessThanEqualToEvaluator.class)
        .withFunctionName("gteq", GreaterThanEqualToEvaluator.class);

    stream = factory.constructStream("having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), eq(a_i, 9))");
    StreamContext context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    Tuple t = tuples.get(0);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), and(eq(a_i, 9),lt(a_i, 10)))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), or(eq(a_i, 9),eq(a_i, 8)))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 2);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("8"));
    t = tuples.get(1);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), and(eq(a_i, 9),not(eq(a_i, 9))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 0);

    stream = factory.constructStream("having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), and(lteq(a_i, 9), gteq(a_i, 8)))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 2);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("8"));
    t = tuples.get(1);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("having(rollup(over=a_f, sum(a_i), search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\")), and(eq(sum(a_i), 9),eq(sum(a_i), 9)))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    t = tuples.get(0);
    assertTrue(t.getDouble("a_f") == 10.0D);

    solrClientCache.close();
  }
  @Test
  public void testParallelHavingStream() throws Exception {

    SolrClientCache solrClientCache = new SolrClientCache();

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1", "subject", "blah blah blah 0")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2", "subject", "blah blah blah 2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "subject", "blah blah blah 3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "subject", "blah blah blah 4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5", "subject", "blah blah blah 1")
        .add(id, "5", "a_s", "hello3", "a_i", "5", "a_f", "6", "subject", "blah blah blah 5")
        .add(id, "6", "a_s", "hello4", "a_i", "6", "a_f", "7", "subject", "blah blah blah 6")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "8", "subject", "blah blah blah 7")
        .add(id, "8", "a_s", "hello3", "a_i", "8", "a_f", "9", "subject", "blah blah blah 8")
        .add(id, "9", "a_s", "hello0", "a_i", "9", "a_f", "10", "subject", "blah blah blah 9")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("having", HavingStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("and", AndEvaluator.class)
        .withFunctionName("or", OrEvaluator.class)
        .withFunctionName("not", NotEvaluator.class)
        .withFunctionName("gt", GreaterThanEvaluator.class)
        .withFunctionName("lt", LessThanEvaluator.class)
        .withFunctionName("eq", EqualsEvaluator.class)
        .withFunctionName("lteq", LessThanEqualToEvaluator.class)
        .withFunctionName("gteq", GreaterThanEqualToEvaluator.class)
        .withFunctionName("val", RawValueEvaluator.class)
        .withFunctionName("parallel", ParallelStream.class);

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), eq(a_i, 9)))");
    StreamContext context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    Tuple t = tuples.get(0);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), and(eq(a_i, 9),lt(a_i, 10))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\",having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), or(eq(a_i, 9),eq(a_i, 8))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 2);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("8"));
    t = tuples.get(1);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), and(eq(a_i, 9),not(eq(a_i, 9)))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 0);

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\",having(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), and(lteq(a_i, 9), gteq(a_i, 8))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 2);
    t = tuples.get(0);
    assertTrue(t.getString("id").equals("8"));
    t = tuples.get(1);
    assertTrue(t.getString("id").equals("9"));

    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", having(rollup(over=a_f, sum(a_i), search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=a_f)), and(eq(sum(a_i), 9),eq(sum(a_i),9))))");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 1);
    t = tuples.get(0);
    assertTrue(t.getDouble("a_f") == 10.0D);

    solrClientCache.close();
  }
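  // fetch() decorates a stream by buffering batchSize tuples at a time and issuing a secondary
  // query to pull extra fields (fl) for each batch, joining them back via the 'on' key. Here
  // on="id=a_i" equates the tuple's id with the fetched doc's a_i; in this test data the two
  // values are identical for every doc, so the join simply re-attaches each doc's subject.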
  @Test
  public void testFetchStream() throws Exception {

    SolrClientCache solrClientCache = new SolrClientCache(); // TODO share in @Before ; close in @After ?

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1", "subject", "blah blah blah 0")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2", "subject", "blah blah blah 2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "subject", "blah blah blah 3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "subject", "blah blah blah 4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5", "subject", "blah blah blah 1")
        .add(id, "5", "a_s", "hello3", "a_i", "5", "a_f", "6", "subject", "blah blah blah 5")
        .add(id, "6", "a_s", "hello4", "a_i", "6", "a_f", "7", "subject", "blah blah blah 6")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "8", "subject", "blah blah blah 7")
        .add(id, "8", "a_s", "hello3", "a_i", "8", "a_f", "9", "subject", "blah blah blah 8")
        .add(id, "9", "a_s", "hello0", "a_i", "9", "a_f", "10", "subject", "blah blah blah 9")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("fetch", FetchStream.class);

    // Tuples arrive in the a_f order of the inner search: ids 0,2,3,4,1,5,6,7,8,9
    String[] expectedSubjectOrder = {"0", "2", "3", "4", "1", "5", "6", "7", "8", "9"};

    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ", search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), on=\"id=a_i\", batchSize=\"2\", fl=\"subject\")");
    StreamContext context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 10);
    for (int i = 0; i < expectedSubjectOrder.length; i++) {
      assertEquals("blah blah blah " + expectedSubjectOrder[i], tuples.get(i).getString("subject"));
    }

    // Change the batch size
    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ", search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), on=\"id=a_i\", batchSize=\"3\", fl=\"subject\")");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assert (tuples.size() == 10);
    for (int i = 0; i < expectedSubjectOrder.length; i++) {
      assertEquals("blah blah blah " + expectedSubjectOrder[i], tuples.get(i).getString("subject"));
    }

    // SOLR-10404 test that "hello 99" as a value gets escaped
    new UpdateRequest()
        .add(id, "99", "a1_s", "hello 99", "a2_s", "hello 99", "subject", "blah blah blah 99")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ", search(" + COLLECTIONORALIAS + ", q=" + id + ":99, fl=\"id,a1_s\", sort=\"id asc\"), on=\"a1_s=a2_s\", fl=\"subject\")");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    assertEquals(1, tuples.size());
    Tuple t = tuples.get(0);
    assertTrue("blah blah blah 99".equals(t.getString("subject")));

    solrClientCache.close();
  }
  @Test
  public void testParallelFetchStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1", "subject", "blah blah blah 0")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2", "subject", "blah blah blah 2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "subject", "blah blah blah 3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "subject", "blah blah blah 4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5", "subject", "blah blah blah 1")
        .add(id, "5", "a_s", "hello3", "a_i", "5", "a_f", "6", "subject", "blah blah blah 5")
        .add(id, "6", "a_s", "hello4", "a_i", "6", "a_f", "7", "subject", "blah blah blah 6")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "8", "subject", "blah blah blah 7")
        .add(id, "8", "a_s", "hello3", "a_i", "8", "a_f", "9", "subject", "blah blah blah 8")
        .add(id, "9", "a_s", "hello0", "a_i", "9", "a_f", "10", "subject", "blah blah blah 9")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("fetch", FetchStream.class);

    try {
      // Tuples are merged across the workers in a_f order: ids 0,2,3,4,1,5,6,7,8,9
      String[] expectedSubjectOrder = {"0", "2", "3", "4", "1", "5", "6", "7", "8", "9"};

      stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", fetch(" + COLLECTIONORALIAS + ", search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=\"id\"), on=\"id=a_i\", batchSize=\"2\", fl=\"subject\"))");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      for (int i = 0; i < expectedSubjectOrder.length; i++) {
        assertEquals("blah blah blah " + expectedSubjectOrder[i], tuples.get(i).getString("subject"));
      }

      stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"a_f asc\", fetch(" + COLLECTIONORALIAS + ", search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=\"id\"), on=\"id=a_i\", batchSize=\"3\", fl=\"subject\"))");
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      for (int i = 0; i < expectedSubjectOrder.length; i++) {
        assertEquals("blah blah blah " + expectedSubjectOrder[i], tuples.get(i).getString("subject"));
      }
    } finally {
      solrClientCache.close();
    }
  }
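  // daemon() wraps an inner stream and re-runs it on a background thread every runInterval
  // milliseconds, pushing the results into a bounded internal queue of queueSize tuples; read()
  // drains that queue, and the daemon blocks once the queue is full.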
  @Test
  public void testDaemonStream() throws Exception {

    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("daemon", DaemonStream.class);

    StreamExpression expression;
    DaemonStream daemonStream;

    expression = StreamExpressionParser.parse("daemon(rollup("
        + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"a_i,a_s\", sort=\"a_s asc\"),"
        + "over=\"a_s\","
        + "sum(a_i)"
        + "), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
    daemonStream = (DaemonStream) factory.constructStream(expression);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    daemonStream.setStreamContext(streamContext);
    try {
      // Test Long and Double Sums
      daemonStream.open(); // This will start the daemon thread

      for (int i = 0; i < 4; i++) {
        Tuple tuple = daemonStream.read(); // Reads from the queue
        String bucket = tuple.getString("a_s");
        Double sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello0"));
        assertTrue(sumi.doubleValue() == 17.0D);

        tuple = daemonStream.read();
        bucket = tuple.getString("a_s");
        sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello3"));
        assertTrue(sumi.doubleValue() == 38.0D);

        tuple = daemonStream.read();
        bucket = tuple.getString("a_s");
        sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello4"));
        assertTrue(sumi.longValue() == 15);
      }

      // Now let's wait until the internal queue fills up
      while (daemonStream.remainingCapacity() > 0) {
        try {
          Thread.sleep(1000);
        } catch (Exception e) {
        }
      }

      // OK, capacity is full; let's index a new doc
      new UpdateRequest()
          .add(id, "10", "a_s", "hello0", "a_i", "1", "a_f", "10")
          .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

      // Now let's clear the 9 docs sitting in the queue, plus 3 more to get past the run that
      // was blocked. The next run should have the tuples with the updated count.
      for (int i = 0; i < 12; i++) {
        daemonStream.read();
      }

      // And rerun the loop. It should have a new count for hello0
      for (int i = 0; i < 4; i++) {
        Tuple tuple = daemonStream.read(); // Reads from the queue
        String bucket = tuple.getString("a_s");
        Double sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello0"));
        assertTrue(sumi.doubleValue() == 18.0D);

        tuple = daemonStream.read();
        bucket = tuple.getString("a_s");
        sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello3"));
        assertTrue(sumi.doubleValue() == 38.0D);

        tuple = daemonStream.read();
        bucket = tuple.getString("a_s");
        sumi = tuple.getDouble("sum(a_i)");
        assertTrue(bucket.equals("hello4"));
        assertTrue(sumi.longValue() == 15);
      }
    } finally {
      daemonStream.close(); // This should stop the daemon thread
      solrClientCache.close();
    }
  }
  @Test
  public void testTerminatingDaemonStream() throws Exception {
    Assume.assumeTrue(!useAlias);

    new UpdateRequest()
        .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("daemon", DaemonStream.class);

    StreamExpression expression;
    DaemonStream daemonStream;

    SolrClientCache cache = new SolrClientCache();
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    expression = StreamExpressionParser.parse("daemon(topic(" + COLLECTIONORALIAS + "," + COLLECTIONORALIAS + ", q=\"a_s:hello\", initialCheckpoint=0, id=\"topic1\", rows=2, fl=\"id\""
        + "), id=test, runInterval=1000, terminate=true, queueSize=50)");
    daemonStream = (DaemonStream) factory.constructStream(expression);
    daemonStream.setStreamContext(context);

    List<Tuple> tuples = getTuples(daemonStream);
    assertTrue(tuples.size() == 10);
    cache.close();
  }
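  // rollup() computes aggregate metrics (sum, min, max, avg, count) over buckets of a stream
  // that is sorted on the 'over' fields, emitting one tuple per bucket.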
void testRollupStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      expression = StreamExpressionParser.parse("rollup("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"a_s,a_i,a_f\", sort=\"a_s asc\"),"
          + "over=\"a_s\","
          + "sum(a_i),"
          + "sum(a_f),"
          + "min(a_i),"
          + "min(a_f),"
          + "max(a_i),"
          + "max(a_f),"
          + "avg(a_i),"
          + "avg(a_f),"
          + "count(*),"
          + ")");
      stream = factory.constructStream(expression);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);

      //Test Long and Double Sums
      Tuple tuple = tuples.get(0);
      String bucket = tuple.getString("a_s");
      Double sumi = tuple.getDouble("sum(a_i)");
      Double sumf = tuple.getDouble("sum(a_f)");
      Double mini = tuple.getDouble("min(a_i)");
      Double minf = tuple.getDouble("min(a_f)");
      Double maxi = tuple.getDouble("max(a_i)");
      Double maxf = tuple.getDouble("max(a_f)");
      Double avgi = tuple.getDouble("avg(a_i)");
      Double avgf = tuple.getDouble("avg(a_f)");
      Double count = tuple.getDouble("count(*)");
      assertTrue(bucket.equals("hello0"));
      assertTrue(sumi.doubleValue() == 17.0D);
      assertTrue(sumf.doubleValue() == 18.0D);
      assertTrue(mini.doubleValue() == 0.0D);
      assertTrue(minf.doubleValue() == 1.0D);
      assertTrue(maxi.doubleValue() == 14.0D);
      assertTrue(maxf.doubleValue() == 10.0D);
      assertTrue(avgi.doubleValue() == 4.25D);
      assertTrue(avgf.doubleValue() == 4.5D);
      assertTrue(count.doubleValue() == 4);

      tuple = tuples.get(1);
      bucket = tuple.getString("a_s");
      sumi = tuple.getDouble("sum(a_i)");
      sumf = tuple.getDouble("sum(a_f)");
      mini = tuple.getDouble("min(a_i)");
      minf = tuple.getDouble("min(a_f)");
      maxi = tuple.getDouble("max(a_i)");
      maxf = tuple.getDouble("max(a_f)");
      avgi = tuple.getDouble("avg(a_i)");
      avgf = tuple.getDouble("avg(a_f)");
      count = tuple.getDouble("count(*)");
      assertTrue(bucket.equals("hello3"));
      assertTrue(sumi.doubleValue() == 38.0D);
      assertTrue(sumf.doubleValue() == 26.0D);
      assertTrue(mini.doubleValue() == 3.0D);
      assertTrue(minf.doubleValue() == 3.0D);
      assertTrue(maxi.doubleValue() == 13.0D);
      assertTrue(maxf.doubleValue() == 9.0D);
      assertTrue(avgi.doubleValue() == 9.5D);
      assertTrue(avgf.doubleValue() == 6.5D);
      assertTrue(count.doubleValue() == 4);

      tuple = tuples.get(2);
      bucket = tuple.getString("a_s");
      sumi = tuple.getDouble("sum(a_i)");
      sumf =
tuple.getDouble("sum(a_f)"); mini = tuple.getDouble("min(a_i)"); minf = tuple.getDouble("min(a_f)"); maxi = tuple.getDouble("max(a_i)"); maxf = tuple.getDouble("max(a_f)"); avgi = tuple.getDouble("avg(a_i)"); avgf = tuple.getDouble("avg(a_f)"); count = tuple.getDouble("count(*)"); assertTrue(bucket.equals("hello4")); assertTrue(sumi.longValue() == 15); assertTrue(sumf.doubleValue() == 11.0D); assertTrue(mini.doubleValue() == 4.0D); assertTrue(minf.doubleValue() == 4.0D); assertTrue(maxi.doubleValue() == 11.0D); assertTrue(maxf.doubleValue() == 7.0D); assertTrue(avgi.doubleValue() == 7.5D); assertTrue(avgf.doubleValue() == 5.5D); assertTrue(count.doubleValue() == 2); } finally { solrClientCache.close(); } } @Test public void testStatsStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1") .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4") .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5") .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6") .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7") .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8") .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9") .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory factory = new StreamFactory() .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()) .withFunctionName("stats", StatsStream.class) .withFunctionName("sum", SumMetric.class) .withFunctionName("min", MinMetric.class) .withFunctionName("max", MaxMetric.class) .withFunctionName("avg", MeanMetric.class) .withFunctionName("count", CountMetric.class); StreamExpression expression; TupleStream stream; List<Tuple> tuples; StreamContext streamContext = new StreamContext(); SolrClientCache cache = new SolrClientCache(); try { streamContext.setSolrClientCache(cache); String expr = "stats(" + COLLECTIONORALIAS + ", q=*:*, sum(a_i), sum(a_f), min(a_i), min(a_f), max(a_i), max(a_f), avg(a_i), avg(a_f), count(*))"; expression = StreamExpressionParser.parse(expr); stream = factory.constructStream(expression); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 1); //Test Long and Double Sums Tuple tuple = tuples.get(0); Double sumi = tuple.getDouble("sum(a_i)"); Double sumf = tuple.getDouble("sum(a_f)"); Double mini = tuple.getDouble("min(a_i)"); Double minf = tuple.getDouble("min(a_f)"); Double maxi = tuple.getDouble("max(a_i)"); Double maxf = tuple.getDouble("max(a_f)"); Double avgi = tuple.getDouble("avg(a_i)"); Double avgf = tuple.getDouble("avg(a_f)"); Double count = tuple.getDouble("count(*)"); assertTrue(sumi.longValue() == 70); assertTrue(sumf.doubleValue() == 55.0D); assertTrue(mini.doubleValue() == 0.0D); assertTrue(minf.doubleValue() == 1.0D); assertTrue(maxi.doubleValue() == 14.0D); assertTrue(maxf.doubleValue() == 10.0D); assertTrue(avgi.doubleValue() == 7.0D); assertTrue(avgf.doubleValue() == 5.5D); assertTrue(count.doubleValue() == 10); //Test with shards parameter List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext); expr = "stats(myCollection, q=*:*, sum(a_i), sum(a_f), min(a_i), min(a_f), max(a_i), max(a_f), avg(a_i), avg(a_f), count(*))"; Map<String, List<String>> shardsMap = new HashMap(); shardsMap.put("myCollection", shardUrls); StreamContext context = new 
StreamContext(); context.put("shards", shardsMap); context.setSolrClientCache(cache); stream = factory.constructStream(expr); stream.setStreamContext(context); tuples = getTuples(stream); assert (tuples.size() == 1); //Test Long and Double Sums tuple = tuples.get(0); sumi = tuple.getDouble("sum(a_i)"); sumf = tuple.getDouble("sum(a_f)"); mini = tuple.getDouble("min(a_i)"); minf = tuple.getDouble("min(a_f)"); maxi = tuple.getDouble("max(a_i)"); maxf = tuple.getDouble("max(a_f)"); avgi = tuple.getDouble("avg(a_i)"); avgf = tuple.getDouble("avg(a_f)"); count = tuple.getDouble("count(*)"); assertTrue(sumi.longValue() == 70); assertTrue(sumf.doubleValue() == 55.0D); assertTrue(mini.doubleValue() == 0.0D); assertTrue(minf.doubleValue() == 1.0D); assertTrue(maxi.doubleValue() == 14.0D); assertTrue(maxf.doubleValue() == 10.0D); assertTrue(avgi.doubleValue() == 7.0D); assertTrue(avgf.doubleValue() == 5.5D); assertTrue(count.doubleValue() == 10); //Execersise the /stream hander //Add the shards http parameter for the myCollection StringBuilder buf = new StringBuilder(); for (String shardUrl : shardUrls) { if (buf.length() > 0) { buf.append(","); } buf.append(shardUrl); } ModifiableSolrParams solrParams = new ModifiableSolrParams(); solrParams.add("qt", "/stream"); solrParams.add("expr", expr); solrParams.add("myCollection.shards", buf.toString()); SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams); tuples = getTuples(solrStream); assert (tuples.size() == 1); tuple =tuples.get(0); sumi = tuple.getDouble("sum(a_i)"); sumf = tuple.getDouble("sum(a_f)"); mini = tuple.getDouble("min(a_i)"); minf = tuple.getDouble("min(a_f)"); maxi = tuple.getDouble("max(a_i)"); maxf = tuple.getDouble("max(a_f)"); avgi = tuple.getDouble("avg(a_i)"); avgf = tuple.getDouble("avg(a_f)"); count = tuple.getDouble("count(*)"); assertTrue(sumi.longValue() == 70); assertTrue(sumf.doubleValue() == 55.0D); assertTrue(mini.doubleValue() == 0.0D); assertTrue(minf.doubleValue() == 1.0D); assertTrue(maxi.doubleValue() == 14.0D); assertTrue(maxf.doubleValue() == 10.0D); assertTrue(avgi.doubleValue() == 7.0D); assertTrue(avgf.doubleValue() == 5.5D); assertTrue(count.doubleValue() == 10); //Add a negative test to prove that it cannot find slices if shards parameter is removed try { ModifiableSolrParams solrParamsBad = new ModifiableSolrParams(); solrParamsBad.add("qt", "/stream"); solrParamsBad.add("expr", expr); solrStream = new SolrStream(shardUrls.get(0), solrParamsBad); tuples = getTuples(solrStream); throw new Exception("Exception should have been thrown above"); } catch (IOException e) { assertTrue(e.getMessage().contains("Collection not found: myCollection")); } } finally { cache.close(); } } @Test public void testParallelUniqueStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1") .add(id, "5", "a_s", "hello1", "a_i", "10", "a_f", "1") .add(id, "6", "a_s", "hello1", "a_i", "11", "a_f", "5") .add(id, "7", "a_s", "hello1", "a_i", "12", "a_f", "5") .add(id, "8", "a_s", "hello1", "a_i", "13", "a_f", "4") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory streamFactory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, zkHost) .withFunctionName("search", 
CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("top", RankStream.class)
        .withFunctionName("group", ReducerStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", unique(search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"), over=\"a_f\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_f asc\")");
      pstream.setStreamContext(streamContext);
      List<Tuple> tuples = getTuples(pstream);

      assert (tuples.size() == 5);
      assertOrder(tuples, 0, 1, 3, 4, 6);

      //Test the eofTuples
      Map<String, Tuple> eofTuples = pstream.getEofTuples();
      assert (eofTuples.size() == 2); //There should be an EOF tuple for each worker.
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelShuffleStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "10", "a_f", "1")
        .add(id, "6", "a_s", "hello1", "a_i", "11", "a_f", "5")
        .add(id, "7", "a_s", "hello1", "a_i", "12", "a_f", "5")
        .add(id, "8", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "9", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "10", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "11", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "12", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "13", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "14", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "15", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "16", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "17", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "18", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "19", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "20", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "21", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "22", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "23", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "24", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "25", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "26", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "27", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "28", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "29", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "30", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "31", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "32", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "33", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "34", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "35", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "36", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "37", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "38", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "39", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "40", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "41", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "42", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "43", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "44", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "45", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "46", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "47", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "48", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "49", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "50", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "51", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "52", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "53", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "54", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "55", "a_s", "hello1", "a_i", "13", "a_f", "4")
        .add(id, "56", "a_s", "hello1", "a_i", "13", "a_f", "1000")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("shuffle", ShuffleStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    try {
      ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", unique(shuffle(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"), over=\"a_f\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_f asc\")");
      pstream.setStreamFactory(streamFactory);
      pstream.setStreamContext(streamContext);
      List<Tuple> tuples = getTuples(pstream);

      assert (tuples.size() == 6);
      assertOrder(tuples, 0, 1, 3, 4, 6, 56);

      //Test the eofTuples
      Map<String, Tuple> eofTuples = pstream.getEofTuples();
      assert (eofTuples.size() == 2); //There should be an EOF tuple for each worker.
assert (pstream.toExpression(streamFactory).toString().contains("shuffle"));
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelReducerStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("group", GroupOperation.class)
        .withFunctionName("reduce", ReducerStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    try {
      ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", "
          + "reduce("
          + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i,a_f\", sort=\"a_s asc,a_f asc\", partitionKeys=\"a_s\"), "
          + "by=\"a_s\","
          + "group(sort=\"a_i asc\", n=\"5\")), "
          + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_s asc\")");
      pstream.setStreamContext(streamContext);
      List<Tuple> tuples = getTuples(pstream);

      assert (tuples.size() == 3);

      Tuple t0 = tuples.get(0);
      List<Map> maps0 = t0.getMaps("group");
      assertMaps(maps0, 0, 1, 2, 9);

      Tuple t1 = tuples.get(1);
      List<Map> maps1 = t1.getMaps("group");
      assertMaps(maps1, 3, 5, 7, 8);

      Tuple t2 = tuples.get(2);
      List<Map> maps2 = t2.getMaps("group");
      assertMaps(maps2, 4, 6);

      pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", "
          + "reduce("
          + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i,a_f\", sort=\"a_s desc,a_f asc\", partitionKeys=\"a_s\"), "
          + "by=\"a_s\", "
          + "group(sort=\"a_i desc\", n=\"5\")),"
          + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_s desc\")");
      pstream.setStreamContext(streamContext);
      tuples = getTuples(pstream);

      assert (tuples.size() == 3);

      t0 = tuples.get(0);
      maps0 = t0.getMaps("group");
      assertMaps(maps0, 6, 4);

      t1 = tuples.get(1);
      maps1 = t1.getMaps("group");
      assertMaps(maps1, 8, 7, 5, 3);

      t2 = tuples.get(2);
      maps2 = t2.getMaps("group");
      assertMaps(maps2, 9, 2, 1, 0);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelRankStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "5", "a_s", "hello1", "a_i", "5", "a_f", "1")
        .add(id, "6", "a_s", "hello1", "a_i", "6", "a_f", "1")
        .add(id, "7", "a_s", "hello1", "a_i", "7", "a_f", "1")
        .add(id, "8", "a_s", "hello1", "a_i", "8", "a_f", "1")
        .add(id, "9", "a_s", "hello1", "a_i", "9", "a_f", "1")
        .add(id, "10", "a_s", "hello1", "a_i", "10", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("top", RankStream.class)
        .withFunctionName("group", ReducerStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", "
          + "top("
          + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\"), "
          + "n=\"11\", "
          + "sort=\"a_i desc\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i desc\")");
      pstream.setStreamContext(streamContext);
      List<Tuple> tuples = getTuples(pstream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 10, 9, 8, 7, 6, 5, 4, 3, 2, 0);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelMergeStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello0", "a_i", "10", "a_f", "0")
        .add(id, "6", "a_s", "hello2", "a_i", "8", "a_f", "0")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "3")
        .add(id, "8", "a_s", "hello4", "a_i", "11", "a_f", "4")
        .add(id, "9", "a_s", "hello1", "a_i", "100", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("top", RankStream.class)
        .withFunctionName("group", ReducerStream.class)
        .withFunctionName("merge", MergeStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
      //Test ascending
      ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", merge(search(" + COLLECTIONORALIAS + ", q=\"id:(4 1 8 7 9)\", fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\"), search(" + COLLECTIONORALIAS + ", q=\"id:(0 2 3 6)\", fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\"), on=\"a_i asc\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i asc\")");
      pstream.setStreamContext(streamContext);
      List<Tuple> tuples = getTuples(pstream);

      assert (tuples.size() == 9);
      assertOrder(tuples, 0, 1, 2, 3, 4, 7, 6, 8, 9);

      //Test descending
      pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", merge(search(" + COLLECTIONORALIAS + ", q=\"id:(4 1 8 9)\", fl=\"id,a_s,a_i\", sort=\"a_i desc\", partitionKeys=\"a_i\"), search(" + COLLECTIONORALIAS + ", q=\"id:(0 2 3 6)\", fl=\"id,a_s,a_i\", sort=\"a_i desc\", partitionKeys=\"a_i\"), on=\"a_i desc\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i desc\")");
      pstream.setStreamContext(streamContext);
      tuples = getTuples(pstream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 9, 8, 6, 4, 3, 2, 1, 0);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testParallelRollupStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class);

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    try {
      expression = StreamExpressionParser.parse("parallel(" + COLLECTIONORALIAS + ","
          + "rollup("
          + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"a_s,a_i,a_f\", sort=\"a_s asc\", partitionKeys=\"a_s\"),"
          + "over=\"a_s\","
          + "sum(a_i),"
          + "sum(a_f),"
          + "min(a_i),"
          + "min(a_f),"
          + "max(a_i),"
          + "max(a_f),"
          + "avg(a_i),"
          + "avg(a_f),"
          + "count(*)"
          + "),"
          + "workers=\"2\", zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", sort=\"a_s asc\")");
      stream = factory.constructStream(expression);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 3);

      //Test Long and Double Sums
      Tuple tuple = tuples.get(0);
      String bucket = tuple.getString("a_s");
      Double sumi = tuple.getDouble("sum(a_i)");
      Double sumf = tuple.getDouble("sum(a_f)");
      Double mini = tuple.getDouble("min(a_i)");
      Double minf = tuple.getDouble("min(a_f)");
      Double maxi = tuple.getDouble("max(a_i)");
      Double maxf = tuple.getDouble("max(a_f)");
      Double avgi = tuple.getDouble("avg(a_i)");
      Double avgf = tuple.getDouble("avg(a_f)");
      Double count = tuple.getDouble("count(*)");
      assertTrue(bucket.equals("hello0"));
      assertTrue(sumi.doubleValue() == 17.0D);
      assertTrue(sumf.doubleValue() == 18.0D);
      assertTrue(mini.doubleValue() == 0.0D);
      assertTrue(minf.doubleValue() == 1.0D);
      assertTrue(maxi.doubleValue() == 14.0D);
      assertTrue(maxf.doubleValue() == 10.0D);
      assertTrue(avgi.doubleValue() == 4.25D);
      assertTrue(avgf.doubleValue() == 4.5D);
      assertTrue(count.doubleValue() == 4);

      tuple = tuples.get(1);
      bucket = tuple.getString("a_s");
      sumi = tuple.getDouble("sum(a_i)");
      sumf = tuple.getDouble("sum(a_f)");
      mini = tuple.getDouble("min(a_i)");
      minf = tuple.getDouble("min(a_f)");
      maxi = tuple.getDouble("max(a_i)");
      maxf = tuple.getDouble("max(a_f)");
      avgi = tuple.getDouble("avg(a_i)");
      avgf = tuple.getDouble("avg(a_f)");
      count = tuple.getDouble("count(*)");
      assertTrue(bucket.equals("hello3"));
      assertTrue(sumi.doubleValue() == 38.0D);
      assertTrue(sumf.doubleValue() == 26.0D);
      assertTrue(mini.doubleValue() == 3.0D);
      assertTrue(minf.doubleValue() == 3.0D);
      assertTrue(maxi.doubleValue() == 13.0D);
      assertTrue(maxf.doubleValue() == 9.0D);
      assertTrue(avgi.doubleValue() == 9.5D);
      assertTrue(avgf.doubleValue() == 6.5D);
      assertTrue(count.doubleValue() == 4);

      tuple = tuples.get(2);
      bucket = tuple.getString("a_s");
      sumi = tuple.getDouble("sum(a_i)");
      sumf = tuple.getDouble("sum(a_f)");
      mini = tuple.getDouble("min(a_i)");
      minf = tuple.getDouble("min(a_f)");
      maxi = tuple.getDouble("max(a_i)");
      maxf = tuple.getDouble("max(a_f)");
      avgi = tuple.getDouble("avg(a_i)");
      avgf = tuple.getDouble("avg(a_f)");
      count = tuple.getDouble("count(*)");
      assertTrue(bucket.equals("hello4"));
      assertTrue(sumi.longValue() == 15);
      assertTrue(sumf.doubleValue() == 11.0D);
      assertTrue(mini.doubleValue() == 4.0D);
      assertTrue(minf.doubleValue() == 4.0D);
      assertTrue(maxi.doubleValue() == 11.0D);
      assertTrue(maxf.doubleValue() == 7.0D);
      assertTrue(avgi.doubleValue() == 7.5D);
      assertTrue(avgf.doubleValue() == 5.5D);
      assertTrue(count.doubleValue() == 2);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testInnerJoinStream() throws Exception {
    new UpdateRequest()
        .add(id, "1", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "15", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "2", "side_s", "left", "join1_i", "0", "join2_s", "b", "ident_s", "left_2")
        .add(id, "3", "side_s", "left", "join1_i", "1", "join2_s", "a", "ident_s", "left_3") // 10
        .add(id, "4", "side_s", "left", "join1_i", "1", "join2_s", "b", "ident_s", "left_4") // 11
        .add(id, "5", "side_s", "left", "join1_i", "1", "join2_s", "c", "ident_s", "left_5") // 12
        .add(id, "6", "side_s", "left", "join1_i", "2", "join2_s", "d", "ident_s", "left_6")
        .add(id, "7", "side_s", "left", "join1_i", "3", "join2_s", "e", "ident_s", "left_7") // 14
        .add(id, "8", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_1", "join3_i", "0") // 1,15
        .add(id, "9", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_2", "join3_i", "0") // 1,15
        .add(id, "10", "side_s", "right", "join1_i", "1", "join2_s", "a", "ident_s", "right_3", "join3_i", "1") // 3
        .add(id, "11", "side_s", "right", "join1_i", "1", "join2_s", "b", "ident_s", "right_4", "join3_i", "1") // 4
        .add(id, "12", "side_s", "right", "join1_i", "1", "join2_s", "c", "ident_s", "right_5", "join3_i", "1") // 5
        .add(id, "13", "side_s", "right", "join1_i", "2", "join2_s", "dad", "ident_s", "right_6", "join3_i", "2")
        .add(id, "14", "side_s", "right", "join1_i", "3", "join2_s", "e", "ident_s", "right_7", "join3_i", "3") // 7
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("innerJoin", InnerJoinStream.class);

    try {
      // Basic test
      expression = StreamExpressionParser.parse("innerJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc\"),"
          + "on=\"join1_i=join1_i, join2_s=join2_s\")");
      stream = new InnerJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 1, 1, 15, 15, 3, 4, 5, 7);

      // Basic desc
      expression = StreamExpressionParser.parse("innerJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "on=\"join1_i=join1_i, join2_s=join2_s\")");
      stream = new InnerJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 7, 3, 4, 5, 1, 1, 15, 15);

      // Results in both searches, no join matches
      expression = StreamExpressionParser.parse("innerJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\", aliases=\"id=right.id, join1_i=right.join1_i, join2_s=right.join2_s, ident_s=right.ident_s\"),"
          + "on=\"ident_s=right.ident_s\")");
      stream = new InnerJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 0);

      // Differing field names
      expression = StreamExpressionParser.parse("innerJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\", sort=\"join3_i asc, join2_s asc\", aliases=\"join3_i=aliasesField\"),"
          + "on=\"join1_i=aliasesField, join2_s=join2_s\")");
      stream = new InnerJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 1, 1, 15, 15, 3, 4, 5, 7);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testLeftOuterJoinStream() throws Exception {
    new UpdateRequest()
        .add(id, "1", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "15", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "2", "side_s", "left", "join1_i", "0", "join2_s", "b", "ident_s", "left_2")
        .add(id, "3", "side_s", "left", "join1_i", "1", "join2_s", "a", "ident_s", "left_3") // 10
        .add(id, "4", "side_s", "left", "join1_i", "1", "join2_s", "b", "ident_s", "left_4") // 11
        .add(id, "5", "side_s", "left", "join1_i", "1", "join2_s", "c", "ident_s", "left_5") // 12
        .add(id, "6", "side_s", "left", "join1_i", "2", "join2_s", "d", "ident_s", "left_6")
        .add(id, "7", "side_s", "left", "join1_i", "3", "join2_s", "e", "ident_s", "left_7") // 14
        .add(id, "8", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_1", "join3_i", "0") // 1,15
        .add(id, "9", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_2", "join3_i", "0") // 1,15
        .add(id, "10", "side_s", "right", "join1_i", "1", "join2_s", "a", "ident_s", "right_3", "join3_i", "1") // 3
        .add(id, "11", "side_s", "right", "join1_i", "1", "join2_s", "b", "ident_s", "right_4", "join3_i", "1") // 4
        .add(id, "12", "side_s", "right", "join1_i", "1", "join2_s", "c", "ident_s", "right_5", "join3_i", "1") // 5
        .add(id, "13", "side_s", "right", "join1_i", "2", "join2_s", "dad", "ident_s", "right_6", "join3_i", "2")
        .add(id, "14", "side_s", "right", "join1_i", "3", "join2_s", "e", "ident_s", "right_7", "join3_i", "3") // 7
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("leftOuterJoin", LeftOuterJoinStream.class);

    // Basic test
    try {
      expression = StreamExpressionParser.parse("leftOuterJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc\"),"
          + "on=\"join1_i=join1_i, join2_s=join2_s\")");
      stream = new LeftOuterJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 1, 1, 15, 15, 2, 3, 4, 5, 6, 7);

      // Basic desc
      expression = StreamExpressionParser.parse("leftOuterJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "on=\"join1_i=join1_i, join2_s=join2_s\")");
      stream = new LeftOuterJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 7, 6, 3, 4, 5, 1, 1, 15, 15, 2);

      // Results in both searches, no join matches
      expression = StreamExpressionParser.parse("leftOuterJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\", aliases=\"id=right.id, join1_i=right.join1_i, join2_s=right.join2_s, ident_s=right.ident_s\"),"
          + "on=\"ident_s=right.ident_s\")");
      stream = new LeftOuterJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 1, 15, 2, 3, 4, 5, 6, 7);

      // Differing field names
      expression = StreamExpressionParser.parse("leftOuterJoin("
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "search(" + COLLECTIONORALIAS + ", q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\", sort=\"join3_i asc, join2_s asc\", aliases=\"join3_i=aliasesField\"),"
          + "on=\"join1_i=aliasesField, join2_s=join2_s\")");
      stream = new LeftOuterJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 1, 1, 15, 15, 2, 3, 4, 5, 6, 7);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testHashJoinStream() throws Exception {
    new UpdateRequest()
        .add(id, "1", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "15", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "2", "side_s", "left", "join1_i", "0", "join2_s", "b", "ident_s", "left_2")
        .add(id, "3", "side_s", "left", "join1_i", "1", "join2_s", "a", "ident_s", "left_3") // 10
        .add(id, "4", "side_s", "left", "join1_i", "1", "join2_s", "b", "ident_s", "left_4") // 11
        .add(id, "5", "side_s", "left", "join1_i", "1", "join2_s", "c", "ident_s", "left_5") // 12
        .add(id, "6", "side_s", "left", "join1_i", "2", "join2_s", "d", "ident_s", "left_6")
        .add(id, "7", "side_s", "left", "join1_i", "3", "join2_s", "e", "ident_s", "left_7") // 14
        .add(id, "8", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_1", "join3_i", "0") // 1,15
        .add(id, "9", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_2", "join3_i", "0") // 1,15
        .add(id, "10", "side_s", "right", "join1_i", "1", "join2_s", "a", "ident_s", "right_3", "join3_i", "1") // 3
        .add(id, "11", "side_s", "right", "join1_i", "1", "join2_s", "b", "ident_s", "right_4", "join3_i", "1") // 4
        .add(id, "12", "side_s", "right", "join1_i", "1", "join2_s", "c", "ident_s", "right_5", "join3_i", "1") // 5
        .add(id, "13", "side_s", "right", "join1_i", "2", "join2_s", "dad", "ident_s", "right_6", "join3_i", "2")
        .add(id, "14", "side_s", "right", "join1_i", "3", "join2_s", "e", "ident_s", "right_7", "join3_i", "3") // 7
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("hashJoin", HashJoinStream.class);

    try {
      // Basic test
      expression = StreamExpressionParser.parse("hashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc\"),"
          + "on=\"join1_i, join2_s\")");
      stream = new HashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 1, 1, 15, 15, 3, 4, 5, 7);

      // Basic desc
      expression = StreamExpressionParser.parse("hashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "on=\"join1_i, join2_s\")");
      stream = new HashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 7, 3, 4, 5, 1, 1, 15, 15);

      // Results in both searches, no join matches
      expression = StreamExpressionParser.parse("hashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "on=\"ident_s\")");
      stream = new HashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 0);

      // Basic test with "on" mapping
      expression = StreamExpressionParser.parse("hashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join3_i,ident_s\", sort=\"join1_i asc, join3_i asc, id asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join3_i,ident_s\", sort=\"join1_i asc, join3_i asc\"),"
          + "on=\"join1_i=join3_i\")");
      stream = new HashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
assertEquals(17, tuples.size());

      //Does a lexical sort
      assertOrder(tuples, 1, 1, 15, 15, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testOuterHashJoinStream() throws Exception {
    new UpdateRequest()
        .add(id, "1", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "15", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "2", "side_s", "left", "join1_i", "0", "join2_s", "b", "ident_s", "left_2")
        .add(id, "3", "side_s", "left", "join1_i", "1", "join2_s", "a", "ident_s", "left_3") // 10
        .add(id, "4", "side_s", "left", "join1_i", "1", "join2_s", "b", "ident_s", "left_4") // 11
        .add(id, "5", "side_s", "left", "join1_i", "1", "join2_s", "c", "ident_s", "left_5") // 12
        .add(id, "6", "side_s", "left", "join1_i", "2", "join2_s", "d", "ident_s", "left_6")
        .add(id, "7", "side_s", "left", "join1_i", "3", "join2_s", "e", "ident_s", "left_7") // 14
        .add(id, "8", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_1", "join3_i", "0") // 1,15
        .add(id, "9", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_2", "join3_i", "0") // 1,15
        .add(id, "10", "side_s", "right", "join1_i", "1", "join2_s", "a", "ident_s", "right_3", "join3_i", "1") // 3
        .add(id, "11", "side_s", "right", "join1_i", "1", "join2_s", "b", "ident_s", "right_4", "join3_i", "1") // 4
        .add(id, "12", "side_s", "right", "join1_i", "1", "join2_s", "c", "ident_s", "right_5", "join3_i", "1") // 5
        .add(id, "13", "side_s", "right", "join1_i", "2", "join2_s", "dad", "ident_s", "right_6", "join3_i", "2")
        .add(id, "14", "side_s", "right", "join1_i", "3", "join2_s", "e", "ident_s", "right_7", "join3_i", "3") // 7
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("outerHashJoin", OuterHashJoinStream.class);
    try {
      // Basic test
      expression = StreamExpressionParser.parse("outerHashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc\"),"
          + "on=\"join1_i, join2_s\")");
      stream = new OuterHashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 1, 1, 15, 15, 2, 3, 4, 5, 6, 7);

      // Basic desc
      expression = StreamExpressionParser.parse("outerHashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join2_s,ident_s\", sort=\"join1_i desc, join2_s asc\"),"
          + "on=\"join1_i, join2_s\")");
      stream = new OuterHashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 7, 6, 3, 4, 5, 1, 1, 15, 15, 2);

      // Results in both searches, no join matches
      expression = StreamExpressionParser.parse("outerHashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"ident_s asc\"),"
          + "on=\"ident_s\")");
      stream = new OuterHashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 8);
      assertOrder(tuples, 1, 15, 2, 3, 4, 5, 6, 7);

      // Basic test
      expression = StreamExpressionParser.parse("outerHashJoin("
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\"),"
          + "hashed=search(collection1, q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\", sort=\"join2_s asc\"),"
          + "on=\"join1_i=join3_i, join2_s\")");
      stream = new OuterHashJoinStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);

      assert (tuples.size() == 10);
      assertOrder(tuples, 1, 1, 15, 15, 2, 3, 4, 5, 6, 7);
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testSelectStream() throws Exception {
    new UpdateRequest()
        .add(id, "1", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "15", "side_s", "left", "join1_i", "0", "join2_s", "a", "ident_s", "left_1") // 8, 9
        .add(id, "2", "side_s", "left", "join1_i", "0", "join2_s", "b", "ident_s", "left_2")
        .add(id, "3", "side_s", "left", "join1_i", "1", "join2_s", "a", "ident_s", "left_3") // 10
        .add(id, "4", "side_s", "left", "join1_i", "1", "join2_s", "b", "ident_s", "left_4") // 11
        .add(id, "5", "side_s", "left", "join1_i", "1", "join2_s", "c", "ident_s", "left_5") // 12
        .add(id, "6", "side_s", "left", "join1_i", "2", "join2_s", "d", "ident_s", "left_6")
        .add(id, "7", "side_s", "left", "join1_i", "3", "join2_s", "e", "ident_s", "left_7") // 14
        .add(id, "8", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_1", "join3_i", "0") // 1,15
        .add(id, "9", "side_s", "right", "join1_i", "0", "join2_s", "a", "ident_s", "right_2", "join3_i", "0") // 1,15
        .add(id, "10", "side_s", "right", "join1_i", "1", "join2_s", "a", "ident_s", "right_3", "join3_i", "1") // 3
        .add(id, "11", "side_s", "right", "join1_i", "1", "join2_s", "b", "ident_s", "right_4", "join3_i", "1") // 4
        .add(id, "12", "side_s", "right", "join1_i", "1", "join2_s", "c", "ident_s", "right_5", "join3_i", "1") // 5
        .add(id, "13", "side_s", "right", "join1_i", "2", "join2_s", "dad", "ident_s", "right_6", "join3_i", "2")
        .add(id, "14", "side_s", "right", "join1_i", "3", "join2_s", "e", "ident_s", "right_7", "join3_i", "3") // 7
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String clause;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("innerJoin", InnerJoinStream.class)
        .withFunctionName("select", SelectStream.class)
        .withFunctionName("replace", ReplaceOperation.class)
        .withFunctionName("concat", ConcatOperation.class)
        .withFunctionName("add", AddEvaluator.class)
        .withFunctionName("if", IfThenElseEvaluator.class)
        .withFunctionName("gt", GreaterThanEvaluator.class);

    try {
      // Basic test
      clause = "select("
          + "id, join1_i as join1, join2_s as join2, ident_s as identity,"
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\")"
          + ")";
      stream = factory.constructStream(clause);
stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertFields(tuples, "id", "join1", "join2", "identity");
      assertNotFields(tuples, "join1_i", "join2_s", "ident_s");

      // Basic with replacements test
      clause = "select("
          + "id, join1_i as join1, join2_s as join2, ident_s as identity,"
          + "replace(join1, 0, withValue=12), replace(join1, 3, withValue=12), replace(join1, 2, withField=join2),"
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\")"
          + ")";
      stream = factory.constructStream(clause);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertFields(tuples, "id", "join1", "join2", "identity");
      assertNotFields(tuples, "join1_i", "join2_s", "ident_s");
      assertLong(tuples.get(0), "join1", 12);
      assertLong(tuples.get(1), "join1", 12);
      assertLong(tuples.get(2), "join1", 12);
      assertLong(tuples.get(7), "join1", 12);
      assertString(tuples.get(6), "join1", "d");

      // Basic with replacements and concat test
      clause = "select("
          + "id, join1_i as join1, join2_s as join2, ident_s as identity,"
          + "replace(join1, 0, withValue=12), replace(join1, 3, withValue=12), replace(join1, 2, withField=join2),"
          + "concat(fields=\"identity,join1\", as=\"newIdentity\",delim=\"-\"),"
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\")"
          + ")";
      stream = factory.constructStream(clause);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertFields(tuples, "id", "join1", "join2", "identity", "newIdentity");
      assertNotFields(tuples, "join1_i", "join2_s", "ident_s");
      assertLong(tuples.get(0), "join1", 12);
      assertString(tuples.get(0), "newIdentity", "left_1-12");
      assertLong(tuples.get(1), "join1", 12);
      assertString(tuples.get(1), "newIdentity", "left_1-12");
      assertLong(tuples.get(2), "join1", 12);
      assertString(tuples.get(2), "newIdentity", "left_2-12");
      assertLong(tuples.get(7), "join1", 12);
      assertString(tuples.get(7), "newIdentity", "left_7-12");
      assertString(tuples.get(6), "join1", "d");
      assertString(tuples.get(6), "newIdentity", "left_6-d");

      // Inner stream test
      clause = "innerJoin("
          + "select("
          + "id, join1_i as left.join1, join2_s as left.join2, ident_s as left.ident,"
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\")"
          + "),"
          + "select("
          + "join3_i as right.join1, join2_s as right.join2, ident_s as right.ident,"
          + "search(collection1, q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\", sort=\"join3_i asc, join2_s asc\"),"
          + "),"
          + "on=\"left.join1=right.join1, left.join2=right.join2\""
          + ")";
      stream = factory.constructStream(clause);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertFields(tuples, "id", "left.join1", "left.join2", "left.ident", "right.join1", "right.join2", "right.ident");

      // Wrapped select test
      clause = "select("
          + "id, left.ident, right.ident,"
          + "innerJoin("
          + "select("
          + "id, join1_i as left.join1, join2_s as left.join2, ident_s as left.ident,"
          + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i asc, join2_s asc, id asc\")"
          + "),"
          + "select("
          + "join3_i as right.join1, join2_s as right.join2, ident_s as right.ident,"
          + "search(collection1, q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\", sort=\"join3_i asc, join2_s asc\"),"
          + "),"
          + "on=\"left.join1=right.join1, left.join2=right.join2\""
          + ")"
          + ")";
      stream = factory.constructStream(clause);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      assertFields(tuples, "id", "left.ident", "right.ident");
      assertNotFields(tuples, "left.join1", "left.join2", "right.join1", "right.join2");
    } finally {
      solrClientCache.close();
    }
  }

  @Test
  public void testFacetStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String clause;
    TupleStream stream;
    List<Tuple> tuples;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("facet", FacetStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class);

    // Basic test
    clause = "facet("
        + "collection1, "
        + "q=\"*:*\", "
        + "fl=\"a_s,a_i,a_f\", "
        + "sort=\"a_s asc\", "
        + "buckets=\"a_s\", "
        + "bucketSorts=\"sum(a_i) asc\", "
        + "bucketSizeLimit=100, "
        + "sum(a_i), sum(a_f), "
        + "min(a_i), min(a_f), "
        + "max(a_i), max(a_f), "
        + "avg(a_i), avg(a_f), "
        + "count(*)"
        + ")";
    stream = factory.constructStream(clause);
    tuples = getTuples(stream);

    assert(tuples.size() == 3);

    //Test Long and Double Sums
    Tuple tuple = tuples.get(0);
    String bucket = tuple.getString("a_s");
    Double sumi = tuple.getDouble("sum(a_i)");
    Double sumf = tuple.getDouble("sum(a_f)");
    Double mini = tuple.getDouble("min(a_i)");
    Double minf = tuple.getDouble("min(a_f)");
    Double maxi = tuple.getDouble("max(a_i)");
    Double maxf = tuple.getDouble("max(a_f)");
    Double avgi = tuple.getDouble("avg(a_i)");
    Double avgf = tuple.getDouble("avg(a_f)");
    Double count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello4"));
    assertTrue(sumi.longValue() == 15);
    assertTrue(sumf.doubleValue() == 11.0D);
    assertTrue(mini.doubleValue() == 4.0D);
    assertTrue(minf.doubleValue() == 4.0D);
    assertTrue(maxi.doubleValue() == 11.0D);
    assertTrue(maxf.doubleValue() == 7.0D);
    assertTrue(avgi.doubleValue() == 7.5D);
    assertTrue(avgf.doubleValue() == 5.5D);
    assertTrue(count.doubleValue() == 2);

    tuple = tuples.get(1);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello0"));
    assertTrue(sumi.doubleValue() == 17.0D);
    assertTrue(sumf.doubleValue() == 18.0D);
    assertTrue(mini.doubleValue() == 0.0D);
    assertTrue(minf.doubleValue() == 1.0D);
    assertTrue(maxi.doubleValue() == 14.0D);
    assertTrue(maxf.doubleValue() == 10.0D);
    assertTrue(avgi.doubleValue() == 4.25D);
    assertTrue(avgf.doubleValue() == 4.5D);
    assertTrue(count.doubleValue() == 4);

    tuple = tuples.get(2);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello3"));
    assertTrue(sumi.doubleValue() == 38.0D);
    assertTrue(sumf.doubleValue() == 26.0D);
    assertTrue(mini.doubleValue() == 3.0D);
    assertTrue(minf.doubleValue() == 3.0D);
    assertTrue(maxi.doubleValue() == 13.0D);
    assertTrue(maxf.doubleValue() == 9.0D);
    assertTrue(avgi.doubleValue() == 9.5D);
    assertTrue(avgf.doubleValue() == 6.5D);
    assertTrue(count.doubleValue() == 4);

    //Reverse the Sort.
    clause = "facet("
        + "collection1, "
        + "q=\"*:*\", "
        + "fl=\"a_s,a_i,a_f\", "
        + "sort=\"a_s asc\", "
        + "buckets=\"a_s\", "
        + "bucketSorts=\"sum(a_i) desc\", "
        + "bucketSizeLimit=100, "
        + "sum(a_i), sum(a_f), "
        + "min(a_i), min(a_f), "
        + "max(a_i), max(a_f), "
        + "avg(a_i), avg(a_f), "
        + "count(*)"
        + ")";
    stream = factory.constructStream(clause);
    tuples = getTuples(stream);

    //Test Long and Double Sums
    tuple = tuples.get(0);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello3"));
    assertTrue(sumi.doubleValue() == 38.0D);
    assertTrue(sumf.doubleValue() == 26.0D);
    assertTrue(mini.doubleValue() == 3.0D);
    assertTrue(minf.doubleValue() == 3.0D);
    assertTrue(maxi.doubleValue() == 13.0D);
    assertTrue(maxf.doubleValue() == 9.0D);
    assertTrue(avgi.doubleValue() == 9.5D);
    assertTrue(avgf.doubleValue() == 6.5D);
    assertTrue(count.doubleValue() == 4);

    tuple = tuples.get(1);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello0"));
    assertTrue(sumi.doubleValue() == 17.0D);
    assertTrue(sumf.doubleValue() == 18.0D);
    assertTrue(mini.doubleValue() == 0.0D);
    assertTrue(minf.doubleValue() == 1.0D);
    assertTrue(maxi.doubleValue() == 14.0D);
    assertTrue(maxf.doubleValue() == 10.0D);
    assertTrue(avgi.doubleValue() == 4.25D);
    assertTrue(avgf.doubleValue() == 4.5D);
    assertTrue(count.doubleValue() == 4);

    tuple = tuples.get(2);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello4"));
    assertTrue(sumi.longValue() == 15);
    assertTrue(sumf.doubleValue() == 11.0D);
    assertTrue(mini.doubleValue() == 4.0D);
    assertTrue(minf.doubleValue() == 4.0D);
    assertTrue(maxi.doubleValue() == 11.0D);
    assertTrue(maxf.doubleValue() == 7.0D);
    assertTrue(avgi.doubleValue() == 7.5D);
    assertTrue(avgf.doubleValue() == 5.5D);
    assertTrue(count.doubleValue() == 2);

    //Test index sort
    clause = "facet("
        + "collection1, "
        + "q=\"*:*\", "
        + "fl=\"a_s,a_i,a_f\", "
        + "sort=\"a_s asc\", "
        + "buckets=\"a_s\", "
        + "bucketSorts=\"a_s desc\", "
        + "bucketSizeLimit=100, "
        + "sum(a_i), sum(a_f), "
        + "min(a_i), min(a_f), "
        + "max(a_i), max(a_f), "
        + "avg(a_i), avg(a_f), "
        + "count(*)"
        + ")";
    stream = factory.constructStream(clause);
    tuples = getTuples(stream);

    assert(tuples.size() == 3);

    tuple = tuples.get(0);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello4"));
    assertTrue(sumi.longValue() == 15);
    assertTrue(sumf.doubleValue() == 11.0D);
    assertTrue(mini.doubleValue() == 4.0D);
    assertTrue(minf.doubleValue() == 4.0D);
    assertTrue(maxi.doubleValue() == 11.0D);
    assertTrue(maxf.doubleValue() == 7.0D);
    assertTrue(avgi.doubleValue() == 7.5D);
    assertTrue(avgf.doubleValue() == 5.5D);
    assertTrue(count.doubleValue() == 2);

    tuple = tuples.get(1);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello3"));
    assertTrue(sumi.doubleValue() == 38.0D);
    assertTrue(sumf.doubleValue() == 26.0D);
    assertTrue(mini.doubleValue() == 3.0D);
    assertTrue(minf.doubleValue() == 3.0D);
    assertTrue(maxi.doubleValue() == 13.0D);
    assertTrue(maxf.doubleValue() == 9.0D);
    assertTrue(avgi.doubleValue() == 9.5D);
    assertTrue(avgf.doubleValue() == 6.5D);
    assertTrue(count.doubleValue() == 4);

    tuple = tuples.get(2);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello0"));
    assertTrue(sumi.doubleValue() == 17.0D);
    assertTrue(sumf.doubleValue() == 18.0D);
    assertTrue(mini.doubleValue() == 0.0D);
    assertTrue(minf.doubleValue() == 1.0D);
    assertTrue(maxi.doubleValue() == 14.0D);
    assertTrue(maxf.doubleValue() == 10.0D);
    assertTrue(avgi.doubleValue() == 4.25D);
    assertTrue(avgf.doubleValue() == 4.5D);
    assertTrue(count.doubleValue() == 4);

    //Test index sort
    clause = "facet("
        + "collection1, "
        + "q=\"*:*\", "
        + "fl=\"a_s,a_i,a_f\", "
        + "sort=\"a_s asc\", "
        + "buckets=\"a_s\", "
        + "bucketSorts=\"a_s asc\", "
        + "bucketSizeLimit=100, "
        + "sum(a_i), sum(a_f), "
        + "min(a_i), min(a_f), "
        + "max(a_i), max(a_f), "
        + "avg(a_i), avg(a_f), "
        + "count(*)"
        + ")";
    stream = factory.constructStream(clause);
    tuples = getTuples(stream);

    assert(tuples.size() == 3);

    tuple = tuples.get(0);
    bucket = tuple.getString("a_s");
    sumi = tuple.getDouble("sum(a_i)");
    sumf = tuple.getDouble("sum(a_f)");
    mini = tuple.getDouble("min(a_i)");
    minf = tuple.getDouble("min(a_f)");
    maxi = tuple.getDouble("max(a_i)");
    maxf = tuple.getDouble("max(a_f)");
    avgi = tuple.getDouble("avg(a_i)");
    avgf = tuple.getDouble("avg(a_f)");
    count = tuple.getDouble("count(*)");
    assertTrue(bucket.equals("hello0"));
    assertTrue(sumi.doubleValue() == 17.0D);
    assertTrue(sumf.doubleValue() == 18.0D);
    assertTrue(mini.doubleValue() ==
0.0D); assertTrue(minf.doubleValue() == 1.0D); assertTrue(maxi.doubleValue() == 14.0D); assertTrue(maxf.doubleValue() == 10.0D); assertTrue(avgi.doubleValue() == 4.25D); assertTrue(avgf.doubleValue() == 4.5D); assertTrue(count.doubleValue() == 4); tuple = tuples.get(1); bucket = tuple.getString("a_s"); sumi = tuple.getDouble("sum(a_i)"); sumf = tuple.getDouble("sum(a_f)"); mini = tuple.getDouble("min(a_i)"); minf = tuple.getDouble("min(a_f)"); maxi = tuple.getDouble("max(a_i)"); maxf = tuple.getDouble("max(a_f)"); avgi = tuple.getDouble("avg(a_i)"); avgf = tuple.getDouble("avg(a_f)"); count = tuple.getDouble("count(*)"); assertTrue(bucket.equals("hello3")); assertTrue(sumi.doubleValue() == 38.0D); assertTrue(sumf.doubleValue() == 26.0D); assertTrue(mini.doubleValue() == 3.0D); assertTrue(minf.doubleValue() == 3.0D); assertTrue(maxi.doubleValue() == 13.0D); assertTrue(maxf.doubleValue() == 9.0D); assertTrue(avgi.doubleValue() == 9.5D); assertTrue(avgf.doubleValue() == 6.5D); assertTrue(count.doubleValue() == 4); tuple = tuples.get(2); bucket = tuple.getString("a_s"); sumi = tuple.getDouble("sum(a_i)"); sumf = tuple.getDouble("sum(a_f)"); mini = tuple.getDouble("min(a_i)"); minf = tuple.getDouble("min(a_f)"); maxi = tuple.getDouble("max(a_i)"); maxf = tuple.getDouble("max(a_f)"); avgi = tuple.getDouble("avg(a_i)"); avgf = tuple.getDouble("avg(a_f)"); count = tuple.getDouble("count(*)"); assertTrue(bucket.equals("hello4")); assertTrue(sumi.longValue() == 15); assertTrue(sumf.doubleValue() == 11.0D); assertTrue(mini.doubleValue() == 4.0D); assertTrue(minf.doubleValue() == 4.0D); assertTrue(maxi.doubleValue() == 11.0D); assertTrue(maxf.doubleValue() == 7.0D); assertTrue(avgi.doubleValue() == 7.5D); assertTrue(avgf.doubleValue() == 5.5D); assertTrue(count.doubleValue() == 2); //Test zero result facets clause = "facet(" + "collection1, " + "q=\"blahhh\", " + "fl=\"a_s,a_i,a_f\", " + "sort=\"a_s asc\", " + "buckets=\"a_s\", " + "bucketSorts=\"a_s asc\", " + "bucketSizeLimit=100, " + "sum(a_i), sum(a_f), " + "min(a_i), min(a_f), " + "max(a_i), max(a_f), " + "avg(a_i), avg(a_f), " + "count(*)" + ")"; stream = factory.constructStream(clause); tuples = getTuples(stream); assert(tuples.size() == 0); } @Test public void testSubFacetStream() throws Exception { new UpdateRequest() .add(id, "0", "level1_s", "hello0", "level2_s", "a", "a_i", "0", "a_f", "1") .add(id, "2", "level1_s", "hello0", "level2_s", "a", "a_i", "2", "a_f", "2") .add(id, "3", "level1_s", "hello3", "level2_s", "a", "a_i", "3", "a_f", "3") .add(id, "4", "level1_s", "hello4", "level2_s", "a", "a_i", "4", "a_f", "4") .add(id, "1", "level1_s", "hello0", "level2_s", "b", "a_i", "1", "a_f", "5") .add(id, "5", "level1_s", "hello3", "level2_s", "b", "a_i", "10", "a_f", "6") .add(id, "6", "level1_s", "hello4", "level2_s", "b", "a_i", "11", "a_f", "7") .add(id, "7", "level1_s", "hello3", "level2_s", "b", "a_i", "12", "a_f", "8") .add(id, "8", "level1_s", "hello3", "level2_s", "b", "a_i", "13", "a_f", "9") .add(id, "9", "level1_s", "hello0", "level2_s", "b", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); String clause; TupleStream stream; List<Tuple> tuples; StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("facet", FacetStream.class) .withFunctionName("sum", SumMetric.class) .withFunctionName("min", MinMetric.class) .withFunctionName("max", MaxMetric.class) .withFunctionName("avg", MeanMetric.class) .withFunctionName("count", 
CountMetric.class); // Basic test clause = "facet(" + "collection1, " + "q=\"*:*\", " + "buckets=\"level1_s, level2_s\", " + "bucketSorts=\"sum(a_i) desc, sum(a_i) desc\", " + "bucketSizeLimit=100, " + "sum(a_i), count(*)" + ")"; stream = factory.constructStream(clause); tuples = getTuples(stream); assert(tuples.size() == 6); Tuple tuple = tuples.get(0); String bucket1 = tuple.getString("level1_s"); String bucket2 = tuple.getString("level2_s"); Double sumi = tuple.getDouble("sum(a_i)"); Double count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello3")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 35); assertTrue(count.doubleValue() == 3); tuple = tuples.get(1); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello0")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 15); assertTrue(count.doubleValue() == 2); tuple = tuples.get(2); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello4")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 11); assertTrue(count.doubleValue() == 1); tuple = tuples.get(3); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello4")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 4); assertTrue(count.doubleValue() == 1); tuple = tuples.get(4); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello3")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 3); assertTrue(count.doubleValue() == 1); tuple = tuples.get(5); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello0")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 2); assertTrue(count.doubleValue() == 2); clause = "facet(" + "collection1, " + "q=\"*:*\", " + "buckets=\"level1_s, level2_s\", " + "bucketSorts=\"level1_s desc, level2_s desc\", " + "bucketSizeLimit=100, " + "sum(a_i), count(*)" + ")"; stream = factory.constructStream(clause); tuples = getTuples(stream); assert(tuples.size() == 6); tuple = tuples.get(0); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello4")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 11); assertTrue(count.doubleValue() == 1); tuple = tuples.get(1); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello4")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 4); assertTrue(count.doubleValue() == 1); tuple = tuples.get(2); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello3")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 35); assertTrue(count.doubleValue() == 3); tuple = tuples.get(3); 
bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello3")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 3); assertTrue(count.doubleValue() == 1); tuple = tuples.get(4); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello0")); assertTrue(bucket2.equals("b")); assertTrue(sumi.longValue() == 15); assertTrue(count.doubleValue() == 2); tuple = tuples.get(5); bucket1 = tuple.getString("level1_s"); bucket2 = tuple.getString("level2_s"); sumi = tuple.getDouble("sum(a_i)"); count = tuple.getDouble("count(*)"); assertTrue(bucket1.equals("hello0")); assertTrue(bucket2.equals("a")); assertTrue(sumi.longValue() == 2); assertTrue(count.doubleValue() == 2); } @Test public void testTopicStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1") .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2") .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3") .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4") .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5") .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6") .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7") .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8") .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("daemon", DaemonStream.class); StreamExpression expression; TupleStream stream; List<Tuple> tuples; SolrClientCache cache = new SolrClientCache(); try { //Store checkpoints in the same index as the main documents. This is perfectly valid. expression = StreamExpressionParser.parse("topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); //Should be zero because the checkpoints will be set to the highest version on the shards. 
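//A note on the mechanics being verified here: the topic persists one
//checkpoint per shard in the multi-valued checkpoint_ss field of a document
//whose id is the topic id ("1000000" above), which is why the follow-up
//search finds a single checkpoint document carrying two checkpoints for this
//two-shard collection. A minimal sketch of reading the checkpoint document
//back directly with SolrJ (illustrative only, not part of the original test;
//imports elided):
//
//  SolrQuery cp = new SolrQuery("id:1000000");
//  cp.setFields("id", "checkpoint_ss", "_version_");
//  QueryResponse rsp = cluster.getSolrClient().query("collection1", cp);
//  Object perShardCheckpoints = rsp.getResults().get(0).get("checkpoint_ss"); // one entry per shard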
assertEquals(tuples.size(), 0); cluster.getSolrClient().commit("collection1"); //Now check to see if the checkpoints are present expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); assertEquals(tuples.size(), 1); List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss"); assertEquals(checkpoints.size(), 2); Long version1 = tuples.get(0).getLong("_version_"); //Index a few more documents new UpdateRequest() .add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); expression = StreamExpressionParser.parse("topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2)"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); try { stream.open(); Tuple tuple1 = stream.read(); assertEquals((long) tuple1.getLong("id"), 10l); cluster.getSolrClient().commit("collection1"); // Checkpoint should not have changed. expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")"); TupleStream cstream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); cstream.setStreamContext(context); tuples = getTuples(cstream); assertEquals(tuples.size(), 1); checkpoints = tuples.get(0).getStrings("checkpoint_ss"); assertEquals(checkpoints.size(), 2); Long version2 = tuples.get(0).getLong("_version_"); assertEquals(version1, version2); Tuple tuple2 = stream.read(); cluster.getSolrClient().commit("collection1"); assertEquals((long) tuple2.getLong("id"), 11l); //Checkpoint should have changed. 
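//With checkpointEvery=2 the topic rewrites its checkpoint document after
//every second read, so the sequence above plays out as: read #1 returned doc
//10 and left the checkpoint document untouched (version1 == version2), while
//read #2 returned doc 11 and persisted new checkpoints, so the checkpoint
//document's _version_ fetched below should now be larger than version2.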
expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")"); cstream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); cstream.setStreamContext(context); tuples = getTuples(cstream); assertEquals(tuples.size(), 1); checkpoints = tuples.get(0).getStrings("checkpoint_ss"); assertEquals(checkpoints.size(), 2); Long version3 = tuples.get(0).getLong("_version_"); assertTrue(version3 > version2); Tuple tuple3 = stream.read(); assertTrue(tuple3.EOF); } finally { stream.close(); } //Test with the DaemonStream DaemonStream dstream = null; try { expression = StreamExpressionParser.parse("daemon(topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2), id=\"test\", runInterval=\"1000\", queueSize=\"9\")"); dstream = (DaemonStream) factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); dstream.setStreamContext(context); //Index a few more documents new UpdateRequest() .add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); //Start reading from the DaemonStream Tuple tuple = null; dstream.open(); tuple = dstream.read(); assertEquals(12, (long) tuple.getLong(id)); tuple = dstream.read(); assertEquals(13, (long) tuple.getLong(id)); cluster.getSolrClient().commit("collection1"); // We want to see if the version has been updated after reading two tuples //Index a few more documents new UpdateRequest() .add(id, "14", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "15", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); //Read from the same DaemonStream stream tuple = dstream.read(); assertEquals(14, (long) tuple.getLong(id)); tuple = dstream.read(); // This should trigger a checkpoint as it's the 4th read from the stream. 
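//Reads 2 and 4 are the persist points because checkpointEvery=2. For
//reference, the daemon lifecycle this block exercises, in sketch form
//(mirroring the calls made here, not additional test code):
//
//  dstream.open();           // starts a background thread that re-runs the
//                            // wrapped topic() every runInterval (1000 ms)
//  Tuple t = dstream.read(); // pops the next tuple from the daemon's
//                            // internal queue (queueSize=9)
//  dstream.shutdown();       // stops the run loop and enqueues a final EOF
//                            // tuple, so a trailing read() sees tuple.EOF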
assertEquals(15, (long) tuple.getLong(id)); dstream.shutdown(); tuple = dstream.read(); assertTrue(tuple.EOF); } finally { dstream.close(); } } finally { cache.close(); } } @Test public void testPriorityStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() .add(id, "0", "a_s", "hello1", "a_i", "0", "a_f", "1") .add(id, "2", "a_s", "hello1", "a_i", "2", "a_f", "2") .add(id, "3", "a_s", "hello1", "a_i", "3", "a_f", "3") .add(id, "4", "a_s", "hello1", "a_i", "4", "a_f", "4") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "5") .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6") .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7") .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8") .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "9", "a_s", "hello1", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("priority", PriorityStream.class); StreamExpression expression; TupleStream stream; List<Tuple> tuples; SolrClientCache cache = new SolrClientCache(); try { FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); Collections.sort(tuples, comp); //The tuples from the first topic (high priority) should be returned. assertEquals(tuples.size(), 4); assertOrder(tuples, 5, 6, 7, 8); expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); Collections.sort(tuples, comp); //The tuples from the second topic (low priority) should be returned. assertEquals(tuples.size(), 6); assertOrder(tuples, 0, 1, 2, 3, 4, 9); expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); //Both queues are empty. 
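//priority() drains the first (high priority) topic before it will emit
//anything from the second (low priority) topic, and a topic only emits
//documents added since its last checkpoint. Over the three runs above that
//produces, in sketch form:
//
//  run 1 -> only high-priority tuples (ids 5, 6, 7, 8 from a_s:hello)
//  run 2 -> only low-priority tuples (ids 0, 1, 2, 3, 4, 9 from a_s:hello1)
//  run 3 -> no tuples: both topics have caught up to their checkpoints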
assertEquals(tuples.size(), 0); } finally { cache.close(); } } @Test public void testParallelPriorityStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() .add(id, "0", "a_s", "hello1", "a_i", "0", "a_f", "1") .add(id, "2", "a_s", "hello1", "a_i", "2", "a_f", "2") .add(id, "3", "a_s", "hello1", "a_i", "3", "a_f", "3") .add(id, "4", "a_s", "hello1", "a_i", "4", "a_f", "4") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "5") .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6") .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7") .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8") .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "9", "a_s", "hello1", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("priority", PriorityStream.class); StreamExpression expression; TupleStream stream; List<Tuple> tuples; SolrClientCache cache = new SolrClientCache(); try { FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); Collections.sort(tuples, comp); //The tuples from the first topic (high priority) should be returned. assertEquals(tuples.size(), 4); assertOrder(tuples, 5, 6, 7, 8); expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); Collections.sort(tuples, comp); //The tuples from the second topic (low priority) should be returned. assertEquals(tuples.size(), 6); assertOrder(tuples, 0, 1, 2, 3, 4, 9); expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); //Both queues are empty. 
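//The parallel variant behaves like the plain priority test above:
//partitionKeys=id splits each topic's documents across the two workers, and
//each worker applies the same high-before-low ordering, so run 1 drains the
//a_s:hello topic, run 2 drains the a_s:hello1 topic, and this third run
//finds both topics exhausted.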
assertEquals(tuples.size(), 0); } finally { cache.close(); } } @Test public void testParallelTopicStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1", "subject", "ha ha bla blah0") .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2", "subject", "ha ha bla blah2") .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "subject", "ha ha bla blah3") .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "subject", "ha ha bla blah4") .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5", "subject", "ha ha bla blah5") .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6", "subject", "ha ha bla blah6") .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7", "subject", "ha ha bla blah7") .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8", "subject", "ha ha bla blah8") .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9", "subject", "ha ha bla blah9") .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10", "subject", "ha ha bla blah10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("daemon", DaemonStream.class); StreamExpression expression; TupleStream stream; List<Tuple> tuples; SolrClientCache cache = new SolrClientCache(); try { //Store checkpoints in the same index as the main documents. This is perfectly valid expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); //Should be zero because the checkpoints will be set to the highest version on the shards. assertEquals(tuples.size(), 0); cluster.getSolrClient().commit("collection1"); //Now check to see if the checkpoints are present expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); tuples = getTuples(stream); assertEquals(tuples.size(), 2); List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss"); assertEquals(checkpoints.size(), 2); String id1 = tuples.get(0).getString("id"); String id2 = tuples.get(1).getString("id"); assertTrue(id1.equals("1000000_0")); assertTrue(id2.equals("1000000_1")); //Index a few more documents new UpdateRequest() .add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); assertTopicRun(stream, "10", "11"); //Test with an initial checkpoint. 
This should pull all documents. expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"2000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); assertTopicRun(stream, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"); //Index a few more documents new UpdateRequest() .add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9") .add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); //Run the same topic again including the initialCheckpoint. It should start where it left off. //initialCheckpoint should be ignored for all but the first run. stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); assertTopicRun(stream, "12", "13"); //Test text extraction expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"subject:bla\", " + "fl=\"subject\", " + "id=\"3000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))"); stream = factory.constructStream(expression); context = new StreamContext(); context.setSolrClientCache(cache); stream.setStreamContext(context); assertTopicSubject(stream, "ha ha bla blah0", "ha ha bla blah1", "ha ha bla blah2", "ha ha bla blah3", "ha ha bla blah4", "ha ha bla blah5", "ha ha bla blah6", "ha ha bla blah7", "ha ha bla blah8", "ha ha bla blah9", "ha ha bla blah10"); } finally { cache.close(); } } @Test public void testUpdateStream() throws Exception { CollectionAdminRequest.createCollection("destinationCollection", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destinationCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class); try { //Copy all docs to destinationCollection expression = StreamExpressionParser.parse("update(destinationCollection, batchSize=5, search(collection1, q=*:*, 
fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\"))"); stream = new UpdateStream(expression, factory); stream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(stream); cluster.getSolrClient().commit("destinationCollection"); //Ensure that all UpdateStream tuples indicate the correct number of copied/indexed docs assert (tuples.size() == 1); t = tuples.get(0); assert (t.EOF == false); assertEquals(5, t.get("batchIndexed")); //Ensure that destinationCollection actually has the new docs. expression = StreamExpressionParser.parse("search(destinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testParallelUpdateStream() throws Exception { CollectionAdminRequest.createCollection("parallelDestinationCollection", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", 
"i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("parallelDestinationCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("parallel", ParallelStream.class); try { //Copy all docs to destinationCollection String updateExpression = "update(parallelDestinationCollection, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"))"; TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")"); parallelUpdateStream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(parallelUpdateStream); cluster.getSolrClient().commit("parallelDestinationCollection"); //Ensure that all UpdateStream tuples indicate the correct number of copied/indexed docs long count = 0; for (Tuple tuple : tuples) { count += tuple.getLong("batchIndexed"); } assert (count == 5); //Ensure that destinationCollection actually has the new docs. expression = StreamExpressionParser.parse("search(parallelDestinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { 
CollectionAdminRequest.deleteCollection("parallelDestinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testParallelDaemonUpdateStream() throws Exception { CollectionAdminRequest.createCollection("parallelDestinationCollection1", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection1", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("parallelDestinationCollection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("daemon", DaemonStream.class); try { //Copy all docs to destinationCollection String updateExpression = "daemon(update(parallelDestinationCollection1, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\")), runInterval=\"1000\", id=\"test\")"; TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")"); parallelUpdateStream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(parallelUpdateStream); assert (tuples.size() == 2); //Let's sleep long enough for the daemon updates to run. 
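//Each of the two workers now hosts a daemon that re-runs its update stream
//every runInterval (1000 ms). The /stream handler's daemon admin actions are
//used below to observe and control them; in sketch form (illustrative
//summary of the calls made in this test):
//
//  action=list -> one response tuple per registered daemon, including an
//                 "iterations" counter of completed runs
//  action=stop -> asks the daemon with the given id to stop; once it has
//                 stopped, its listing reports a non-zero "stopTime"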
//Wait for each daemon to complete at least one update iteration ModifiableSolrParams sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream", "action", "list")); int workersComplete = 0; for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { int iterations = 0; INNER: while (iterations == 0) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); if (tupleResponse.EOF) { solrStream.close(); break INNER; } else { long l = tupleResponse.getLong("iterations"); if (l > 0) { ++workersComplete; } else { try { Thread.sleep(1000); } catch (Exception e) { /* ignore and poll again */ } } iterations = (int) l; solrStream.close(); } } } assertEquals(cluster.getJettySolrRunners().size(), workersComplete); cluster.getSolrClient().commit("parallelDestinationCollection1"); //Now stop the daemons sParams = new ModifiableSolrParams(); sParams.set(CommonParams.QT, "/stream"); sParams.set("action", "stop"); sParams.set("id", "test"); for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); solrStream.close(); } //Confirm every daemon has recorded a stopTime sParams = new ModifiableSolrParams(); sParams.set(CommonParams.QT, "/stream"); sParams.set("action", "list"); workersComplete = 0; for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { long stopTime = 0; INNER: while (stopTime == 0) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); if (tupleResponse.EOF) { solrStream.close(); break INNER; } else { stopTime = tupleResponse.getLong("stopTime"); if (stopTime > 0) { ++workersComplete; } else { try { Thread.sleep(1000); } catch (Exception e) { /* ignore and poll again */ } } solrStream.close(); } } } assertEquals(cluster.getJettySolrRunners().size(), workersComplete); //Ensure that destinationCollection actually has the new docs. 
expression = StreamExpressionParser.parse("search(parallelDestinationCollection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("parallelDestinationCollection1").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testParallelTerminatingDaemonUpdateStream() throws Exception { Assume.assumeTrue(!useAlias); CollectionAdminRequest.createCollection("parallelDestinationCollection1", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection1", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", 
cluster.getZkServer().getZkAddress()) .withCollectionZkHost("parallelDestinationCollection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("daemon", DaemonStream.class); try { //Copy all docs to destinationCollection String updateExpression = "daemon(update(parallelDestinationCollection1, batchSize=2, topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", partitionKeys=\"a_f\", initialCheckpoint=0, id=\"topic1\")), terminate=true, runInterval=\"1000\", id=\"test\")"; TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")"); parallelUpdateStream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(parallelUpdateStream); assert (tuples.size() == 2); ModifiableSolrParams sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream", "action", "list")); int workersComplete = 0; //Daemons should terminate after the topic is completed //Loop through all shards and wait for the daemons to be gone from the listing. for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { INNER: while (true) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); if (tupleResponse.EOF) { solrStream.close(); ++workersComplete; break INNER; } else { solrStream.close(); Thread.sleep(1000); } } } assertEquals(cluster.getJettySolrRunners().size(), workersComplete); cluster.getSolrClient().commit("parallelDestinationCollection1"); //Ensure that destinationCollection actually has the new docs. 
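//(At this point the copy is complete and no daemons are left running:
//terminate=true makes each daemon shut itself down once its wrapped topic()
//returns EOF, which is why the loop above simply waits for the action=list
//response to become empty instead of issuing an explicit action=stop.)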
expression = StreamExpressionParser.parse("search(parallelDestinationCollection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("parallelDestinationCollection1").process(cluster.getSolrClient()); solrClientCache.close(); } } //////////////////////////////////////////// @Test public void testCommitStream() throws Exception { CollectionAdminRequest.createCollection("destinationCollection", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destinationCollection", 
cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("commit", CommitStream.class); try { //Copy all docs to destinationCollection expression = StreamExpressionParser.parse("commit(destinationCollection, batchSize=2, update(destinationCollection, batchSize=5, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\")))"); stream = factory.constructStream(expression); stream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(stream); //Ensure that all CommitStream tuples indicate the correct number of copied/indexed docs assert (tuples.size() == 1); t = tuples.get(0); assert (t.EOF == false); assertEquals(5, t.get("batchIndexed")); //Ensure that destinationCollection actually has the new docs. expression = StreamExpressionParser.parse("search(destinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testParallelCommitStream() throws Exception { CollectionAdminRequest.createCollection("parallelDestinationCollection", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", 
"a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("parallelDestinationCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("commit", CommitStream.class) .withFunctionName("parallel", ParallelStream.class); try { //Copy all docs to destinationCollection String updateExpression = "commit(parallelDestinationCollection, batchSize=0, zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", update(parallelDestinationCollection, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\")))"; TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")"); parallelUpdateStream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(parallelUpdateStream); //Ensure that all UpdateStream tuples indicate the correct number of copied/indexed docs long count = 0; for (Tuple tuple : tuples) { count += tuple.getLong("batchIndexed"); } assert (count == 5); //Ensure that destinationCollection actually has the new docs. 
expression = StreamExpressionParser.parse("search(parallelDestinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("parallelDestinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testParallelDaemonCommitStream() throws Exception { CollectionAdminRequest.createCollection("parallelDestinationCollection1", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection1", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); new UpdateRequest() .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7") .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77") .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777") .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777") .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777") .commit(cluster.getSolrClient(), "collection1"); StreamExpression expression; TupleStream stream; Tuple t; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); String zkHost = cluster.getZkServer().getZkAddress(); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) 
.withCollectionZkHost("parallelDestinationCollection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class) .withFunctionName("commit", CommitStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("daemon", DaemonStream.class); try { //Copy all docs to destinationCollection String updateExpression = "daemon(commit(parallelDestinationCollection1, batchSize=0, zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", update(parallelDestinationCollection1, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"))), runInterval=\"1000\", id=\"test\")"; TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")"); parallelUpdateStream.setStreamContext(streamContext); List<Tuple> tuples = getTuples(parallelUpdateStream); assert (tuples.size() == 2); //Lets sleep long enough for daemon updates to run. //Lets stop the daemons ModifiableSolrParams sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream", "action", "list")); int workersComplete = 0; for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { int iterations = 0; INNER: while (iterations == 0) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); if (tupleResponse.EOF) { solrStream.close(); break INNER; } else { long l = tupleResponse.getLong("iterations"); if (l > 0) { ++workersComplete; } else { try { Thread.sleep(1000); } catch (Exception e) { } } iterations = (int) l; solrStream.close(); } } } assertEquals(cluster.getJettySolrRunners().size(), workersComplete); //Lets stop the daemons sParams = new ModifiableSolrParams(); sParams.set(CommonParams.QT, "/stream"); sParams.set("action", "stop"); sParams.set("id", "test"); for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); solrStream.close(); } sParams = new ModifiableSolrParams(); sParams.set(CommonParams.QT, "/stream"); sParams.set("action", "list"); workersComplete = 0; for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { long stopTime = 0; INNER: while (stopTime == 0) { SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams); solrStream.setStreamContext(streamContext); solrStream.open(); Tuple tupleResponse = solrStream.read(); if (tupleResponse.EOF) { solrStream.close(); break INNER; } else { stopTime = tupleResponse.getLong("stopTime"); if (stopTime > 0) { ++workersComplete; } else { try { Thread.sleep(1000); } catch (Exception e) { } } solrStream.close(); } } } assertEquals(cluster.getJettySolrRunners().size(), workersComplete); //Ensure that destinationCollection actually has the new docs. 
expression = StreamExpressionParser.parse("search(parallelDestinationCollection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(5, tuples.size()); Tuple tuple = tuples.get(0); assert (tuple.getLong("id") == 0); assert (tuple.get("a_s").equals("hello0")); assert (tuple.getLong("a_i") == 0); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7")); tuple = tuples.get(1); assert (tuple.getLong("id") == 1); assert (tuple.get("a_s").equals("hello1")); assert (tuple.getLong("a_i") == 1); assert (tuple.getDouble("a_f") == 1.0); assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777")); tuple = tuples.get(2); assert (tuple.getLong("id") == 2); assert (tuple.get("a_s").equals("hello2")); assert (tuple.getLong("a_i") == 2); assert (tuple.getDouble("a_f") == 0.0); assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1"); assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77")); tuple = tuples.get(3); assert (tuple.getLong("id") == 3); assert (tuple.get("a_s").equals("hello3")); assert (tuple.getLong("a_i") == 3); assert (tuple.getDouble("a_f") == 3.0); assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2"); assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777")); tuple = tuples.get(4); assert (tuple.getLong("id") == 4); assert (tuple.get("a_s").equals("hello4")); assert (tuple.getLong("a_i") == 4); assert (tuple.getDouble("a_f") == 4.0); assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3"); assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777")); } finally { CollectionAdminRequest.deleteCollection("parallelDestinationCollection1").process(cluster.getSolrClient()); solrClientCache.close(); } } //////////////////////////////////////////// @Test public void testIntersectStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "setA", "a_i", "0") .add(id, "2", "a_s", "setA", "a_i", "1") .add(id, "3", "a_s", "setA", "a_i", "2") .add(id, "4", "a_s", "setA", "a_i", "3") .add(id, "5", "a_s", "setB", "a_i", "2") .add(id, "6", "a_s", "setB", "a_i", "3") .add(id, "7", "a_s", "setAB", "a_i", "0") .add(id, "8", "a_s", "setAB", "a_i", "6") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamExpression expression; TupleStream stream; List<Tuple> tuples; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("intersect", IntersectStream.class); try { // basic expression = StreamExpressionParser.parse("intersect(" + "search(collection1, q=a_s:(setA || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc, a_s asc\")," + "search(collection1, q=a_s:(setB || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc\")," + "on=\"a_i\")"); stream = new IntersectStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 5); assertOrder(tuples, 0, 7, 3, 4, 8); } finally { solrClientCache.close(); } } @Test public void testClassifyStream() 
throws Exception { Assume.assumeTrue(!useAlias); CollectionAdminRequest.createCollection("modelCollection", "ml", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("modelCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("uknownCollection", "ml", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("uknownCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("checkpointCollection", "ml", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("checkpointCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); UpdateRequest updateRequest = new UpdateRequest(); for (int i = 0; i < 500; i+=2) { updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "1"); updateRequest.add(id, String.valueOf(i+1), "tv_text", "a b e e f", "out_i", "0"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); updateRequest = new UpdateRequest(); updateRequest.add(id, String.valueOf(0), "text_s", "a b c c d"); updateRequest.add(id, String.valueOf(1), "text_s", "a b e e f"); updateRequest.commit(cluster.getSolrClient(), "uknownCollection"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/" + COLLECTIONORALIAS; TupleStream updateTrainModelStream; ModifiableSolrParams paramsLoc; StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("modelCollection", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("uknownCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("features", FeaturesSelectionStream.class) .withFunctionName("train", TextLogitStream.class) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class); // train the model String textLogitExpression = "train(" + "collection1, " + "features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4),"+ "q=\"*:*\", " + "name=\"model\", " + "field=\"tv_text\", " + "outcome=\"out_i\", " + "maxIterations=100)"; updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, "+textLogitExpression+")"); getTuples(updateTrainModelStream); cluster.getSolrClient().commit("modelCollection"); // classify unknown documents String expr = "classify(" + "model(modelCollection, id=\"model\", cacheMillis=5000)," + "topic(checkpointCollection, uknownCollection, q=\"*:*\", fl=\"text_s, id\", id=\"1000000\", initialCheckpoint=\"0\")," + "field=\"text_s\"," + "analyzerField=\"tv_text\")"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); SolrStream classifyStream = new SolrStream(url, paramsLoc); Map<String, Double> idToLabel = getIdToLabel(classifyStream, "probability_d"); assertEquals(idToLabel.size(), 2); assertEquals(1.0, idToLabel.get("0"), 0.001); assertEquals(0, idToLabel.get("1"), 0.001); // Add more documents and classify it updateRequest = new UpdateRequest(); updateRequest.add(id, String.valueOf(2), "text_s", "a b c c d"); updateRequest.add(id, String.valueOf(3), "text_s", "a b e e f"); updateRequest.commit(cluster.getSolrClient(), "uknownCollection"); classifyStream = new SolrStream(url, paramsLoc); idToLabel = getIdToLabel(classifyStream, "probability_d"); 
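    // Docs 2 and 3 repeat the two training patterns, so the cached model should label them just like docs 0 and 1.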
    assertEquals(idToLabel.size(), 2);
    assertEquals(1.0, idToLabel.get("2"), 0.001);
    assertEquals(0, idToLabel.get("3"), 0.001);

    // Train another model with the outcomes flipped
    updateRequest = new UpdateRequest();
    updateRequest.deleteByQuery("*:*");
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    updateRequest = new UpdateRequest();
    for (int i = 0; i < 500; i+=2) {
      updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "0");
      updateRequest.add(id, String.valueOf(i+1), "tv_text", "a b e e f", "out_i", "1");
    }
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, "+textLogitExpression+")");
    getTuples(updateTrainModelStream);
    cluster.getSolrClient().commit("modelCollection");

    // Add more documents and classify them
    updateRequest = new UpdateRequest();
    updateRequest.add(id, String.valueOf(4), "text_s", "a b c c d");
    updateRequest.add(id, String.valueOf(5), "text_s", "a b e e f");
    updateRequest.commit(cluster.getSolrClient(), "uknownCollection");

    // Sleep past cacheMillis=5000 so the cached model expires and the retrained model is reloaded
    Thread.sleep(5100);

    classifyStream = new SolrStream(url, paramsLoc);
    idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(idToLabel.size(), 2);
    assertEquals(0, idToLabel.get("4"), 0.001);
    assertEquals(1.0, idToLabel.get("5"), 0.001);

    // Classify the same unknown documents in parallel
    expr = "parallel(collection1, workers=2, sort=\"_version_ asc\", classify(" +
        "model(modelCollection, id=\"model\")," +
        "topic(checkpointCollection, uknownCollection, q=\"id:(4 5)\", fl=\"text_s, id, _version_\", id=\"2000000\", partitionKeys=\"id\", initialCheckpoint=\"0\")," +
        "field=\"text_s\"," +
        "analyzerField=\"tv_text\"))";
    paramsLoc.set("expr", expr);
    classifyStream = new SolrStream(url, paramsLoc);
    idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(idToLabel.size(), 2);
    assertEquals(0, idToLabel.get("4"), 0.001);
    assertEquals(1.0, idToLabel.get("5"), 0.001);

    CollectionAdminRequest.deleteCollection("modelCollection").process(cluster.getSolrClient());
    CollectionAdminRequest.deleteCollection("uknownCollection").process(cluster.getSolrClient());
    CollectionAdminRequest.deleteCollection("checkpointCollection").process(cluster.getSolrClient());
  }

  @Test
  public void testCalculatorStream() throws Exception {
    String expr = "select(calc(), add(1, 1) as result)";
    ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
    paramsLoc.set("expr", expr);
    paramsLoc.set("qt", "/stream");

    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
    SolrStream solrStream = new SolrStream(url, paramsLoc);
    StreamContext context = new StreamContext();
    solrStream.setStreamContext(context);
    List<Tuple> tuples = getTuples(solrStream);
    assertTrue(tuples.size() == 1);
    Tuple t = tuples.get(0);
    assertTrue(t.getLong("result").equals(2L));
  }

  @Test
  public void testAnalyzeEvaluator() throws Exception {
    UpdateRequest updateRequest = new UpdateRequest();
    updateRequest.add(id, "1", "test_t", "l b c d c");
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    SolrClientCache cache = new SolrClientCache();
    try {
      String expr = "cartesianProduct(search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id, test_t\", sort=\"id desc\"), analyze(test_t, test_t) as test_t)";
      ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
      paramsLoc.set("expr", expr);
      paramsLoc.set("qt", "/stream");
      String url =
cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; SolrStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 5); Tuple t = tuples.get(0); assertTrue(t.getString("test_t").equals("l")); assertTrue(t.getString("id").equals("1")); t = tuples.get(1); assertTrue(t.getString("test_t").equals("b")); assertTrue(t.getString("id").equals("1")); t = tuples.get(2); assertTrue(t.getString("test_t").equals("c")); assertTrue(t.getString("id").equals("1")); t = tuples.get(3); assertTrue(t.getString("test_t").equals("d")); assertTrue(t.getString("id").equals("1")); t = tuples.get(4); assertTrue(t.getString("test_t").equals("c")); assertTrue(t.getString("id").equals("1")); //Try with single param expr = "cartesianProduct(search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id, test_t\", sort=\"id desc\"), analyze(test_t) as test_t)"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 5); t = tuples.get(0); assertTrue(t.getString("test_t").equals("l")); assertTrue(t.getString("id").equals("1")); t = tuples.get(1); assertTrue(t.getString("test_t").equals("b")); assertTrue(t.getString("id").equals("1")); t = tuples.get(2); assertTrue(t.getString("test_t").equals("c")); assertTrue(t.getString("id").equals("1")); t = tuples.get(3); assertTrue(t.getString("test_t").equals("d")); assertTrue(t.getString("id").equals("1")); t = tuples.get(4); assertTrue(t.getString("test_t").equals("c")); assertTrue(t.getString("id").equals("1")); //Try with null in the test_t field expr = "cartesianProduct(search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id\", sort=\"id desc\"), analyze(test_t, test_t) as test_t)"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); //Test annotating tuple expr = "select(search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id, test_t\", sort=\"id desc\"), analyze(test_t, test_t) as test1_t)"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List l = (List)tuples.get(0).get("test1_t"); assertTrue(l.get(0).equals("l")); assertTrue(l.get(1).equals("b")); assertTrue(l.get(2).equals("c")); assertTrue(l.get(3).equals("d")); assertTrue(l.get(4).equals("c")); } finally { cache.close(); } } @Test public void testEchoStream() throws Exception { String expr = "echo(hello world)"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); String s = (String)tuples.get(0).get("echo"); 
assertTrue(s.equals("hello world")); expr = "echo(\"hello world\")"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); s = (String)tuples.get(0).get("echo"); assertTrue(s.equals("hello world")); expr = "echo(\"hello, world\")"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); s = (String)tuples.get(0).get("echo"); assertTrue(s.equals("hello, world")); expr = "echo(\"hello, \\\"t\\\" world\")"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); s = (String)tuples.get(0).get("echo"); assertTrue(s.equals("hello, \"t\" world")); expr = "parallel("+COLLECTIONORALIAS+", workers=2, sort=\"echo asc\", echo(\"hello, \\\"t\\\" world\"))"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 2); s = (String)tuples.get(0).get("echo"); assertTrue(s.equals("hello, \"t\" world")); s = (String)tuples.get(1).get("echo"); assertTrue(s.equals("hello, \"t\" world")); expr = "echo(\"tuytuy iuyiuyi iuyiuyiu iuyiuyiuyiu iuyi iuyiyiuy iuyiuyiu iyiuyiu iyiuyiuyyiyiu yiuyiuyi" + " yiuyiuyi yiuyiuuyiu yiyiuyiyiu iyiuyiuyiuiuyiu yiuyiuyi yiuyiy yiuiyiuiuy\")"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); s = (String)tuples.get(0).get("echo"); assertTrue(s.equals("tuytuy iuyiuyi iuyiuyiu iuyiuyiuyiu iuyi iuyiyiuy iuyiuyiu iyiuyiu iyiuyiuyyiyiu yiuyiuyi yiuyiuyi " + "yiuyiuuyiu yiyiuyiyiu iyiuyiuyiuiuyiu yiuyiuyi yiuyiy yiuiyiuiuy")); } @Test public void testEvalStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "hello", "test_t", "l b c d c"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "eval(select(echo(\"search("+COLLECTIONORALIAS+", q=\\\"*:*\\\", fl=id, sort=\\\"id desc\\\")\"), echo as expr_s))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); String s = (String)tuples.get(0).get("id"); assertTrue(s.equals("hello")); } private String getDateString(String year, String month, String day) { return year+"-"+month+"-"+day+"T00:00:00Z"; } @Test public void testTimeSeriesStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { 
updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 4); assertTrue(tuples.get(0).get("test_dt").equals("2013-01-01T01:00:00Z")); assertTrue(tuples.get(0).getLong("count(*)").equals(100L)); assertTrue(tuples.get(0).getDouble("sum(price_f)").equals(10000D)); assertTrue(tuples.get(0).getDouble("max(price_f)").equals(100D)); assertTrue(tuples.get(0).getDouble("min(price_f)").equals(100D)); assertTrue(tuples.get(1).get("test_dt").equals("2014-01-01T01:00:00Z")); assertTrue(tuples.get(1).getLong("count(*)").equals(50L)); assertTrue(tuples.get(1).getDouble("sum(price_f)").equals(25000D)); assertTrue(tuples.get(1).getDouble("max(price_f)").equals(500D)); assertTrue(tuples.get(1).getDouble("min(price_f)").equals(500D)); assertTrue(tuples.get(2).get("test_dt").equals("2015-01-01T01:00:00Z")); assertTrue(tuples.get(2).getLong("count(*)").equals(50L)); assertTrue(tuples.get(2).getDouble("sum(price_f)").equals(15000D)); assertTrue(tuples.get(2).getDouble("max(price_f)").equals(300D)); assertTrue(tuples.get(2).getDouble("min(price_f)").equals(300D)); assertTrue(tuples.get(3).get("test_dt").equals("2016-01-01T01:00:00Z")); assertTrue(tuples.get(3).getLong("count(*)").equals(50L)); assertTrue(tuples.get(3).getDouble("sum(price_f)").equals(20000D)); assertTrue(tuples.get(3).getDouble("max(price_f)").equals(400D)); assertTrue(tuples.get(3).getDouble("min(price_f)").equals(400D)); } @Test public void testCorrelationStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", b=select("+expr+",mult(-1, count(*)) as nvalue), c=col(a, count(*)), d=col(b, nvalue), tuple(corr=corr(c,d)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", 
"/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); assertTrue(tuples.get(0).getDouble("corr").equals(-1.0D)); } @Test public void testCovariance() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", b=select("+expr+",mult(-1, count(*)) as nvalue), c=col(a, count(*)), d=col(b, nvalue), tuple(colc=c, cold=d, cov=cov(c,d)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); assertTrue(tuples.get(0).getDouble("cov").equals(-625.0D)); } @Test public void testDistance() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", b=select("+expr+",mult(-1, count(*)) as nvalue), c=col(a, count(*)), d=col(b, nvalue), tuple(colc=c, cold=d, cov=cov(c,d), dist=distance(c,d)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); assertTrue(tuples.get(0).getDouble("cov").equals(-625.0D)); assertTrue(tuples.get(0).getDouble("dist").equals(264.5751311064591D)); 
} @Test public void testReverse() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", c=col(a, max(price_f)), tuple(reverse=rev(c)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List<Number> reverse = (List<Number>)tuples.get(0).get("reverse"); assertTrue(reverse.size() == 4); assertTrue(reverse.get(0).doubleValue() == 400D); assertTrue(reverse.get(1).doubleValue() == 300D); assertTrue(reverse.get(2).doubleValue() == 500D); assertTrue(reverse.get(3).doubleValue() == 100D); } @Test public void testCopyOf() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", c=col(a, max(price_f)), tuple(copy1=copyOf(c, 10), copy2=copyOf(c), copy3=copyOf(c, 2) ))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List<Number> copy1 = (List<Number>)tuples.get(0).get("copy1"); assertTrue(copy1.size() == 10); assertTrue(copy1.get(0).doubleValue() == 100D); assertTrue(copy1.get(1).doubleValue() == 500D); assertTrue(copy1.get(2).doubleValue() == 300D); assertTrue(copy1.get(3).doubleValue() == 400D); assertTrue(copy1.get(4).doubleValue() == 0D); assertTrue(copy1.get(5).doubleValue() == 0D); 
assertTrue(copy1.get(6).doubleValue() == 0D); assertTrue(copy1.get(7).doubleValue() == 0D); assertTrue(copy1.get(8).doubleValue() == 0D); assertTrue(copy1.get(9).doubleValue() == 0D); List<Number> copy2 = (List<Number>)tuples.get(0).get("copy2"); assertTrue(copy2.size() == 4); assertTrue(copy2.get(0).doubleValue() == 100D); assertTrue(copy2.get(1).doubleValue() == 500D); assertTrue(copy2.get(2).doubleValue() == 300D); assertTrue(copy2.get(3).doubleValue() == 400D); List<Number> copy3 = (List<Number>)tuples.get(0).get("copy3"); assertTrue(copy3.size() == 2); assertTrue(copy3.get(0).doubleValue() == 100D); assertTrue(copy3.get(1).doubleValue() == 500D); } @Test public void testPercentiles() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<100) { i=i+2; updateRequest.add(id, "id_"+(i), "price_f", Integer.toString(i)); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"price_f\", sort=\"price_f asc\", rows=\"200\")"; String cexpr = "let(a="+expr+", c=col(a, price_f), e=empiricalDistribution(c), " + "tuple(p1=percentile(e, 88), " + "p2=percentile(e, 2), " + "p3=percentile(e, 99), " + "p4=percentile(e, 77), " + "p5=percentile(e, 98)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); double percentile1 = tuples.get(0).getDouble("p1"); double percentile2 = tuples.get(0).getDouble("p2"); double percentile3 = tuples.get(0).getDouble("p3"); double percentile4 = tuples.get(0).getDouble("p4"); double percentile5 = tuples.get(0).getDouble("p5"); assertEquals(.88D, percentile1, 0.001); assertEquals(.0D, percentile2, 0.001); assertEquals(1.0D, percentile3, 0.001); assertEquals(.78D, percentile4, 0.001); assertEquals(.98D, percentile5, 0.001); } @Test public void testRankTransform() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); int i=0; while(i<50) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00"); } while(i<100) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0"); } while(i<150) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0"); } while(i<250) { updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " + "end=\"2016-12-01T01:00:00.000Z\", " + "gap=\"+1YEAR\", " + "field=\"test_dt\", " + "count(*), sum(price_f), max(price_f), min(price_f))"; String cexpr = "let(a="+expr+", c=col(a, max(price_f)), tuple(reverse=rev(c), ranked=rank(c)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); 
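    // rank() assigns 1 to the smallest value, so the max(price_f) column [100, 500, 300, 400] should rank to [1, 4, 2, 3].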
    assertTrue(tuples.size() == 1);
    List<Number> reverse = (List<Number>)tuples.get(0).get("reverse");
    assertTrue(reverse.size() == 4);
    assertTrue(reverse.get(0).doubleValue() == 400D);
    assertTrue(reverse.get(1).doubleValue() == 300D);
    assertTrue(reverse.get(2).doubleValue() == 500D);
    assertTrue(reverse.get(3).doubleValue() == 100D);

    List<Number> ranked = (List<Number>)tuples.get(0).get("ranked");
    assertTrue(ranked.size() == 4);
    assertTrue(ranked.get(0).doubleValue() == 1D);
    assertTrue(ranked.get(1).doubleValue() == 4D);
    assertTrue(ranked.get(2).doubleValue() == 2D);
    assertTrue(ranked.get(3).doubleValue() == 3D);
  }

  @Test
  public void testScale() throws Exception {
    UpdateRequest updateRequest = new UpdateRequest();

    int i=0;
    while(i<50) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00");
    }

    while(i<100) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0");
    }

    while(i<150) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0");
    }

    while(i<250) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00");
    }

    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " +
        "end=\"2016-12-01T01:00:00.000Z\", " +
        "gap=\"+1YEAR\", " +
        "field=\"test_dt\", " +
        "count(*), sum(price_f), max(price_f), min(price_f))";

    String cexpr = "let(a="+expr+", c=col(a, max(price_f)), tuple(reverse=rev(c), scaled=scale(2, c)))";

    ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
    paramsLoc.set("expr", cexpr);
    paramsLoc.set("qt", "/stream");

    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
    TupleStream solrStream = new SolrStream(url, paramsLoc);

    StreamContext context = new StreamContext();
    solrStream.setStreamContext(context);
    List<Tuple> tuples = getTuples(solrStream);
    assertTrue(tuples.size() == 1);
    List<Number> reverse = (List<Number>)tuples.get(0).get("reverse");
    assertTrue(reverse.size() == 4);
    assertTrue(reverse.get(0).doubleValue() == 400D);
    assertTrue(reverse.get(1).doubleValue() == 300D);
    assertTrue(reverse.get(2).doubleValue() == 500D);
    assertTrue(reverse.get(3).doubleValue() == 100D);

    // scale(2, c) doubles every value in c = [100, 500, 300, 400]
    List<Number> scaled = (List<Number>)tuples.get(0).get("scaled");
    assertTrue(scaled.size() == 4);
    assertTrue(scaled.get(0).doubleValue() == 200D);
    assertTrue(scaled.get(1).doubleValue() == 1000D);
    assertTrue(scaled.get(2).doubleValue() == 600D);
    assertTrue(scaled.get(3).doubleValue() == 800D);
  }

  @Test
  public void testConvolution() throws Exception {
    UpdateRequest updateRequest = new UpdateRequest();

    int i=0;
    while(i<50) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2016", "5", "1"), "price_f", "400.00");
    }

    while(i<100) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2015", "5", "1"), "price_f", "300.0");
    }

    while(i<150) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2014", "5", "1"), "price_f", "500.0");
    }

    while(i<250) {
      updateRequest.add(id, "id_"+(++i),"test_dt", getDateString("2013", "5", "1"), "price_f", "100.00");
    }

    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String expr = "timeseries("+COLLECTIONORALIAS+", q=\"*:*\", start=\"2013-01-01T01:00:00.000Z\", " +
        "end=\"2016-12-01T01:00:00.000Z\", " +
        "gap=\"+1YEAR\", " +
        "field=\"test_dt\", " +
        "count(*), sum(price_f), max(price_f), min(price_f))";

    String cexpr = "let(a="+expr+",
b=select("+expr+",mult(2, count(*)) as nvalue), c=col(a, count(*)), d=col(b, nvalue), tuple(colc=c, cold=d, conv=conv(c,d)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List<Number> convolution = (List<Number>)(tuples.get(0)).get("conv"); assertTrue(convolution.size() == 7); assertTrue(convolution.get(0).equals(20000D)); assertTrue(convolution.get(1).equals(20000D)); assertTrue(convolution.get(2).equals(25000D)); assertTrue(convolution.get(3).equals(30000D)); assertTrue(convolution.get(4).equals(15000D)); assertTrue(convolution.get(5).equals(10000D)); assertTrue(convolution.get(6).equals(5000D)); } @Test public void testRegressAndPredict() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "1", "price_f", "100.0", "col_s", "a", "order_i", "1"); updateRequest.add(id, "2", "price_f", "200.0", "col_s", "a", "order_i", "2"); updateRequest.add(id, "3", "price_f", "300.0", "col_s", "a", "order_i", "3"); updateRequest.add(id, "4", "price_f", "100.0", "col_s", "a", "order_i", "4"); updateRequest.add(id, "5", "price_f", "200.0", "col_s", "a", "order_i", "5"); updateRequest.add(id, "6", "price_f", "400.0", "col_s", "a", "order_i", "6"); updateRequest.add(id, "7", "price_f", "600.0", "col_s", "a", "order_i", "7"); updateRequest.add(id, "8", "price_f", "200.0", "col_s", "b", "order_i", "1"); updateRequest.add(id, "9", "price_f", "400.0", "col_s", "b", "order_i", "2"); updateRequest.add(id, "10", "price_f", "600.0", "col_s", "b", "order_i", "3"); updateRequest.add(id, "11", "price_f", "200.0", "col_s", "b", "order_i", "4"); updateRequest.add(id, "12", "price_f", "400.0", "col_s", "b", "order_i", "5"); updateRequest.add(id, "13", "price_f", "800.0", "col_s", "b", "order_i", "6"); updateRequest.add(id, "14", "price_f", "1200.0", "col_s", "b", "order_i", "7"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; String expr2 = "search("+COLLECTIONORALIAS+", q=\"col_s:b\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; String cexpr = "let(a="+expr1+", b="+expr2+", c=col(a, price_f), d=col(b, price_f), e=regress(c, d), tuple(regress=e, p=predict(e, 300)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); Tuple tuple = tuples.get(0); Map regression = (Map)tuple.get("regress"); double slope = (double)regression.get("slope"); double intercept= (double) regression.get("intercept"); assertTrue(slope == 2.0D); assertTrue(intercept == 0.0D); double prediction = tuple.getDouble("p"); assertTrue(prediction == 600.0D); } @Test public void testLength() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "1", "price_f", "100.0", "col_s", "a", 
"order_i", "1"); updateRequest.add(id, "2", "price_f", "200.0", "col_s", "a", "order_i", "2"); updateRequest.add(id, "3", "price_f", "300.0", "col_s", "a", "order_i", "3"); updateRequest.add(id, "4", "price_f", "100.0", "col_s", "a", "order_i", "4"); updateRequest.add(id, "5", "price_f", "200.0", "col_s", "a", "order_i", "5"); updateRequest.add(id, "6", "price_f", "400.0", "col_s", "a", "order_i", "6"); updateRequest.add(id, "7", "price_f", "600.0", "col_s", "a", "order_i", "7"); updateRequest.add(id, "8", "price_f", "200.0", "col_s", "b", "order_i", "1"); updateRequest.add(id, "9", "price_f", "400.0", "col_s", "b", "order_i", "2"); updateRequest.add(id, "10", "price_f", "600.0", "col_s", "b", "order_i", "3"); updateRequest.add(id, "11", "price_f", "200.0", "col_s", "b", "order_i", "4"); updateRequest.add(id, "12", "price_f", "400.0", "col_s", "b", "order_i", "5"); updateRequest.add(id, "13", "price_f", "800.0", "col_s", "b", "order_i", "6"); updateRequest.add(id, "14", "price_f", "1200.0", "col_s", "b", "order_i", "7"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; String expr2 = "search("+COLLECTIONORALIAS+", q=\"col_s:b\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; String cexpr = "let(a="+expr1+", b="+expr2+", c=col(a, price_f), d=col(b, price_f), e=regress(c, d), tuple(regress=e, p=predict(e, 300), l=length(d)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); Tuple tuple = tuples.get(0); Map regression = (Map)tuple.get("regress"); double slope = (double)regression.get("slope"); double intercept= (double) regression.get("intercept"); double length = tuple.getDouble("l"); assertTrue(slope == 2.0D); assertTrue(intercept == 0.0D); double prediction = tuple.getDouble("p"); assertTrue(prediction == 600.0D); assertTrue(length == 7); } @Test public void testNormalize() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "1", "price_f", "100.0", "col_s", "a", "order_i", "1"); updateRequest.add(id, "2", "price_f", "200.0", "col_s", "a", "order_i", "2"); updateRequest.add(id, "3", "price_f", "300.0", "col_s", "a", "order_i", "3"); updateRequest.add(id, "4", "price_f", "100.0", "col_s", "a", "order_i", "4"); updateRequest.add(id, "5", "price_f", "200.0", "col_s", "a", "order_i", "5"); updateRequest.add(id, "6", "price_f", "400.0", "col_s", "a", "order_i", "6"); updateRequest.add(id, "7", "price_f", "600.0", "col_s", "a", "order_i", "7"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; String cexpr = "let(a="+expr1+", c=col(a, price_f), tuple(n=normalize(c), c=c))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); 
solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); Tuple tuple = tuples.get(0); List<Double> col = (List<Double>)tuple.get("c"); List<Double> normalized = (List<Double>)tuple.get("n"); assertTrue(col.size() == normalized.size()); double total = 0.0D; for(double d : normalized) { total += d; } double mean = total/normalized.size(); assert(Math.round(mean) == 0); double sd = 0; for (int i = 0; i < normalized.size(); i++) { sd += Math.pow(normalized.get(i) - mean, 2) / normalized.size(); } double standardDeviation = Math.sqrt(sd); assertTrue(Math.round(standardDeviation) == 1); } @Test public void testListStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "hello", "test_t", "l b c d c"); updateRequest.add(id, "hello1", "test_t", "l b c d c"); updateRequest.add(id, "hello2", "test_t", "l b c d c"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr1 = "search("+COLLECTIONORALIAS+", q=\"id:hello\", fl=id, sort=\"id desc\")"; String expr2 = "search("+COLLECTIONORALIAS+", q=\"id:hello1\", fl=id, sort=\"id desc\")"; String expr3 = "search("+COLLECTIONORALIAS+", q=\"id:hello2\", fl=id, sort=\"id desc\")"; String cat = "list("+expr1+","+expr2+","+expr3+")"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cat); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 3); String s = (String)tuples.get(0).get("id"); assertTrue(s.equals("hello")); s = (String)tuples.get(1).get("id"); assertTrue(s.equals("hello1")); s = (String)tuples.get(2).get("id"); assertTrue(s.equals("hello2")); } @Test public void testCellStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "hello", "test_t", "l b c d c e"); updateRequest.add(id, "hello1", "test_t", "l b c d c"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id,test_t\", sort=\"id desc\")"; String cat = "cell(results,"+expr+")"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cat); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List<Map> results = (List<Map>)tuples.get(0).get("results"); assertTrue(results.get(0).get("id").equals("hello1")); assertTrue(results.get(0).get("test_t").equals("l b c d c")); assertTrue(results.get(1).get("id").equals("hello")); assertTrue(results.get(1).get("test_t").equals("l b c d c e")); } @Test public void testTupleStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "hello", "test_t", "l b c d c e"); updateRequest.add(id, "hello1", "test_t", "l b c d c"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id,test_t\", sort=\"id desc\")"; //Add a Stream and an Evaluator to the Tuple. 
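    // Expected shape, roughly: {results=[{id=hello1, ...}, {id=hello, ...}], sum=2}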
String cat = "tuple(results="+expr+", sum=add(1,1))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cat); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); List<Map> results = (List<Map>)tuples.get(0).get("results"); assertTrue(results.get(0).get("id").equals("hello1")); assertTrue(results.get(0).get("test_t").equals("l b c d c")); assertTrue(results.get(1).get("id").equals("hello")); assertTrue(results.get(1).get("test_t").equals("l b c d c e")); assertTrue(tuples.get(0).getLong("sum").equals(2L)); } @Test public void testLetStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "hello", "test_t", "l b c d c e", "test_i", "5"); updateRequest.add(id, "hello1", "test_t", "l b c d c", "test_i", "4"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); String expr = "search("+COLLECTIONORALIAS+", q=\"*:*\", fl=\"id,test_t, test_i\", sort=\"id desc\")"; String cat = "let(a ="+expr+", b = add(1,3), c=col(a, test_i), tuple(test = add(1,1), test1=b, results=a, test2=add(c)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cat); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); Tuple tuple1 = tuples.get(0); List<Map> results = (List<Map>)tuple1.get("results"); assertTrue(results.size() == 2); assertTrue(results.get(0).get("id").equals("hello1")); assertTrue(results.get(0).get("test_t").equals("l b c d c")); assertTrue(results.get(1).get("id").equals("hello")); assertTrue(results.get(1).get("test_t").equals("l b c d c e")); assertTrue(tuple1.getLong("test").equals(2L)); assertTrue(tuple1.getLong("test1").equals(4L)); assertTrue(tuple1.getLong("test2").equals(9L)); } @Test public void testConvertEvaluator() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); updateRequest.add(id, "1", "miles_i", "50"); updateRequest.add(id, "2", "miles_i", "70"); updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); //Test annotating tuple String expr = "select(calc(), convert(miles, kilometers, 10) as kilometers)"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; TupleStream solrStream = new SolrStream(url, paramsLoc); StreamContext context = new StreamContext(); solrStream.setStreamContext(context); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); double d = (double)tuples.get(0).get("kilometers"); assertTrue(d == (double)(10*1.61)); expr = "select(search("+COLLECTIONORALIAS+", q=\"*:*\", sort=\"miles_i asc\", fl=\"miles_i\"), convert(miles, kilometers, miles_i) as kilometers)"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); 
solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 2); d = (double)tuples.get(0).get("kilometers"); assertTrue(d == (double)(50*1.61)); d = (double)tuples.get(1).get("kilometers"); assertTrue(d == (double)(70*1.61)); expr = "parallel("+COLLECTIONORALIAS+", workers=2, sort=\"miles_i asc\", select(search("+COLLECTIONORALIAS+", q=\"*:*\", partitionKeys=miles_i, sort=\"miles_i asc\", fl=\"miles_i\"), convert(miles, kilometers, miles_i) as kilometers))"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 2); d = (double)tuples.get(0).get("kilometers"); assertTrue(d == (double)(50*1.61)); d = (double)tuples.get(1).get("kilometers"); assertTrue(d == (double)(70*1.61)); expr = "select(stats("+COLLECTIONORALIAS+", q=\"*:*\", sum(miles_i)), convert(miles, kilometers, sum(miles_i)) as kilometers)"; paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", expr); paramsLoc.set("qt", "/stream"); solrStream = new SolrStream(url, paramsLoc); context = new StreamContext(); solrStream.setStreamContext(context); tuples = getTuples(solrStream); assertTrue(tuples.size() == 1); d = (double)tuples.get(0).get("kilometers"); assertTrue(d == (double)(120*1.61)); } @Test public void testExecutorStream() throws Exception { CollectionAdminRequest.createCollection("workQueue", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("workQueue", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("mainCorpus", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("mainCorpus", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("destination", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destination", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); UpdateRequest workRequest = new UpdateRequest(); UpdateRequest dataRequest = new UpdateRequest(); for (int i = 0; i < 500; i++) { workRequest.add(id, String.valueOf(i), "expr_s", "update(destination, batchSize=50, search(mainCorpus, q=id:"+i+", rows=1, sort=\"id asc\", fl=\"id, body_t, field_i\"))"); dataRequest.add(id, String.valueOf(i), "body_t", "hello world "+i, "field_i", Integer.toString(i)); } workRequest.commit(cluster.getSolrClient(), "workQueue"); dataRequest.commit(cluster.getSolrClient(), "mainCorpus"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/destination"; TupleStream executorStream; ModifiableSolrParams paramsLoc; StreamFactory factory = new StreamFactory() .withCollectionZkHost("workQueue", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("mainCorpus", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destination", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("executor", ExecutorStream.class) .withFunctionName("update", UpdateStream.class); String executorExpression = "executor(threads=3, search(workQueue, q=\"*:*\", fl=\"id, expr_s\", rows=1000, sort=\"id desc\"))"; executorStream = factory.constructStream(executorExpression); StreamContext context = new StreamContext(); SolrClientCache clientCache = 
new SolrClientCache(); context.setSolrClientCache(clientCache); executorStream.setStreamContext(context); getTuples(executorStream); //Destination collection should now contain all the records in the main corpus. cluster.getSolrClient().commit("destination"); paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", "search(destination, q=\"*:*\", fl=\"id, body_t, field_i\", rows=1000, sort=\"field_i asc\")"); paramsLoc.set("qt","/stream"); SolrStream solrStream = new SolrStream(url, paramsLoc); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 500); for(int i=0; i<500; i++) { Tuple tuple = tuples.get(i); long ivalue = tuple.getLong("field_i"); String body = tuple.getString("body_t"); assertTrue(ivalue == i); assertTrue(body.equals("hello world "+i)); } solrStream.close(); clientCache.close(); CollectionAdminRequest.deleteCollection("workQueue").process(cluster.getSolrClient()); CollectionAdminRequest.deleteCollection("mainCorpus").process(cluster.getSolrClient()); CollectionAdminRequest.deleteCollection("destination").process(cluster.getSolrClient()); } @Test public void testParallelExecutorStream() throws Exception { CollectionAdminRequest.createCollection("workQueue", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("workQueue", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("mainCorpus", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("mainCorpus", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); CollectionAdminRequest.createCollection("destination", "conf", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destination", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); UpdateRequest workRequest = new UpdateRequest(); UpdateRequest dataRequest = new UpdateRequest(); for (int i = 0; i < 500; i++) { workRequest.add(id, String.valueOf(i), "expr_s", "update(destination, batchSize=50, search(mainCorpus, q=id:"+i+", rows=1, sort=\"id asc\", fl=\"id, body_t, field_i\"))"); dataRequest.add(id, String.valueOf(i), "body_t", "hello world "+i, "field_i", Integer.toString(i)); } workRequest.commit(cluster.getSolrClient(), "workQueue"); dataRequest.commit(cluster.getSolrClient(), "mainCorpus"); String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/destination"; TupleStream executorStream; ModifiableSolrParams paramsLoc; StreamFactory factory = new StreamFactory() .withCollectionZkHost("workQueue", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("mainCorpus", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destination", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("executor", ExecutorStream.class) .withFunctionName("parallel", ParallelStream.class) .withFunctionName("update", UpdateStream.class); String executorExpression = "parallel(workQueue, workers=2, sort=\"EOF asc\", executor(threads=3, queueSize=100, search(workQueue, q=\"*:*\", fl=\"id, expr_s\", rows=1000, partitionKeys=id, sort=\"id desc\")))"; executorStream = factory.constructStream(executorExpression); StreamContext context = new StreamContext(); SolrClientCache clientCache = new SolrClientCache(); context.setSolrClientCache(clientCache); executorStream.setStreamContext(context); getTuples(executorStream); //Destination collection should now contain all the records 
cluster.getSolrClient().commit("destination"); paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", "search(destination, q=\"*:*\", fl=\"id, body_t, field_i\", rows=1000, sort=\"field_i asc\")"); paramsLoc.set("qt", "/stream"); SolrStream solrStream = new SolrStream(url, paramsLoc); List<Tuple> tuples = getTuples(solrStream); assertTrue(tuples.size() == 500); for(int i=0; i<500; i++) { Tuple tuple = tuples.get(i); long ivalue = tuple.getLong("field_i"); String body = tuple.getString("body_t"); assertTrue(ivalue == i); assertTrue(body.equals("hello world " + i)); } solrStream.close(); clientCache.close(); CollectionAdminRequest.deleteCollection("workQueue").process(cluster.getSolrClient()); CollectionAdminRequest.deleteCollection("mainCorpus").process(cluster.getSolrClient()); CollectionAdminRequest.deleteCollection("destination").process(cluster.getSolrClient()); } private Map<String,Double> getIdToLabel(TupleStream stream, String outField) throws IOException { Map<String, Double> idToLabel = new HashMap<>(); List<Tuple> tuples = getTuples(stream); for (Tuple tuple : tuples) { idToLabel.put(tuple.getString("id"), tuple.getDouble(outField)); } return idToLabel; } @Test public void testBasicTextLogitStream() throws Exception { Assume.assumeTrue(!useAlias); CollectionAdminRequest.createCollection("destinationCollection", "ml", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); UpdateRequest updateRequest = new UpdateRequest(); for (int i = 0; i < 5000; i+=2) { updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "1"); updateRequest.add(id, String.valueOf(i+1), "tv_text", "a b e e f", "out_i", "0"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamExpression expression; TupleStream stream; List<Tuple> tuples; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destinationCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("features", FeaturesSelectionStream.class) .withFunctionName("train", TextLogitStream.class) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class); try { expression = StreamExpressionParser.parse("features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4)"); stream = new FeaturesSelectionStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 4); HashSet<String> terms = new HashSet<>(); for (Tuple tuple : tuples) { terms.add((String) tuple.get("term_s")); } assertTrue(terms.contains("d")); assertTrue(terms.contains("c")); assertTrue(terms.contains("e")); assertTrue(terms.contains("f")); String textLogitExpression = "train(" + "collection1, " + "features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4)," + "q=\"*:*\", " + "name=\"model\", " + "field=\"tv_text\", " + "outcome=\"out_i\", " + "maxIterations=100)"; stream = factory.constructStream(textLogitExpression); stream.setStreamContext(streamContext); tuples = getTuples(stream); Tuple lastTuple = tuples.get(tuples.size() - 1); List<Double> lastWeights =
lastTuple.getDoubles("weights_ds"); Double[] lastWeightsArray = lastWeights.toArray(new Double[lastWeights.size()]); // first feature is bias value Double[] testRecord = {1.0, 1.17, 0.691, 0.0, 0.0}; double d = sum(multiply(testRecord, lastWeightsArray)); double prob = sigmoid(d); assertEquals(1.0, prob, 0.1); // first feature is bias value Double[] testRecord2 = {1.0, 0.0, 0.0, 1.17, 0.691}; d = sum(multiply(testRecord2, lastWeightsArray)); prob = sigmoid(d); assertEquals(0.0, prob, 0.1); stream = factory.constructStream("update(destinationCollection, batchSize=5, " + textLogitExpression + ")"); getTuples(stream); cluster.getSolrClient().commit("destinationCollection"); stream = factory.constructStream("search(destinationCollection, " + "q=*:*, " + "fl=\"iteration_i,*\", " + "rows=100, " + "sort=\"iteration_i desc\")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(100, tuples.size()); Tuple lastModel = tuples.get(0); ClassificationEvaluation evaluation = ClassificationEvaluation.create(lastModel.fields); // the two classes are separable on distinct terms, so the final model should score a perfect F1 assertTrue(evaluation.getF1() >= 1.0); assertEquals(Math.log(5000.0 / (2500 + 1)), lastModel.getDoubles("idfs_ds").get(0), 0.0001); // make sure the tuples are retrieved in the correct order Tuple firstTuple = tuples.get(99); assertEquals(1L, (long) firstTuple.getLong("iteration_i")); } finally { CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } // logistic function: maps a linear score to a probability in (0,1) private double sigmoid(double in) { double d = 1.0 / (1+Math.exp(-in)); return d; } // element-wise product of two vectors private double[] multiply(Double[] vec1, Double[] vec2) { double[] working = new double[vec1.length]; for(int i=0; i<vec1.length; i++) { working[i] = vec1[i]*vec2[i]; } return working; } private double sum(double[] vec) { double d = 0.0; for(double v : vec) { d += v; } return d; } @Test public void testParallelIntersectStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "setA", "a_i", "0") .add(id, "2", "a_s", "setA", "a_i", "1") .add(id, "3", "a_s", "setA", "a_i", "2") .add(id, "4", "a_s", "setA", "a_i", "3") .add(id, "5", "a_s", "setB", "a_i", "2") .add(id, "6", "a_s", "setB", "a_i", "3") .add(id, "7", "a_s", "setAB", "a_i", "0") .add(id, "8", "a_s", "setAB", "a_i", "6") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory streamFactory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("intersect", IntersectStream.class) .withFunctionName("parallel", ParallelStream.class); // basic StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); try { String zkHost = cluster.getZkServer().getZkAddress(); final TupleStream stream = streamFactory.constructStream("parallel(" + "collection1, " + "intersect(" + "search(collection1, q=a_s:(setA || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc, a_s asc\", partitionKeys=\"a_i\")," + "search(collection1, q=a_s:(setB || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\")," + "on=\"a_i\")," + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i asc\")"); stream.setStreamContext(streamContext); final List<Tuple> tuples = getTuples(stream); assert (tuples.size() == 5); assertOrder(tuples, 0, 7, 3, 4, 8); } finally { solrClientCache.close(); } } @Test public void testFeaturesSelectionStream() throws Exception { Assume.assumeTrue(!useAlias);
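// Like testBasicTextLogitStream, this test is skipped when the suite randomly runs
// against an alias (useAlias is set in setupCluster), because the expressions below
// address collection1 and destinationCollection by their concrete names.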
CollectionAdminRequest.createCollection("destinationCollection", "ml", 2, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); UpdateRequest updateRequest = new UpdateRequest(); for (int i = 0; i < 5000; i+=2) { updateRequest.add(id, String.valueOf(i), "whitetok", "a b c d", "out_i", "1"); updateRequest.add(id, String.valueOf(i+1), "whitetok", "a b e f", "out_i", "0"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamExpression expression; TupleStream stream; List<Tuple> tuples; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withCollectionZkHost("destinationCollection", cluster.getZkServer().getZkAddress()) .withFunctionName("featuresSelection", FeaturesSelectionStream.class) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("update", UpdateStream.class); try { String featuresExpression = "featuresSelection(collection1, q=\"*:*\", featureSet=\"first\", field=\"whitetok\", outcome=\"out_i\", numTerms=4)"; // basic expression = StreamExpressionParser.parse(featuresExpression); stream = new FeaturesSelectionStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 4); assertTrue(tuples.get(0).get("term_s").equals("c")); assertTrue(tuples.get(1).get("term_s").equals("d")); assertTrue(tuples.get(2).get("term_s").equals("e")); assertTrue(tuples.get(3).get("term_s").equals("f")); // update expression = StreamExpressionParser.parse("update(destinationCollection, batchSize=5, " + featuresExpression + ")"); stream = new UpdateStream(expression, factory); stream.setStreamContext(streamContext); getTuples(stream); cluster.getSolrClient().commit("destinationCollection"); expression = StreamExpressionParser.parse("search(destinationCollection, q=featureSet_s:first, fl=\"index_i, term_s\", sort=\"index_i asc\")"); stream = new CloudSolrStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(4, tuples.size()); assertTrue(tuples.get(0).get("term_s").equals("c")); assertTrue(tuples.get(1).get("term_s").equals("d")); assertTrue(tuples.get(2).get("term_s").equals("e")); assertTrue(tuples.get(3).get("term_s").equals("f")); } finally { CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient()); solrClientCache.close(); } } @Test public void testSignificantTermsStream() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); for (int i = 0; i < 5000; i++) { updateRequest.add(id, "a"+i, "test_t", "a b c d m l"); } for (int i = 0; i < 5000; i++) { updateRequest.add(id, "b"+i, "test_t", "a b e f"); } for (int i = 0; i < 900; i++) { updateRequest.add(id, "c"+i, "test_t", "c"); } for (int i = 0; i < 600; i++) { updateRequest.add(id, "d"+i, "test_t", "d"); } for (int i = 0; i < 500; i++) { updateRequest.add(id, "e"+i, "test_t", "m"); } updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); TupleStream stream; List<Tuple> tuples; StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withDefaultZkHost(cluster.getZkServer().getZkAddress()) .withFunctionName("significantTerms", 
SignificantTermsStream.class); StreamContext streamContext = new StreamContext(); SolrClientCache cache = new SolrClientCache(); streamContext.setSolrClientCache(cache); try { String significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=3, minTermLength=1, maxDocFreq=\".5\")"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 3); assertTrue(tuples.get(0).get("term").equals("l")); assertTrue(tuples.get(0).getLong("background") == 5000); assertTrue(tuples.get(0).getLong("foreground") == 5000); assertTrue(tuples.get(1).get("term").equals("m")); assertTrue(tuples.get(1).getLong("background") == 5500); assertTrue(tuples.get(1).getLong("foreground") == 5000); assertTrue(tuples.get(2).get("term").equals("d")); assertTrue(tuples.get(2).getLong("background") == 5600); assertTrue(tuples.get(2).getLong("foreground") == 5000); //Test maxDocFreq significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=3, maxDocFreq=2650, minTermLength=1)"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 1); assertTrue(tuples.get(0).get("term").equals("l")); assertTrue(tuples.get(0).getLong("background") == 5000); assertTrue(tuples.get(0).getLong("foreground") == 5000); //Test maxDocFreq percentage significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=3, maxDocFreq=\".45\", minTermLength=1)"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 1); assertTrue(tuples.get(0).get("term").equals("l")); assertTrue(tuples.get(0).getLong("background") == 5000); assertTrue(tuples.get(0).getLong("foreground") == 5000); //Test min doc freq significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=3, minDocFreq=\"2700\", minTermLength=1, maxDocFreq=\".5\")"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 3); assertTrue(tuples.get(0).get("term").equals("m")); assertTrue(tuples.get(0).getLong("background") == 5500); assertTrue(tuples.get(0).getLong("foreground") == 5000); assertTrue(tuples.get(1).get("term").equals("d")); assertTrue(tuples.get(1).getLong("background") == 5600); assertTrue(tuples.get(1).getLong("foreground") == 5000); assertTrue(tuples.get(2).get("term").equals("c")); assertTrue(tuples.get(2).getLong("background") == 5900); assertTrue(tuples.get(2).getLong("foreground") == 5000); //Test min doc freq percent significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=3, minDocFreq=\".478\", minTermLength=1, maxDocFreq=\".5\")"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 1); assertTrue(tuples.get(0).get("term").equals("c")); assertTrue(tuples.get(0).getLong("background") == 5900); assertTrue(tuples.get(0).getLong("foreground") == 5000); //Test limit significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=2, minDocFreq=\"2700\", minTermLength=1, maxDocFreq=\".5\")"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 2); assertTrue(tuples.get(0).get("term").equals("m")); 
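// For the assertions in this test: "foreground" is a term's document frequency within
// the q="id:a*" result set (the 5000 "a" docs), while "background" is its document
// frequency across the whole collection; e.g. "m" occurs in all 5000 "a" docs plus the
// 500 single-term "m" docs indexed above, giving background=5500 and foreground=5000.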
assertTrue(tuples.get(0).getLong("background") == 5500); assertTrue(tuples.get(0).getLong("foreground") == 5000); assertTrue(tuples.get(1).get("term").equals("d")); assertTrue(tuples.get(1).getLong("background") == 5600); assertTrue(tuples.get(1).getLong("foreground") == 5000); //Test term length significantTerms = "significantTerms(collection1, q=\"id:a*\", field=\"test_t\", limit=2, minDocFreq=\"2700\", minTermLength=2)"; stream = factory.constructStream(significantTerms); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 0); //Test with shards parameter List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext); Map<String, List<String>> shardsMap = new HashMap<>(); shardsMap.put("myCollection", shardUrls); StreamContext context = new StreamContext(); context.put("shards", shardsMap); context.setSolrClientCache(cache); significantTerms = "significantTerms(myCollection, q=\"id:a*\", field=\"test_t\", limit=2, minDocFreq=\"2700\", minTermLength=1, maxDocFreq=\".5\")"; stream = factory.constructStream(significantTerms); stream.setStreamContext(context); tuples = getTuples(stream); assert (tuples.size() == 2); assertTrue(tuples.get(0).get("term").equals("m")); assertTrue(tuples.get(0).getLong("background") == 5500); assertTrue(tuples.get(0).getLong("foreground") == 5000); assertTrue(tuples.get(1).get("term").equals("d")); assertTrue(tuples.get(1).getLong("background") == 5600); assertTrue(tuples.get(1).getLong("foreground") == 5000); //Exercise the /stream handler //Add the shards HTTP parameter for myCollection StringBuilder buf = new StringBuilder(); for (String shardUrl : shardUrls) { if (buf.length() > 0) { buf.append(","); } buf.append(shardUrl); } ModifiableSolrParams solrParams = new ModifiableSolrParams(); solrParams.add("qt", "/stream"); solrParams.add("expr", significantTerms); solrParams.add("myCollection.shards", buf.toString()); SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams); tuples = getTuples(solrStream); assert (tuples.size() == 2); assertTrue(tuples.get(0).get("term").equals("m")); assertTrue(tuples.get(0).getLong("background") == 5500); assertTrue(tuples.get(0).getLong("foreground") == 5000); assertTrue(tuples.get(1).get("term").equals("d")); assertTrue(tuples.get(1).getLong("background") == 5600); assertTrue(tuples.get(1).getLong("foreground") == 5000); //Add a negative test to prove that slices cannot be found when the shards parameter is removed try { ModifiableSolrParams solrParamsBad = new ModifiableSolrParams(); solrParamsBad.add("qt", "/stream"); solrParamsBad.add("expr", significantTerms); solrStream = new SolrStream(shardUrls.get(0), solrParamsBad); tuples = getTuples(solrStream); throw new Exception("Exception should have been thrown above"); } catch (IOException e) { assertTrue(e.getMessage().contains("Slices not found for myCollection")); } } finally { cache.close(); } } @Test public void testComplementStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "setA", "a_i", "0") .add(id, "2", "a_s", "setA", "a_i", "1") .add(id, "3", "a_s", "setA", "a_i", "2") .add(id, "4", "a_s", "setA", "a_i", "3") .add(id, "5", "a_s", "setB", "a_i", "2") .add(id, "6", "a_s", "setB", "a_i", "3") .add(id, "9", "a_s", "setB", "a_i", "5") .add(id, "7", "a_s", "setAB", "a_i", "0") .add(id, "8", "a_s", "setAB", "a_i", "6") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamExpression expression; TupleStream stream; List<Tuple> tuples;
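// complement() emits the tuples from the first (sorted) stream whose "on" key has no
// match in the second stream. The setA/setAB docs above cover a_i values {0, 1, 2, 3, 6}
// and the setB/setAB docs cover {0, 2, 3, 5, 6}, so only a_i=1 (id 2) should come
// through, which is what the assertions below check.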
StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("complement", ComplementStream.class); try { // basic expression = StreamExpressionParser.parse("complement(" + "search(collection1, q=a_s:(setA || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc, a_s asc\")," + "search(collection1, q=a_s:(setB || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc\")," + "on=\"a_i\")"); stream = new ComplementStream(expression, factory); stream.setStreamContext(streamContext); tuples = getTuples(stream); assert (tuples.size() == 1); assertOrder(tuples, 2); } finally { solrClientCache.close(); } } @Test public void testCartesianProductStream() throws Exception { new UpdateRequest() .add(id, "0", "a_ss", "a", "a_ss", "b", "a_ss", "c", "a_ss", "d", "a_ss", "e", "b_ls", "1", "b_ls", "2", "b_ls", "3") .add(id, "1", "a_ss", "a", "a_ss", "b", "a_ss", "c", "a_ss", "d", "a_ss", "e") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamExpression expression; TupleStream stream; List<Tuple> tuples; StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("cartesian", CartesianProductStream.class); // single selection, no sort try { stream = factory.constructStream("cartesian(" + "search(collection1, q=*:*, fl=\"id,a_ss\", sort=\"id asc\")," + "a_ss" + ")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(10, tuples.size()); assertOrder(tuples, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals("a", tuples.get(0).get("a_ss")); assertEquals("c", tuples.get(2).get("a_ss")); assertEquals("a", tuples.get(5).get("a_ss")); assertEquals("c", tuples.get(7).get("a_ss")); // single selection, sort stream = factory.constructStream("cartesian(" + "search(collection1, q=*:*, fl=\"id,a_ss\", sort=\"id asc\")," + "a_ss," + "productSort=\"a_ss DESC\"" + ")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(10, tuples.size()); assertOrder(tuples, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals("e", tuples.get(0).get("a_ss")); assertEquals("c", tuples.get(2).get("a_ss")); assertEquals("e", tuples.get(5).get("a_ss")); assertEquals("c", tuples.get(7).get("a_ss")); // multi selection, sort stream = factory.constructStream("cartesian(" + "search(collection1, q=*:*, fl=\"id,a_ss,b_ls\", sort=\"id asc\")," + "a_ss," + "b_ls," + "productSort=\"a_ss ASC\"" + ")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(20, tuples.size()); // (5 * 3) + 5 assertOrder(tuples, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals("a", tuples.get(0).get("a_ss")); assertEquals(1L, tuples.get(0).get("b_ls")); assertEquals("a", tuples.get(1).get("a_ss")); assertEquals(2L, tuples.get(1).get("b_ls")); assertEquals("a", tuples.get(2).get("a_ss")); assertEquals(3L, tuples.get(2).get("b_ls")); assertEquals("b", tuples.get(3).get("a_ss")); assertEquals(1L, tuples.get(3).get("b_ls")); assertEquals("b", tuples.get(4).get("a_ss")); assertEquals(2L, tuples.get(4).get("b_ls")); assertEquals("b", 
tuples.get(5).get("a_ss")); assertEquals(3L, tuples.get(5).get("b_ls")); // multi selection, compound sort stream = factory.constructStream("cartesian(" + "search(collection1, q=*:*, fl=\"id,a_ss,b_ls\", sort=\"id asc\")," + "a_ss," + "b_ls," + "productSort=\"a_ss ASC, b_ls DESC\"" + ")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(20, tuples.size()); // (5 * 3) + 5 assertOrder(tuples, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals("a", tuples.get(0).get("a_ss")); assertEquals(3L, tuples.get(0).get("b_ls")); assertEquals("a", tuples.get(1).get("a_ss")); assertEquals(2L, tuples.get(1).get("b_ls")); assertEquals("a", tuples.get(2).get("a_ss")); assertEquals(1L, tuples.get(2).get("b_ls")); assertEquals("b", tuples.get(3).get("a_ss")); assertEquals(3L, tuples.get(3).get("b_ls")); assertEquals("b", tuples.get(4).get("a_ss")); assertEquals(2L, tuples.get(4).get("b_ls")); assertEquals("b", tuples.get(5).get("a_ss")); assertEquals(1L, tuples.get(5).get("b_ls")); // multi selection, sort on the second field only stream = factory.constructStream("cartesian(" + "search(collection1, q=*:*, fl=\"id,a_ss,b_ls\", sort=\"id asc\")," + "a_ss," + "b_ls," + "productSort=\"b_ls DESC\"" + ")"); stream.setStreamContext(streamContext); tuples = getTuples(stream); assertEquals(20, tuples.size()); // (5 * 3) + 5 assertOrder(tuples, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals("a", tuples.get(0).get("a_ss")); assertEquals(3L, tuples.get(0).get("b_ls")); assertEquals("b", tuples.get(1).get("a_ss")); assertEquals(3L, tuples.get(1).get("b_ls")); assertEquals("c", tuples.get(2).get("a_ss")); assertEquals(3L, tuples.get(2).get("b_ls")); assertEquals("d", tuples.get(3).get("a_ss")); assertEquals(3L, tuples.get(3).get("b_ls")); assertEquals("e", tuples.get(4).get("a_ss")); assertEquals(3L, tuples.get(4).get("b_ls")); assertEquals("a", tuples.get(5).get("a_ss")); assertEquals(2L, tuples.get(5).get("b_ls")); assertEquals("b", tuples.get(6).get("a_ss")); assertEquals(2L, tuples.get(6).get("b_ls")); assertEquals("c", tuples.get(7).get("a_ss")); assertEquals(2L, tuples.get(7).get("b_ls")); assertEquals("d", tuples.get(8).get("a_ss")); assertEquals(2L, tuples.get(8).get("b_ls")); assertEquals("e", tuples.get(9).get("a_ss")); assertEquals(2L, tuples.get(9).get("b_ls")); } finally { solrClientCache.close(); } } @Test public void testParallelComplementStream() throws Exception { new UpdateRequest() .add(id, "0", "a_s", "setA", "a_i", "0") .add(id, "2", "a_s", "setA", "a_i", "1") .add(id, "3", "a_s", "setA", "a_i", "2") .add(id, "4", "a_s", "setA", "a_i", "3") .add(id, "5", "a_s", "setB", "a_i", "2") .add(id, "6", "a_s", "setB", "a_i", "3") .add(id, "9", "a_s", "setB", "a_i", "5") .add(id, "7", "a_s", "setAB", "a_i", "0") .add(id, "8", "a_s", "setAB", "a_i", "6") .commit(cluster.getSolrClient(), COLLECTIONORALIAS); StreamFactory streamFactory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("search", CloudSolrStream.class) .withFunctionName("complement", ComplementStream.class) .withFunctionName("parallel", ParallelStream.class); StreamContext streamContext = new StreamContext(); SolrClientCache solrClientCache = new SolrClientCache(); streamContext.setSolrClientCache(solrClientCache); try { final String zkHost = cluster.getZkServer().getZkAddress(); final TupleStream stream = streamFactory.constructStream("parallel(" + "collection1, " + "complement(" +
fl=\"id,a_s,a_i\", sort=\"a_i asc, a_s asc\", partitionKeys=\"a_i\")," + "search(collection1, q=a_s:(setB || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\")," + "on=\"a_i\")," + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i asc\")"); stream.setStreamContext(streamContext); final List<Tuple> tuples = getTuples(stream); assert (tuples.size() == 1); assertOrder(tuples, 2); } finally { solrClientCache.close(); } } protected List<Tuple> getTuples(TupleStream tupleStream) throws IOException { List<Tuple> tuples = new ArrayList<Tuple>(); try { tupleStream.open(); for (Tuple t = tupleStream.read(); !t.EOF; t = tupleStream.read()) { tuples.add(t); } } finally { tupleStream.close(); } return tuples; } protected boolean assertOrder(List<Tuple> tuples, int... ids) throws Exception { return assertOrderOf(tuples, "id", ids); } protected boolean assertOrderOf(List<Tuple> tuples, String fieldName, int... ids) throws Exception { int i = 0; for(int val : ids) { Tuple t = tuples.get(i); String tip = t.getString(fieldName); if(!tip.equals(Integer.toString(val))) { throw new Exception("Found value:"+tip+" expecting:"+val); } ++i; } return true; } protected boolean assertMapOrder(List<Tuple> tuples, int... ids) throws Exception { int i = 0; for(int val : ids) { Tuple t = tuples.get(i); List<Map> tip = t.getMaps("group"); int id = (int)tip.get(0).get("id"); if(id != val) { throw new Exception("Found value:"+id+" expecting:"+val); } ++i; } return true; } protected boolean assertFields(List<Tuple> tuples, String ... fields) throws Exception{ for(Tuple tuple : tuples){ for(String field : fields){ if(!tuple.fields.containsKey(field)){ throw new Exception(String.format(Locale.ROOT, "Expected field '%s' not found", field)); } } } return true; } protected boolean assertNotFields(List<Tuple> tuples, String ... fields) throws Exception{ for(Tuple tuple : tuples){ for(String field : fields){ if(tuple.fields.containsKey(field)){ throw new Exception(String.format(Locale.ROOT, "Unexpected field '%s' found", field)); } } } return true; } protected boolean assertGroupOrder(Tuple tuple, int... ids) throws Exception { List<?> group = (List<?>)tuple.get("tuples"); int i=0; for(int val : ids) { Map<?,?> t = (Map<?,?>)group.get(i); Long tip = (Long)t.get("id"); if(tip.intValue() != val) { throw new Exception("Found value:"+tip.intValue()+" expecting:"+val); } ++i; } return true; } public boolean assertLong(Tuple tuple, String fieldName, long l) throws Exception { long lv = (long)tuple.get(fieldName); if(lv != l) { throw new Exception("Longs not equal:"+l+" : "+lv); } return true; } public boolean assertString(Tuple tuple, String fieldName, String expected) throws Exception { String actual = (String)tuple.get(fieldName); if( (null == expected && null != actual) || (null != expected && null == actual) || (null != expected && !expected.equals(actual))){ throw new Exception("Longs not equal:"+expected+" : "+actual); } return true; } protected boolean assertMaps(List<Map> maps, int... ids) throws Exception { if(maps.size() != ids.length) { throw new Exception("Expected id count != actual map count:"+ids.length+":"+maps.size()); } int i=0; for(int val : ids) { Map t = maps.get(i); String tip = (String)t.get("id"); if(!tip.equals(Integer.toString(val))) { throw new Exception("Found value:"+tip+" expecting:"+val); } ++i; } return true; } private boolean assertList(List list, Object... 
vals) throws Exception { if(list.size() != vals.length) { throw new Exception("Lists are not the same size:"+list.size() +" : "+vals.length); } for(int i=0; i<list.size(); i++) { Object a = list.get(i); Object b = vals[i]; if(!a.equals(b)) { throw new Exception("List items not equal:"+a+" : "+b); } } return true; } private void assertTopicRun(TupleStream stream, String... idArray) throws Exception { long version = -1; int count = 0; List<String> ids = new ArrayList<>(); for(String id : idArray) { ids.add(id); } try { stream.open(); while (true) { Tuple tuple = stream.read(); if (tuple.EOF) { break; } else { ++count; String id = tuple.getString("id"); if (!ids.contains(id)) { throw new Exception("Expecting id in topic run not found:" + id); } long v = tuple.getLong("_version_"); if (v < version) { throw new Exception("Out of order version in topic run:" + v); } version = v; // track the last version seen so the ordering check above is effective } } } finally { stream.close(); } if(count != ids.size()) { throw new Exception("Wrong count in topic run:"+count); } } private void assertTopicSubject(TupleStream stream, String... textArray) throws Exception { List<String> texts = new ArrayList<>(); for(String text : textArray) { texts.add(text); } try { stream.open(); while (true) { Tuple tuple = stream.read(); if (tuple.EOF) { break; } else { String subject = tuple.getString("subject"); if (!texts.contains(subject)) { throw new Exception("Expecting subject in topic run not found:" + subject); } } } } finally { stream.close(); } } }