/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.client.solrj.io.stream; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.stream.Collectors; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.comp.ComparatorOrder; import org.apache.solr.client.solrj.io.comp.FieldComparator; import org.apache.solr.client.solrj.io.comp.MultipleFieldComparator; import org.apache.solr.client.solrj.io.comp.StreamComparator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrjNamedThreadFactory; import org.apache.solr.common.util.StrUtils; import static org.apache.solr.common.params.CommonParams.DISTRIB; import static org.apache.solr.common.params.CommonParams.SORT; /** * Connects to Zookeeper to pick replicas from a specific collection to send the query to. * Under the covers the SolrStream instances send the query to the replicas. * SolrStreams are opened using a thread pool, but a single thread is used * to iterate and merge Tuples from each SolrStream. **/ public class CloudSolrStream extends TupleStream implements Expressible { private static final long serialVersionUID = 1; protected String zkHost; protected String collection; protected SolrParams params; protected Map<String, String> fieldMappings; protected StreamComparator comp; private boolean trace; protected transient Map<String, Tuple> eofTuples; protected transient CloudSolrClient cloudSolrClient; protected transient List<TupleStream> solrStreams; protected transient TreeSet<TupleWrapper> tuples; protected transient StreamContext streamContext; // Used by parallel stream protected CloudSolrStream(){ } /** * @param zkHost Zookeeper ensemble connection string * @param collectionName Name of the collection to operate on * @param params Map<String, String> of parameter/value pairs * @throws IOException Something went wrong * <p> * This form does not allow specifying multiple clauses, say "fq" clauses, use the form that * takes a SolrParams. Transition code can call the preferred method that takes SolrParams * by calling CloudSolrStream(zkHost, collectionName, * new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(Map<String, String>))); * @deprecated Use the constructor that has a SolrParams obj rather than a Map */ @Deprecated public CloudSolrStream(String zkHost, String collectionName, Map params) throws IOException { init(collectionName, zkHost, new MapSolrParams(params)); } /** * @param zkHost Zookeeper ensemble connection string * @param collectionName Name of the collection to operate on * @param params Map<String, String[]> of parameter/value pairs * @throws IOException Something went wrong */ public CloudSolrStream(String zkHost, String collectionName, SolrParams params) throws IOException { init(collectionName, zkHost, params); } public CloudSolrStream(StreamExpression expression, StreamFactory factory) throws IOException{ // grab all parameters out String collectionName = factory.getValueOperand(expression, 0); List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression); StreamExpressionNamedParameter aliasExpression = factory.getNamedOperand(expression, "aliases"); StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost"); // Collection Name if(null == collectionName){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression)); } // Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice if(expression.getParameters().size() != 1 + namedParams.size()){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression)); } // Named parameters - passed directly to solr as solrparams if(0 == namedParams.size()){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression)); } ModifiableSolrParams mParams = new ModifiableSolrParams(); for(StreamExpressionNamedParameter namedParam : namedParams){ if(!namedParam.getName().equals("zkHost") && !namedParam.getName().equals("aliases")){ mParams.add(namedParam.getName(), namedParam.getParameter().toString().trim()); } } // Aliases, optional, if provided then need to split if(null != aliasExpression && aliasExpression.getParameter() instanceof StreamExpressionValue){ fieldMappings = new HashMap<>(); for(String mapping : ((StreamExpressionValue)aliasExpression.getParameter()).getValue().split(",")){ String[] parts = mapping.trim().split("="); if(2 == parts.length){ fieldMappings.put(parts[0], parts[1]); } else{ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - alias expected of the format origName=newName",expression)); } } } // zkHost, optional - if not provided then will look into factory list to get String zkHost = null; if(null == zkHostExpression){ zkHost = factory.getCollectionZkHost(collectionName); if(zkHost == null) { zkHost = factory.getDefaultZkHost(); } } else if(zkHostExpression.getParameter() instanceof StreamExpressionValue){ zkHost = ((StreamExpressionValue)zkHostExpression.getParameter()).getValue(); } /* if(null == zkHost){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - zkHost not found for collection '%s'",expression,collectionName)); } */ // We've got all the required items init(collectionName, zkHost, mParams); } @Override public StreamExpression toExpression(StreamFactory factory) throws IOException { // functionName(collectionName, param1, param2, ..., paramN, sort="comp", [aliases="field=alias,..."]) // function name StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass())); // collection expression.addParameter(collection); // parameters ModifiableSolrParams mParams = new ModifiableSolrParams(SolrParams.toMultiMap(params.toNamedList())); for (Entry<String, String[]> param : mParams.getMap().entrySet()) { String value = String.join(",", param.getValue()); // SOLR-8409: This is a special case where the params contain a " character // Do note that in any other BASE streams with parameters where a " might come into play // that this same replacement needs to take place. value = value.replace("\"", "\\\""); expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), value)); } // zkHost expression.addParameter(new StreamExpressionNamedParameter("zkHost", zkHost)); // aliases if(null != fieldMappings && 0 != fieldMappings.size()){ StringBuilder sb = new StringBuilder(); for(Entry<String,String> mapping : fieldMappings.entrySet()){ if(sb.length() > 0){ sb.append(","); } sb.append(mapping.getKey()); sb.append("="); sb.append(mapping.getValue()); } expression.addParameter(new StreamExpressionNamedParameter("aliases", sb.toString())); } return expression; } @Override public Explanation toExplanation(StreamFactory factory) throws IOException { StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString()); explanation.setFunctionName(factory.getFunctionName(this.getClass())); explanation.setImplementingClass(this.getClass().getName()); explanation.setExpressionType(ExpressionType.STREAM_SOURCE); explanation.setExpression(toExpression(factory).toString()); // child is a datastore so add it at this point StreamExplanation child = new StreamExplanation(getStreamNodeId() + "-datastore"); child.setFunctionName(String.format(Locale.ROOT, "solr (%s)", collection)); child.setImplementingClass("Solr/Lucene"); child.setExpressionType(ExpressionType.DATASTORE); if(null != params){ ModifiableSolrParams mParams = new ModifiableSolrParams(params); child.setExpression(mParams.getMap().entrySet().stream().map(e -> String.format(Locale.ROOT, "%s=%s", e.getKey(), e.getValue())).collect(Collectors.joining(","))); } explanation.addChild(child); return explanation; } protected void init(String collectionName, String zkHost, SolrParams params) throws IOException { this.zkHost = zkHost; this.collection = collectionName; this.params = new ModifiableSolrParams(params); // If the comparator is null then it was not explicitly set so we will create one using the sort parameter // of the query. While doing this we will also take into account any aliases such that if we are sorting on // fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA. if (params.get("q") == null) { throw new IOException("q param expected for search function"); } if (params.getParams("fl") == null) { throw new IOException("fl param expected for search function"); } String fls = String.join(",", params.getParams("fl")); if (params.getParams(SORT) == null) { throw new IOException("sort param expected for search function"); } String sorts = String.join(",", params.getParams(SORT)); this.comp = parseComp(sorts, fls); } public void setFieldMappings(Map<String, String> fieldMappings) { this.fieldMappings = fieldMappings; } public void setTrace(boolean trace) { this.trace = trace; } public void setStreamContext(StreamContext context) { this.streamContext = context; } /** * Opens the CloudSolrStream * ***/ public void open() throws IOException { this.tuples = new TreeSet(); this.solrStreams = new ArrayList(); this.eofTuples = Collections.synchronizedMap(new HashMap()); constructStreams(); openStreams(); } public Map getEofTuples() { return this.eofTuples; } public List<TupleStream> children() { return solrStreams; } private StreamComparator parseComp(String sort, String fl) throws IOException { String[] fls = fl.split(","); HashSet fieldSet = new HashSet(); for(String f : fls) { fieldSet.add(f.trim()); //Handle spaces in the field list. } String[] sorts = sort.split(","); StreamComparator[] comps = new StreamComparator[sorts.length]; for(int i=0; i<sorts.length; i++) { String s = sorts[i]; String[] spec = s.trim().split("\\s+"); //This should take into account spaces in the sort spec. if (spec.length != 2) { throw new IOException("Invalid sort spec:" + s); } String fieldName = spec[0].trim(); String order = spec[1].trim(); if(!fieldSet.contains(spec[0])) { throw new IOException("Fields in the sort spec must be included in the field list:"+spec[0]); } // if there's an alias for the field then use the alias if(null != fieldMappings && fieldMappings.containsKey(fieldName)){ fieldName = fieldMappings.get(fieldName); } comps[i] = new FieldComparator(fieldName, order.equalsIgnoreCase("asc") ? ComparatorOrder.ASCENDING : ComparatorOrder.DESCENDING); } if(comps.length > 1) { return new MultipleFieldComparator(comps); } else { return comps[0]; } } public static Collection<Slice> getSlices(String collectionName, ZkStateReader zkStateReader, boolean checkAlias) throws IOException { ClusterState clusterState = zkStateReader.getClusterState(); Map<String, DocCollection> collectionsMap = clusterState.getCollectionsMap(); // Check collection case sensitive if(collectionsMap.containsKey(collectionName)) { return collectionsMap.get(collectionName).getActiveSlices(); } // Check collection case insensitive for(String collectionMapKey : collectionsMap.keySet()) { if(collectionMapKey.equalsIgnoreCase(collectionName)) { return collectionsMap.get(collectionMapKey).getActiveSlices(); } } if(checkAlias) { // check for collection alias Aliases aliases = zkStateReader.getAliases(); String alias = aliases.getCollectionAlias(collectionName); if (alias != null) { Collection<Slice> slices = new ArrayList<>(); List<String> aliasList = StrUtils.splitSmart(alias, ",", true); for (String aliasCollectionName : aliasList) { // Add all active slices for this alias collection slices.addAll(collectionsMap.get(aliasCollectionName).getActiveSlices()); } return slices; } } throw new IOException("Slices not found for " + collectionName); } protected void constructStreams() throws IOException { try { List<String> shardUrls = getShards(this.zkHost, this.collection, this.streamContext); ModifiableSolrParams mParams = new ModifiableSolrParams(params); mParams = adjustParams(mParams); mParams.set(DISTRIB, "false"); // We are the aggregator. for(String shardUrl : shardUrls) { SolrStream solrStream = new SolrStream(shardUrl, mParams); if(streamContext != null) { solrStream.setStreamContext(streamContext); } solrStream.setFieldMappings(this.fieldMappings); solrStreams.add(solrStream); } } catch (Exception e) { throw new IOException(e); } } private void openStreams() throws IOException { ExecutorService service = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("CloudSolrStream")); try { List<Future<TupleWrapper>> futures = new ArrayList(); for (TupleStream solrStream : solrStreams) { StreamOpener so = new StreamOpener((SolrStream) solrStream, comp); Future<TupleWrapper> future = service.submit(so); futures.add(future); } try { for (Future<TupleWrapper> f : futures) { TupleWrapper w = f.get(); if (w != null) { tuples.add(w); } } } catch (Exception e) { throw new IOException(e); } } finally { service.shutdown(); } } /** * Closes the CloudSolrStream **/ public void close() throws IOException { if(solrStreams != null) { for (TupleStream solrStream : solrStreams) { solrStream.close(); } } } /** Return the stream sort - ie, the order in which records are returned */ public StreamComparator getStreamSort(){ return comp; } public Tuple read() throws IOException { return _read(); } protected Tuple _read() throws IOException { TupleWrapper tw = tuples.pollFirst(); if(tw != null) { Tuple t = tw.getTuple(); if (trace) { t.put("_COLLECTION_", this.collection); } if(tw.next()) { tuples.add(tw); } return t; } else { Map m = new HashMap(); if(trace) { m.put("_COLLECTION_", this.collection); } m.put("EOF", true); return new Tuple(m); } } protected class TupleWrapper implements Comparable<TupleWrapper> { private Tuple tuple; private SolrStream stream; private StreamComparator comp; public TupleWrapper(SolrStream stream, StreamComparator comp) { this.stream = stream; this.comp = comp; } public int compareTo(TupleWrapper w) { if(this == w) { return 0; } int i = comp.compare(tuple, w.tuple); if(i == 0) { return 1; } else { return i; } } public boolean equals(Object o) { return this == o; } public Tuple getTuple() { return tuple; } public boolean next() throws IOException { this.tuple = stream.read(); if(tuple.EOF) { eofTuples.put(stream.getBaseUrl(), tuple); } return !tuple.EOF; } } protected class StreamOpener implements Callable<TupleWrapper> { private SolrStream stream; private StreamComparator comp; public StreamOpener(SolrStream stream, StreamComparator comp) { this.stream = stream; this.comp = comp; } public TupleWrapper call() throws Exception { stream.open(); TupleWrapper wrapper = new TupleWrapper(stream, comp); if(wrapper.next()) { return wrapper; } else { return null; } } } protected ModifiableSolrParams adjustParams(ModifiableSolrParams params) { return params; } }