/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.client.solrj.io.stream; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Locale; import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.comp.FieldComparator; import org.apache.solr.client.solrj.io.comp.StreamComparator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; /** * Merges two or more streams together ordering the Tuples based on a Comparator. * All streams must be sorted by the fields being compared - this will be validated on construction. **/ public class MergeStream extends TupleStream implements Expressible { private static final long serialVersionUID = 1; private PushBackStream[] streams; private StreamComparator comp; public MergeStream(TupleStream streamA, TupleStream streamB, StreamComparator comp) throws IOException { init(comp, streamA, streamB); } public MergeStream(StreamComparator comp, TupleStream ... streams) throws IOException { init(comp, streams); } public MergeStream(StreamExpression expression,StreamFactory factory) throws IOException { // grab all parameters out List<StreamExpression> streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class); StreamExpressionNamedParameter onExpression = factory.getNamedOperand(expression, "on"); // validate expression contains only what we want. if(expression.getParameters().size() != streamExpressions.size() + 1){ throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - unknown operands found", expression)); } if(streamExpressions.size() < 2){ throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least two streams but found %d (must be PushBackStream types)",expression, streamExpressions.size())); } if(null == onExpression || !(onExpression.getParameter() instanceof StreamExpressionValue)){ throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting single 'on' parameter listing fields to merge on but didn't find one",expression)); } TupleStream[] streams = new TupleStream[streamExpressions.size()]; for(int idx = 0; idx < streamExpressions.size(); ++idx){ streams[idx] = factory.constructStream(streamExpressions.get(idx)); } init( factory.constructComparator(((StreamExpressionValue)onExpression.getParameter()).getValue(), FieldComparator.class), streams ); } private void init(StreamComparator comp, TupleStream ... streams) throws IOException { // All streams must both be sorted so that comp can be derived from for(TupleStream stream : streams){ if(!comp.isDerivedFrom(stream.getStreamSort())){ throw new IOException("Invalid MergeStream - all substream comparators (sort) must be a superset of this stream's comparator."); } } // Convert to PushBack streams so we can push back tuples this.streams = new PushBackStream[streams.length]; for(int idx = 0; idx < streams.length; ++idx){ this.streams[idx] = new PushBackStream(streams[idx]); } this.comp = comp; } @Override public StreamExpression toExpression(StreamFactory factory) throws IOException{ return toExpression(factory, true); } private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException { // function name StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass())); // streams for(PushBackStream stream : streams){ if(includeStreams){ expression.addParameter(stream.toExpression(factory)); } else{ expression.addParameter("<stream>"); } } // on expression.addParameter(new StreamExpressionNamedParameter("on",comp.toExpression(factory))); return expression; } @Override public Explanation toExplanation(StreamFactory factory) throws IOException { StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString()); explanation.setFunctionName(factory.getFunctionName(this.getClass())); explanation.setImplementingClass(this.getClass().getName()); explanation.setExpressionType(ExpressionType.STREAM_DECORATOR); explanation.setExpression(toExpression(factory, false).toString()); explanation.addHelper(comp.toExplanation(factory)); for(PushBackStream stream : streams){ explanation.addChild(stream.toExplanation(factory)); } return explanation; } public void setStreamContext(StreamContext context) { for(PushBackStream stream : streams){ stream.setStreamContext(context); } } public List<TupleStream> children() { List<TupleStream> l = new ArrayList<TupleStream>(); for(PushBackStream stream : streams){ l.add(stream); } return l; } public void open() throws IOException { for(PushBackStream stream : streams){ stream.open(); } } public void close() throws IOException { for(PushBackStream stream : streams){ stream.close(); } } public Tuple read() throws IOException { // might be able to optimize this by sorting the streams based on the next to read tuple from each. // if we can ensure the sort of the streams and update it in less than linear time then there would // be some performance gain. But, assuming the # of streams is kinda small then this might not be // worth it Tuple minimum = null; PushBackStream minimumStream = null; for(PushBackStream stream : streams){ Tuple current = stream.read(); if(current.EOF){ stream.pushBack(current); continue; } if(null == minimum){ minimum = current; minimumStream = stream; continue; } if(comp.compare(current, minimum) < 0){ // Push back on its stream minimumStream.pushBack(minimum); minimum = current; minimumStream = stream; continue; } else{ stream.pushBack(current); } } // If all EOF then min will be null, else min is the current minimum if(null == minimum){ // return EOF, doesn't matter which cause we're done return streams[0].read(); } return minimum; // Tuple a = streamA.read(); // Tuple b = streamB.read(); // // if(a.EOF && b.EOF) { // return a; // } // // if(a.EOF) { // streamA.pushBack(a); // return b; // } // // if(b.EOF) { // streamB.pushBack(b); // return a; // } // // int c = comp.compare(a,b); // // if(c < 0) { // streamB.pushBack(b); // return a; // } else { // streamA.pushBack(a); // return b; // } } /** Return the stream sort - ie, the order in which records are returned */ public StreamComparator getStreamSort(){ return comp; } public int getCost() { return 0; } }