/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.sparql.engine.iterator ; import java.util.* ; import org.apache.jena.atlas.data.* ; import org.apache.jena.atlas.lib.InternalErrorException ; import org.apache.jena.query.ARQ ; import org.apache.jena.query.SortCondition ; import org.apache.jena.riot.system.SerializationFactoryFinder ; import org.apache.jena.sparql.ARQException ; import org.apache.jena.sparql.engine.ExecutionContext ; import org.apache.jena.sparql.engine.QueryIterator ; import org.apache.jena.sparql.engine.binding.Binding ; import org.apache.jena.sparql.engine.binding.BindingComparator ; import org.apache.jena.sparql.engine.binding.BindingProjectNamed ; /** * A QueryIterator that suppresses items already seen. This will stream results * until a threshold is passed. At that point, it will fill a disk-backed * {@link DistinctDataNet}, then yield * not return any results until the input iterator has been exhausted. * * @see DistinctDataBag */ public class QueryIterDistinct extends QueryIter1 { private long memThreshold = Long.MAX_VALUE ; // Default "off" value. /*package*/ DistinctDataBag<Binding> db = null ; private Iterator<Binding> iterator = null ; private Set<Binding> seen = new HashSet<>() ; private Binding slot = null ; public QueryIterDistinct(QueryIterator qIter, ExecutionContext execCxt) { super(qIter, execCxt) ; if ( execCxt != null ) { memThreshold = execCxt.getContext().getLong(ARQ.spillToDiskThreshold, memThreshold) ; if ( memThreshold < 0 ) throw new ARQException("BAd spillToDiskThreshold: "+memThreshold) ; } } @Override protected boolean hasNextBinding() { if ( slot != null ) return true ; if ( iterator != null ) // Databag active. return iterator.hasNext() ; // At this point, we are currently in the initial pre-threshold mode. if ( seen.size() < memThreshold ) { Binding b = getInputNextUnseen() ; if ( b == null ) return false ; seen.add(b) ; slot = b ; return true ; } // Hit the threshold. loadDataBag() ; // Switch to iterating from the data bag. iterator = db.iterator() ; // Leave slot null. return iterator.hasNext() ; } /** Load the data bag with. Filter incoming by the already seen in-memory elements */ private void loadDataBag() { ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(super.getExecContext().getContext()) ; Comparator<Binding> comparator = new BindingComparator(new ArrayList<SortCondition>(), super.getExecContext()) ; this.db = BagFactory.newDistinctBag(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator) ; for(;;) { Binding b = getInputNextUnseen() ; if ( b == null ) break ; db.add(b) ; } } /** Return the next binding from the input filtered by seen. * This does not update seen. * Returns null on end of input. */ private Binding getInputNextUnseen() { while( getInput().hasNext() ) { Binding b = getInputNext() ; if ( seen.contains(b) ) continue ; return b ; } return null ; } /** Return the binding from the input, hiding any variables to be ignored. */ private Binding getInputNext() { Binding b = getInput().next() ; // Hide unnamed and internal variables. b = new BindingProjectNamed(b) ; return b ; } @Override protected Binding moveToNextBinding() { if ( slot != null ) { Binding b = slot ; slot = null ; return b ; } if ( iterator != null ) { Binding b = iterator.next() ; return b ; } throw new InternalErrorException() ; } @Override protected void closeSubIterator() { if ( db != null ) { iterator = null ; db.close() ; } db = null ; } // We don't need to do anything. We're a QueryIter1 // and that handles the cancellation of the wrapped // iterator. @Override protected void requestSubCancel() { } }