/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.relational.cursors;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import xxl.core.collections.bags.ArrayBag;
import xxl.core.collections.bags.Bag;
import xxl.core.collections.bags.ListBag;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.MetaDataCursor;
import xxl.core.functions.Function;
import xxl.core.predicates.AbstractPredicate;
import xxl.core.predicates.Equal;
import xxl.core.relational.cursors.SortMergeJoin.PredicateBasedSA;
import xxl.core.relational.metaData.MergedResultSetMetaData;
import xxl.core.relational.metaData.ResultSetMetaDatas;
import xxl.core.relational.tuples.ArrayTuple;
import xxl.core.relational.tuples.Tuple;
import xxl.core.relational.tuples.Tuples;
import xxl.core.util.metaData.CompositeMetaData;
import xxl.core.util.metaData.MetaDataException;
/**
* This class provides the division operator of the relational algebra.
*
* <p>Let us consider the division of <tt>R</tt> and <tt>S</tt> with
* <tt>R[A1,...,Ai,B1,...,Bj]</tt> and <tt>S[B1,...,Bj]</tt>. Then the result
* <tt>Res</tt> has schema <tt>Res[A1,...,Ai]</tt>. For every result tuple
* <tt>res</tt> and every tuple <tt>s</tt> from <tt>S</tt>, the tuple
* <tt>(res,s)</tt> is contained in <tt>R</tt>.</p>
*
* <p>In other words, the division computes all elements of <tt>R</tt>
* projected to the <tt>A</tt>-attributes, that are contained in <tt>R</tt>
* with <b>all</b> tuples of <tt>S</tt>.</p>
*
* <p>The division is an operation that can deliver interesting information
* from databases!</p>
*
* <p>This class performs a sort-merge join (<tt>R natural join S</tt>). Then,
* it outputs all elements projected to <tt>[A1,...,Ai]</tt> that occur exactly
* <tt>|S|</tt> times in the join result.</p>
*/
public class SortBasedDivision extends AbstractCursor<Tuple> implements MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> {
/**
* A counter saving the number of tuples of the second input.
*/
protected int counter = 0;
/**
* A metadata cursor representing the result of the division.
*/
protected MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> result;
/**
* Constructs an instance of the sort-based division operator.
*
* @param sortedDistinctCursor1 the sorted metadata cursor (by the all
* attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
* containing elements of the first input relation (no duplicates
* allowed).
* @param sortedDistinctCursor2 the sorted metadata cursor (by the quotient
* attributes) containing elements of the second input relation (no
* duplicates allowed).
* @param bag a bag that is used for the sweep area of the internal
* {@link SortMergeJoin} operator.
* @param tupleFactory a function that maps a list of objects (column
* values) to a new tuple. Classes implementing the Tuple interface
* should provide factory methods for this task. If
* <code>null</code> is passed, a factory method producing
* {@link ArrayTuple array-tuples} is used.
*/
public SortBasedDivision(MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> sortedDistinctCursor1, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> sortedDistinctCursor2, Bag<Tuple> bag, Function<Object, ? extends Tuple> tupleFactory) {
try {
SortMergeJoin join = new SortMergeJoin(
sortedDistinctCursor1,
sortedDistinctCursor2,
// SweepArea0 would always contain exactly one element.
// But no solution can be generated by this SweepArea.
// The use of an EmptyBag would be possible (without exceptions)!
new PredicateBasedSA<Tuple>(
new ArrayBag<Tuple>(1),
SortMergeJoin.computeMetaDataPredicate(
sortedDistinctCursor1,
sortedDistinctCursor2,
SortMergeJoin.Type.NATURAL_JOIN
),
0
) {
@Override
public void reorganize(Tuple currentStatus, int ID) throws IllegalStateException {
clear();
}
},
new PredicateBasedSA<Tuple>(
bag,
SortMergeJoin.computeMetaDataPredicate(
sortedDistinctCursor1,
sortedDistinctCursor2,
SortMergeJoin.Type.NATURAL_JOIN
),
1
) {
@Override
public void insert(Tuple tuple) {
super.insert(tuple);
counter++;
}
@Override
public void reorganize(Tuple currentStatus, int ID) throws IllegalStateException {
// nothing to reorganize
}
},
new Comparator<Tuple>() {
public int compare(Tuple tuple1, Tuple tuple2) {
return 1;
}
},
tupleFactory,
SortMergeJoin.Type.NATURAL_JOIN
);
MergedResultSetMetaData joinMetaData = (MergedResultSetMetaData)ResultSetMetaDatas.getResultSetMetaData(join);
ArrayList<Integer> indices = new ArrayList<Integer>();
columns: for (int column = 1; column <= joinMetaData.getColumnCount(); column++) {
Iterator<Integer> metadatas = joinMetaData.originalMetaDataIndices(column);
while (metadatas.hasNext())
if (metadatas.next() == 1)
continue columns;
indices.add(column);
}
int[] projectedColumns = new int[indices.size()];
for (int i = 0; i < projectedColumns.length; projectedColumns[i] = indices.get(i++));
final Comparator<Tuple> tupleComparator = Tuples.getTupleComparator(projectedColumns);
this.result = new Selection(
new Projection(
join,
tupleFactory,
projectedColumns
),
new AbstractPredicate<Tuple>() {
protected Tuple last = null;
protected int noOfResults = 0;
@Override
public boolean invoke(Tuple tuple) {
if (last == null || tupleComparator.compare(last, tuple) != 0)
noOfResults = 0;
noOfResults++;
last = tuple;
return noOfResults == counter;
}
}
);
}
catch (SQLException sqle) {
throw new MetaDataException("sql exception occured during meta data construction: \'" + sqle.getMessage() + "\'");
}
}
/**
* Constructs an instance of the sort-based division operator. An
* {@link ListBag list-bag} is used for the sweep area of the internal
* {@link SortMergeJoin} operator.
*
* @param sortedCursor1 the sorted metadata cursor (by the all
* attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
* containing elements of the first input relation.
* @param sortedCursor2 the sorted metadata cursor (by the quotient
* attributes) containing elements of the second input relation.
* @param tupleFactory a function that maps a list of objects (column
* values) to a new tuple. Classes implementing the Tuple interface
* should provide factory methods for this task. If
* <code>null</code> is passed, a factory method producing
* {@link ArrayTuple array-tuples} is used.
*/
public SortBasedDivision(MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> sortedCursor1, MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> sortedCursor2, Function<Object, ? extends Tuple> tupleFactory) {
this(
new SortBasedDistinct(sortedCursor1, Equal.DEFAULT_INSTANCE),
new SortBasedDistinct(sortedCursor2, Equal.DEFAULT_INSTANCE),
new ListBag<Tuple>(),
tupleFactory
);
}
/**
* Constructs an instance of the sort-based division operator.
*
* @param sortedDistinctResultSet1 the sorted result set (by the all
* attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
* containing elements of the first input relation (no duplicates
* allowed).
* @param sortedDistinctResultSet2 the sorted result set (by the quotient
* attributes) containing elements of the second input relation (no
* duplicates allowed).
* @param bag a bag that is used for the sweep area of the internal
* {@link SortMergeJoin} operator.
* @param tupleFactory a function that maps a list of objects (column
* values) to a new tuple. Classes implementing the Tuple interface
* should provide factory methods for this task. If
* <code>null</code> is passed, a factory method producing
* {@link ArrayTuple array-tuples} is used.
*/
public SortBasedDivision(ResultSet sortedDistinctResultSet1, ResultSet sortedDistinctResultSet2, Bag<Tuple> bag, Function<Object, ? extends Tuple> tupleFactory) {
this(
new ResultSetMetaDataCursor(sortedDistinctResultSet1),
new ResultSetMetaDataCursor(sortedDistinctResultSet2),
bag,
tupleFactory
);
}
/**
* Constructs an instance of the sort-based division operator. An
* {@link ListBag list} is used for the sweep area of the internal
* {@link SortMergeJoin} operator.
*
* @param sortedResultSet1 the sorted result set (by the all attributes:
* <tt>order by A1, ..., Ai, B1, ..., Bj</tt>) containing elements
* of the first input relation.
* @param sortedResultSet2 the sorted result set (by the quotient
* attributes) containing elements of the second input relation.
* @param tupleFactory a function that maps a list of objects (column
* values) to a new tuple. Classes implementing the Tuple interface
* should provide factory methods for this task. If
* <code>null</code> is passed, a factory method producing
* {@link ArrayTuple array-tuples} is used.
*/
public SortBasedDivision(ResultSet sortedResultSet1, ResultSet sortedResultSet2, Function<Object, ? extends Tuple> tupleFactory) {
this(
new ResultSetMetaDataCursor(sortedResultSet1),
new ResultSetMetaDataCursor(sortedResultSet2),
tupleFactory
);
}
/**
* Returns <code>true</code> if the iteration has more elements. (In other
* words, returns <code>true</code> if <code>next</code> or
* <code>peek</code> would return an element rather than throwing an
* exception.)
*
* @return <code>true</code> if the cursor has more elements.
*/
@Override
public boolean hasNextObject() {
return result.hasNext();
}
/**
* Returns the next element in the iteration. This element will be
* accessible by some of the cursor's methods, e.g., <code>update</code> or
* <code>remove</code>, until a call to <code>next</code> or
* <code>peek</code> occurs. This is calling <code>next</code> or
* <code>peek</code> proceeds the iteration and therefore its previous
* element will not be accessible any more.
*
* @return the next element in the iteration.
*/
@Override
public Tuple nextObject() {
return result.next();
}
/**
* Resets the cursor to its initial state such that the caller is able to
* traverse the underlying data structure again without constructing a new
* cursor (optional operation). The modifications, removes and updates
* concerning the underlying data structure, are still persistent.
*
* <p>Note, that this operation is optional and does not work for this
* cursor.</p>
*
* @throws UnsupportedOperationException if the <code>reset</code>
* operation is not supported by the cursor.
*/
@Override
public void reset() throws UnsupportedOperationException {
super.reset();
result.reset();
}
/**
* Returns <code>true</code> if the <code>reset</code> operation is
* supported by the cursor. Otherwise it returns <code>false</code>.
*
* @return <code>true</code> if the <code>reset</code> operation is
* supported by the cursor, otherwise <code>false</code>.
*/
@Override
public boolean supportsReset() {
return true;
}
/**
* Closes the cursor, i.e., signals the cursor to clean up resources, close
* files, etc. When a cursor has been closed calls to methods like
* <code>next</code> or <code>peek</code> are not guaranteed to yield
* proper results. Multiple calls to <code>close</code> do not have any
* effect, i.e., if <code>close</code> was called the cursor remains in the
* state <i>closed</i>.
*
* <p>Note, that a closed cursor usually cannot be opened again because of
* the fact that its state generally cannot be restored when resources are
* released respectively files are closed.</p>
*/
@Override
public void close() {
if (isClosed)
return;
super.close();
result.close();
}
/**
* Returns the metadata information for this metadata-cursor as a composite
* metadata ({@link CompositeMetaData}).
*
* @return the metadata information for this metadata-cursor as a composite
* metadata ({@link CompositeMetaData}).
*/
public CompositeMetaData<Object, Object> getMetaData() {
return result.getMetaData();
}
}