SortBasedDivision.java example

Explorer
xxl-master
/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/

package xxl.core.relational.cursors;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;

import xxl.core.collections.bags.ArrayBag;
import xxl.core.collections.bags.Bag;
import xxl.core.collections.bags.ListBag;
import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.MetaDataCursor;
import xxl.core.functions.Function;
import xxl.core.predicates.AbstractPredicate;
import xxl.core.predicates.Equal;
import xxl.core.relational.cursors.SortMergeJoin.PredicateBasedSA;
import xxl.core.relational.metaData.MergedResultSetMetaData;
import xxl.core.relational.metaData.ResultSetMetaDatas;
import xxl.core.relational.tuples.ArrayTuple;
import xxl.core.relational.tuples.Tuple;
import xxl.core.relational.tuples.Tuples;
import xxl.core.util.metaData.CompositeMetaData;
import xxl.core.util.metaData.MetaDataException;

/**
 * This class provides the division operator of the relational algebra.
 * 
 * <p>Let us consider the division of <tt>R</tt> and <tt>S</tt> with
 * <tt>R[A1,...,Ai,B1,...,Bj]</tt> and <tt>S[B1,...,Bj]</tt>. Then the result
 * <tt>Res</tt> has schema <tt>Res[A1,...,Ai]</tt>. For every result tuple
 * <tt>res</tt> and every tuple <tt>s</tt> from <tt>S</tt>, the tuple
 * <tt>(res,s)</tt> is contained in <tt>R</tt>.</p>
 * 
 * <p>In other words, the division computes all elements of <tt>R</tt>
 * projected to the <tt>A</tt>-attributes, that are contained in <tt>R</tt>
 * with <b>all</b> tuples of <tt>S</tt>.</p>
 * 
 * <p>The division is an operation that can deliver interesting information
 * from databases!</p>
 * 
 * <p>This class performs a sort-merge join (<tt>R natural join S</tt>). Then,
 * it outputs all elements projected to <tt>[A1,...,Ai]</tt> that occur exactly
 * <tt>|S|</tt> times in the join result.</p>
 */
public class SortBasedDivision extends AbstractCursor<Tuple> implements MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> {

	/**
	 * A counter saving the number of tuples of the second input.
	 */
	protected int counter = 0;
	
	/**
	 * A metadata cursor representing the result of the division.
	 */
	protected MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> result;

	/**
	 * Constructs an instance of the sort-based division operator.
	 *
	 * @param sortedDistinctCursor1 the sorted metadata cursor (by the all
	 *        attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
	 *        containing elements of the first input relation (no duplicates
	 *        allowed).
	 * @param sortedDistinctCursor2 the sorted metadata cursor (by the quotient
	 *        attributes) containing elements of the second input relation (no
	 *        duplicates allowed).
	 * @param bag a bag that is used for the sweep area of the internal
	 *        {@link SortMergeJoin} operator.
	 * @param tupleFactory a function that maps a list of objects (column
	 *        values) to a new tuple. Classes implementing the Tuple interface
	 *        should provide factory methods for this task. If
	 *        <code>null</code> is passed, a factory method producing
	 *        {@link ArrayTuple array-tuples} is used.
	 */
	public SortBasedDivision(MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> sortedDistinctCursor1, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> sortedDistinctCursor2, Bag<Tuple> bag, Function<Object, ? extends Tuple> tupleFactory) {
		try {
			SortMergeJoin join = new SortMergeJoin(
				sortedDistinctCursor1,
				sortedDistinctCursor2,
				// SweepArea0 would always contain exactly one element.
				// But no solution can be generated by this SweepArea.
				// The use of an EmptyBag would be possible (without exceptions)!
				new PredicateBasedSA<Tuple>(
					new ArrayBag<Tuple>(1),
					SortMergeJoin.computeMetaDataPredicate(
						sortedDistinctCursor1,
						sortedDistinctCursor2,
						SortMergeJoin.Type.NATURAL_JOIN
					),
					0
				) {
					@Override
					public void reorganize(Tuple currentStatus, int ID) throws IllegalStateException {
						clear();
					}
				},
				new PredicateBasedSA<Tuple>(
					bag,
					SortMergeJoin.computeMetaDataPredicate(
						sortedDistinctCursor1,
						sortedDistinctCursor2,
						SortMergeJoin.Type.NATURAL_JOIN
					),
					1
				) {
					@Override
					public void insert(Tuple tuple) {
						super.insert(tuple);
						counter++;
					}
					
					@Override
					public void reorganize(Tuple currentStatus, int ID) throws IllegalStateException {
						// nothing to reorganize
					}
				},
				new Comparator<Tuple>() {
					public int compare(Tuple tuple1, Tuple tuple2) {
						return 1;
					}
				},
				tupleFactory,
				SortMergeJoin.Type.NATURAL_JOIN
			);
					
			MergedResultSetMetaData joinMetaData = (MergedResultSetMetaData)ResultSetMetaDatas.getResultSetMetaData(join);
			
			ArrayList<Integer> indices = new ArrayList<Integer>();
			columns: for (int column = 1; column <= joinMetaData.getColumnCount(); column++) {
				Iterator<Integer> metadatas = joinMetaData.originalMetaDataIndices(column);
				while (metadatas.hasNext())
					if (metadatas.next() == 1)
						continue columns;
				indices.add(column);
			}
			int[] projectedColumns = new int[indices.size()];
			for (int i = 0; i < projectedColumns.length; projectedColumns[i] = indices.get(i++));
			
			final Comparator<Tuple> tupleComparator = Tuples.getTupleComparator(projectedColumns);
			
			this.result = new Selection(
				new Projection(
					join,
					tupleFactory,
					projectedColumns
				),
				new AbstractPredicate<Tuple>() {
					protected Tuple last = null;
					protected int noOfResults = 0;
					
					@Override
					public boolean invoke(Tuple tuple) {
						if (last == null || tupleComparator.compare(last, tuple) != 0)
							noOfResults = 0;
						noOfResults++;
						last = tuple;
						return noOfResults == counter;
					}
				}
			);
		}
		catch (SQLException sqle) {
			throw new MetaDataException("sql exception occured during meta data construction: \'" + sqle.getMessage() + "\'");
		}
	}

	/**
	 * Constructs an instance of the sort-based division operator. An
	 * {@link ListBag list-bag} is used for the sweep area of the internal
	 * {@link SortMergeJoin} operator.
	 *
	 * @param sortedCursor1 the sorted metadata cursor (by the all
	 *        attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
	 *        containing elements of the first input relation.
	 * @param sortedCursor2 the sorted metadata cursor (by the quotient
	 *        attributes) containing elements of the second input relation.
	 * @param tupleFactory a function that maps a list of objects (column
	 *        values) to a new tuple. Classes implementing the Tuple interface
	 *        should provide factory methods for this task. If
	 *        <code>null</code> is passed, a factory method producing
	 *        {@link ArrayTuple array-tuples} is used.
	 */
	public SortBasedDivision(MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> sortedCursor1, MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> sortedCursor2, Function<Object, ? extends Tuple> tupleFactory) {
		this(
			new SortBasedDistinct(sortedCursor1, Equal.DEFAULT_INSTANCE),
			new SortBasedDistinct(sortedCursor2, Equal.DEFAULT_INSTANCE),
			new ListBag<Tuple>(),
			tupleFactory
		);
	}

	/**
	 * Constructs an instance of the sort-based division operator.
	 *
	 * @param sortedDistinctResultSet1 the sorted result set (by the all
	 *        attributes: <tt>order by A1, ..., Ai, B1, ..., Bj</tt>)
	 *        containing elements of the first input relation (no duplicates
	 *        allowed).
	 * @param sortedDistinctResultSet2 the sorted result set (by the quotient
	 *        attributes) containing elements of the second input relation (no
	 *        duplicates allowed).
	 * @param bag a bag that is used for the sweep area of the internal
	 *        {@link SortMergeJoin} operator.
	 * @param tupleFactory a function that maps a list of objects (column
	 *        values) to a new tuple. Classes implementing the Tuple interface
	 *        should provide factory methods for this task. If
	 *        <code>null</code> is passed, a factory method producing
	 *        {@link ArrayTuple array-tuples} is used.
	 */
	public SortBasedDivision(ResultSet sortedDistinctResultSet1, ResultSet sortedDistinctResultSet2, Bag<Tuple> bag, Function<Object, ? extends Tuple> tupleFactory) {
		this(
			new ResultSetMetaDataCursor(sortedDistinctResultSet1),
			new ResultSetMetaDataCursor(sortedDistinctResultSet2),
			bag,
			tupleFactory
		);
	}

	/**
	 * Constructs an instance of the sort-based division operator. An
	 * {@link ListBag list} is used for the sweep area of the internal
	 * {@link SortMergeJoin} operator.
	 *
	 * @param sortedResultSet1 the sorted result set (by the all attributes:
	 *        <tt>order by A1, ..., Ai, B1, ..., Bj</tt>) containing elements
	 *        of the first input relation.
	 * @param sortedResultSet2 the sorted result set (by the quotient
	 *        attributes) containing elements of the second input relation.
	 * @param tupleFactory a function that maps a list of objects (column
	 *        values) to a new tuple. Classes implementing the Tuple interface
	 *        should provide factory methods for this task. If
	 *        <code>null</code> is passed, a factory method producing
	 *        {@link ArrayTuple array-tuples} is used.
	 */
	public SortBasedDivision(ResultSet sortedResultSet1, ResultSet sortedResultSet2, Function<Object, ? extends Tuple> tupleFactory) {
		this(
			new ResultSetMetaDataCursor(sortedResultSet1),
			new ResultSetMetaDataCursor(sortedResultSet2),
			tupleFactory
		);
	}
	
	/**
	 * Returns <code>true</code> if the iteration has more elements. (In other
	 * words, returns <code>true</code> if <code>next</code> or
	 * <code>peek</code> would return an element rather than throwing an
	 * exception.)
	 * 
	 * @return <code>true</code> if the cursor has more elements.
	 */
	@Override
	public boolean hasNextObject() {
		return result.hasNext();
	}
	
	/**
	 * Returns the next element in the iteration. This element will be
	 * accessible by some of the cursor's methods, e.g., <code>update</code> or
	 * <code>remove</code>, until a call to <code>next</code> or
	 * <code>peek</code> occurs. This is calling <code>next</code> or
	 * <code>peek</code> proceeds the iteration and therefore its previous
	 * element will not be accessible any more.
	 * 
	 * @return the next element in the iteration.
	 */
	@Override
	public Tuple nextObject() {
		return result.next();
	}

	/**
	 * Resets the cursor to its initial state such that the caller is able to
	 * traverse the underlying data structure again without constructing a new
	 * cursor (optional operation). The modifications, removes and updates
	 * concerning the underlying data structure, are still persistent.
	 * 
	 * <p>Note, that this operation is optional and does not work for this
	 * cursor.</p>
	 *
	 * @throws UnsupportedOperationException if the <code>reset</code>
	 *         operation is not supported by the cursor.
	 */
	@Override
	public void reset() throws UnsupportedOperationException {
		super.reset();
		result.reset();
	}
	
	/**
	 * Returns <code>true</code> if the <code>reset</code> operation is
	 * supported by the cursor. Otherwise it returns <code>false</code>.
	 *
	 * @return <code>true</code> if the <code>reset</code> operation is
	 *         supported by the cursor, otherwise <code>false</code>.
	 */
	@Override
	public boolean supportsReset() {
		return true;
	}
	
	/**
	 * Closes the cursor, i.e., signals the cursor to clean up resources, close
	 * files, etc. When a cursor has been closed calls to methods like
	 * <code>next</code> or <code>peek</code> are not guaranteed to yield
	 * proper results. Multiple calls to <code>close</code> do not have any
	 * effect, i.e., if <code>close</code> was called the cursor remains in the
	 * state <i>closed</i>.
	 * 
	 * <p>Note, that a closed cursor usually cannot be opened again because of
	 * the fact that its state generally cannot be restored when resources are
	 * released respectively files are closed.</p>
	 */
	@Override
	public void close() {
		if (isClosed)
			return;
		super.close();
		result.close();
	}

	/**
	 * Returns the metadata information for this metadata-cursor as a composite
	 * metadata ({@link CompositeMetaData}).
	 *
	 * @return the metadata information for this metadata-cursor as a composite
	 *         metadata ({@link CompositeMetaData}).
	 */
	public CompositeMetaData<Object, Object> getMetaData() {
		return result.getMetaData();
	}
}