/*
Copyright (C) 2006 EBI
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.biomart.builder.model;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.biomart.builder.model.DataSet.DataSetColumn;
import org.biomart.builder.model.DataSet.DataSetTable;
import org.biomart.builder.model.DataSet.ExpressionColumnDefinition;
import org.biomart.builder.model.DataSet.DataSetColumn.ExpressionColumn;
import org.biomart.builder.model.DataSet.DataSetColumn.InheritedColumn;
import org.biomart.builder.model.DataSet.DataSetColumn.WrappedColumn;
import org.biomart.builder.model.Relation.UnrolledRelationDefinition;
import org.biomart.common.exceptions.BioMartError;
/**
* This abstract class defines a unit of transformation for mart construction.
*
* @author Richard Holland <holland@ebi.ac.uk>
* @version $Revision: 1.29 $, $Date: 2008-02-19 13:27:29 $, modified by
* $Author: rh4 $
* @since 0.6
*/
public abstract class TransformationUnit {
/**
* A map of source schema columns to dataset column objects.
*/
private final Map newColumnNameMap;
private TransformationUnit previousUnit;
/**
 * Creates a transformation unit, optionally chained after an earlier unit.
 * The unit starts out with an empty column map.
 *
 * @param previousUnit
 *            the unit this one comes after, or <tt>null</tt> if there is
 *            none.
 */
public TransformationUnit(final TransformationUnit previousUnit) {
    this.previousUnit = previousUnit;
    this.newColumnNameMap = new HashMap();
}
/**
 * Checks whether this unit applies to the given schema prefix. This base
 * implementation adds no condition of its own: it answers <tt>true</tt>
 * when there is no previous unit and otherwise asks the previous unit.
 *
 * @param schemaPrefix
 *            the prefix to test.
 * @return <tt>true</tt> if the unit applies to that prefix.
 */
public boolean appliesToPartition(final String schemaPrefix) {
    if (this.previousUnit == null) {
        // Nothing earlier in the chain can object.
        return true;
    }
    return this.previousUnit.appliesToPartition(schemaPrefix);
}
/**
 * Returns the unit that precedes this one in the chain.
 *
 * @return the preceding unit, or <tt>null</tt> if none has been set.
 */
public TransformationUnit getPreviousUnit() {
    return previousUnit;
}
/**
 * Replaces the unit that precedes this one in the chain.
 *
 * @param previousUnit
 *            the new preceding unit; pass <tt>null</tt> to detach this
 *            unit from any predecessor.
 */
public void setPreviousUnit(final TransformationUnit previousUnit) {
    this.previousUnit = previousUnit;
}
/**
 * Returns the map of columns defined in this unit. Within this file the
 * map is read with column objects as keys and dataset column objects as
 * values. The map handed back is the unit's own internal map, not a copy,
 * so changes made through it are seen by this unit.
 *
 * @return the map of columns. Potentially empty but never <tt>null</tt>.
 */
public Map getNewColumnNameMap() {
    return newColumnNameMap;
}
/**
 * Given a schema column, work out which dataset column in the
 * transformation so far refers to it. If the column was not adopted in this
 * particular unit it will go back until it finds the unit that adopted it,
 * and interrogate that and return the results.
 * <p>
 * Each subclass decides how to resolve the column: by consulting the
 * columns recorded in its own map, by delegating to the previous unit in
 * the chain, or both.
 *
 * @param column
 *            the schema column to look for.
 * @return the matching dataset column. May be <tt>null</tt> if the column
 *         is not in this dataset table at all.
 */
public abstract DataSetColumn getDataSetColumnFor(final Column column);
/**
 * A transformation unit that selects columns from a single schema table.
 */
public static class SelectFromTable extends TransformationUnit {
    private static final long serialVersionUID = 1L;

    private final Table table;

    /*
     * Chaining constructor. It is private, but nested subclasses declared
     * in the same outer class (such as JoinTable) can still call it.
     */
    private SelectFromTable(final TransformationUnit previousUnit,
            final Table table) {
        super(previousUnit);
        this.table = table;
    }

    /**
     * Creates a unit that selects from the given schema table and has no
     * previous unit.
     *
     * @param table
     *            the table this unit selects from.
     */
    public SelectFromTable(final Table table) {
        this(null, table);
    }

    /**
     * Applies to a partition only if the table exists for it and the rest
     * of the chain (as judged by the superclass) applies as well.
     *
     * @param schemaPrefix
     *            the prefix to test.
     * @return <tt>true</tt> if both conditions hold.
     */
    public boolean appliesToPartition(final String schemaPrefix) {
        if (!this.table.existsForPartition(schemaPrefix)) {
            return false;
        }
        return super.appliesToPartition(schemaPrefix);
    }

    /**
     * Returns the schema table that this unit selects from.
     *
     * @return the schema table.
     */
    public Table getTable() {
        return this.table;
    }

    /*
     * Tells whether a dataset column ultimately wraps the given schema
     * column. Inherited columns are followed down to the column they
     * inherit from before the check is made; a null column never matches.
     */
    private boolean columnMatches(final Column column, DataSetColumn dsCol) {
        DataSetColumn resolved = dsCol;
        while (resolved instanceof InheritedColumn) {
            resolved = ((InheritedColumn) resolved).getInheritedColumn();
        }
        return resolved instanceof WrappedColumn
                && ((WrappedColumn) resolved).getWrappedColumn().equals(
                        column);
    }

    /**
     * Looks the column up directly in this unit's column map. If that
     * fails, scans the map's values for the first dataset column that
     * (after following any inheritance) wraps the given column. The
     * previous unit is never consulted.
     *
     * @param column
     *            the column to look for.
     * @return the matching dataset column, or <tt>null</tt> if none of
     *         this unit's columns match.
     */
    public DataSetColumn getDataSetColumnFor(final Column column) {
        final DataSetColumn direct = (DataSetColumn) this
                .getNewColumnNameMap().get(column);
        if (direct != null) {
            return direct;
        }
        // No direct hit: check whether one of our dataset columns points
        // at the requested real column.
        final Iterator values = this.getNewColumnNameMap().values()
                .iterator();
        while (values.hasNext()) {
            final DataSetColumn dsCol = (DataSetColumn) values.next();
            if (this.columnMatches(column, dsCol)) {
                return dsCol;
            }
        }
        return null;
    }
}
/**
 * A transformation unit that joins an existing dataset table to a schema
 * table.
 */
public static class JoinTable extends SelectFromTable {
    private static final long serialVersionUID = 1L;

    private List sourceDataSetColumns;

    private Key schemaSourceKey;

    private Relation schemaRelation;

    private int schemaRelationIteration;

    /**
     * Creates a join unit.
     *
     * @param previousUnit
     *            the unit that precedes this one.
     * @param table
     *            the table we are joining to.
     * @param sourceDataSetColumns
     *            the columns in the existing dataset table that are used
     *            to make the join.
     * @param schemaSourceKey
     *            the key in the schema table that we are joining to.
     * @param schemaRelation
     *            the relation we are following to make the join.
     * @param schemaRelationIteration
     *            the number of the compound relation, if it is compound.
     *            Use 0 if it is not.
     */
    public JoinTable(final TransformationUnit previousUnit,
            final Table table, final List sourceDataSetColumns,
            final Key schemaSourceKey, final Relation schemaRelation,
            final int schemaRelationIteration) {
        super(previousUnit, table);
        this.schemaRelation = schemaRelation;
        this.schemaRelationIteration = schemaRelationIteration;
        this.schemaSourceKey = schemaSourceKey;
        this.sourceDataSetColumns = sourceDataSetColumns;
    }

    /**
     * Applies to a partition only if every source dataset column exists
     * for it and the superclass check passes as well.
     *
     * @param schemaPrefix
     *            the prefix to test.
     * @return <tt>true</tt> if the join applies to that prefix.
     */
    public boolean appliesToPartition(final String schemaPrefix) {
        final Iterator sources = this.sourceDataSetColumns.iterator();
        while (sources.hasNext()) {
            final DataSetColumn sourceCol = (DataSetColumn) sources.next();
            if (!sourceCol.existsForPartition(schemaPrefix)) {
                return false;
            }
        }
        return super.appliesToPartition(schemaPrefix);
    }

    /**
     * Returns the dataset columns this transformation starts from.
     *
     * @return the columns, exactly as passed to the constructor.
     */
    public List getSourceDataSetColumns() {
        return this.sourceDataSetColumns;
    }

    /**
     * Returns the schema table key this transformation joins to.
     *
     * @return the key we are joining to.
     */
    public Key getSchemaSourceKey() {
        return this.schemaSourceKey;
    }

    /**
     * Returns the schema relation followed to make the join.
     *
     * @return the relation.
     */
    public Relation getSchemaRelation() {
        return this.schemaRelation;
    }

    /**
     * Returns the number of the compound relation used, or 0 if it is not
     * compound.
     *
     * @return the compound relation number.
     */
    public int getSchemaRelationIteration() {
        return this.schemaRelationIteration;
    }

    /**
     * Looks the column up in this unit's own column map. If it is not
     * there and a previous unit exists, the previous unit is asked
     * instead: first for the column at the same position in the other key
     * of the relation (when the requested column belongs to one of the
     * relation's keys), then for the requested column itself.
     *
     * @param column
     *            the column to look for.
     * @return the matching dataset column, or <tt>null</tt> if it could
     *         not be resolved.
     */
    public DataSetColumn getDataSetColumnFor(final Column column) {
        final DataSetColumn own = (DataSetColumn) this
                .getNewColumnNameMap().get(column);
        if (own != null || this.getPreviousUnit() == null) {
            return own;
        }

        // Pick the end of the relation that the column might belong to;
        // the second key is the fallback when the first does not hold it.
        final Key ourKey;
        if (Arrays.asList(this.schemaRelation.getFirstKey().getColumns())
                .contains(column)) {
            ourKey = this.schemaRelation.getFirstKey();
        } else {
            ourKey = this.schemaRelation.getSecondKey();
        }
        final Key parentKey = this.schemaRelation.getOtherKey(ourKey);
        final int pos = Arrays.asList(ourKey.getColumns()).indexOf(column);

        DataSetColumn found = null;
        if (pos >= 0) {
            // Key column: ask for its counterpart in the other key.
            found = this.getPreviousUnit().getDataSetColumnFor(
                    parentKey.getColumns()[pos]);
        }
        if (found == null) {
            found = this.getPreviousUnit().getDataSetColumnFor(column);
        }
        return found;
    }
}
/**
 * A transformation unit that adds expression columns to a dataset table.
 * The expressions are already defined in the table as
 * {@link ExpressionColumn}s.
 */
public static class Expression extends TransformationUnit {
    private static final long serialVersionUID = 1L;

    private DataSetTable dsTable;

    /**
     * Creates an expression unit.
     *
     * @param previousUnit
     *            the previous unit in the chain.
     * @param dsTable
     *            the table we are adding expressions to.
     */
    public Expression(final TransformationUnit previousUnit,
            final DataSetTable dsTable) {
        super(previousUnit);
        this.dsTable = dsTable;
    }

    /**
     * Delegates the lookup to the previous unit.
     *
     * @param column
     *            the column to look for.
     * @return whatever the previous unit returns for the column.
     * @throws BioMartError
     *             if this unit has no previous unit.
     */
    public DataSetColumn getDataSetColumnFor(final Column column) {
        if (this.getPreviousUnit() == null) {
            // Should never happen.
            throw new BioMartError();
        }
        return this.getPreviousUnit().getDataSetColumnFor(column);
    }

    /**
     * Returns the dataset table that will receive the expressions.
     *
     * @return the table.
     */
    public DataSetTable getDataSetTable() {
        return this.dsTable;
    }

    /*
     * Shared test used both for ordering and for grouping. True when the
     * alias keys of entryB's expression include the name of entryA's key
     * column, or when the two expressions differ in their group-by flag.
     * Keys are cast to Column and values to ExpressionColumn.
     */
    private static boolean mustSeparate(final Map.Entry entryA,
            final Map.Entry entryB) {
        final String colNameA = ((Column) entryA.getKey()).getName();
        final ExpressionColumnDefinition exprA = ((ExpressionColumn) entryA
                .getValue()).getDefinition();
        final ExpressionColumnDefinition exprB = ((ExpressionColumn) entryB
                .getValue()).getDefinition();
        if (exprB.getAliases().keySet().contains(colNameA)) {
            return true;
        }
        return exprA.isGroupBy() != exprB.isGroupBy();
    }

    /**
     * Splits the expression columns held in this unit's column map into an
     * ordered list of groups, each group being a set of expression columns
     * that can be added in a single step. The result always contains at
     * least one group; when the map is empty that group is empty.
     *
     * @return the ordered collection of collections of expressions.
     */
    public Collection getOrderedExpressionGroups() {
        // NOTE(review): this comparator only ever returns -1 or 1 and is
        // not symmetric, so it does not meet the general contract of
        // java.util.Comparator. The iteration order of the set can
        // therefore depend on the order in which entries are inserted.
        // Never returning 0 also means no entry is dropped as a
        // duplicate - presumably intentional; confirm before changing.
        final Collection sorted = new TreeSet(new Comparator() {
            public int compare(final Object a, final Object b) {
                return Expression.mustSeparate((Map.Entry) a,
                        (Map.Entry) b) ? -1 : 1;
            }
        });
        sorted.addAll(this.getNewColumnNameMap().entrySet());

        // Walk the sorted entries, opening a new group whenever the
        // current entry must be kept apart from the one before it.
        final List groups = new ArrayList();
        Collection currentGroup = new HashSet();
        groups.add(currentGroup);
        Map.Entry previous = null;
        final Iterator walker = sorted.iterator();
        while (walker.hasNext()) {
            final Map.Entry current = (Map.Entry) walker.next();
            if (previous != null
                    && Expression.mustSeparate(current, previous)) {
                currentGroup = new HashSet();
                groups.add(currentGroup);
            }
            currentGroup.add(current.getValue());
            previous = current;
        }
        return groups;
    }
}
/**
 * A join unit describing a join between an existing dataset table and a
 * schema table that will not actually be carried out. It adds nothing to
 * {@link JoinTable} beyond its own type.
 */
public static class SkipTable extends JoinTable {
    private static final long serialVersionUID = 1L;

    /**
     * Creates a skip unit. Every argument is handed unchanged to the
     * {@link JoinTable} constructor.
     *
     * @param previousUnit
     *            the unit that precedes this one.
     * @param table
     *            the table we would be joining to.
     * @param sourceDataSetColumns
     *            the columns in the existing dataset table that would be
     *            used to make the join.
     * @param schemaSourceKey
     *            the key in the schema table that we would be joining to.
     * @param schemaRelation
     *            the relation we would be following to make the join.
     * @param schemaRelationIteration
     *            the number of the compound relation, if it is compound.
     *            Use 0 if it is not.
     */
    public SkipTable(final TransformationUnit previousUnit,
            final Table table, final List sourceDataSetColumns,
            final Key schemaSourceKey, final Relation schemaRelation,
            final int schemaRelationIteration) {
        super(previousUnit, table, sourceDataSetColumns, schemaSourceKey,
                schemaRelation, schemaRelationIteration);
    }
}
/**
 * A transformation unit that unrolls tables.
 */
public static class UnrollTable extends TransformationUnit {
    private static final long serialVersionUID = 1L;

    private final Relation relation;

    private final List sourceDataSetColumns;

    private final UnrolledRelationDefinition unrolledDef;

    private final DataSetColumn unrolledIDColumn;

    private final DataSetColumn unrolledNameColumn;

    /**
     * Creates a unit that unrolls the given relation.
     *
     * @param previousUnit
     *            the unit that precedes this one.
     * @param relation
     *            the relation we are unrolling.
     * @param sourceDataSetColumns
     *            the columns in the existing dataset table that are used
     *            to make the join.
     * @param unrolledDef
     *            the unrolled relation definition.
     * @param unrolledIDColumn
     *            the unrolled ID column.
     * @param unrolledNameColumn
     *            the unrolled name column.
     */
    public UnrollTable(final TransformationUnit previousUnit,
            final Relation relation, final List sourceDataSetColumns,
            final UnrolledRelationDefinition unrolledDef,
            final DataSetColumn unrolledIDColumn,
            final DataSetColumn unrolledNameColumn) {
        super(previousUnit);
        this.relation = relation;
        this.unrolledDef = unrolledDef;
        this.sourceDataSetColumns = sourceDataSetColumns;
        this.unrolledIDColumn = unrolledIDColumn;
        this.unrolledNameColumn = unrolledNameColumn;
    }

    /**
     * Returns the relation being unrolled.
     *
     * @return the relation.
     */
    public Relation getRelation() {
        return this.relation;
    }

    /**
     * Returns the source dataset columns given at construction.
     *
     * @return the source dataset columns.
     */
    public List getSourceDataSetColumns() {
        return this.sourceDataSetColumns;
    }

    /**
     * Returns the unrolled relation definition given at construction.
     *
     * @return the unrolled relation definition.
     */
    public UnrolledRelationDefinition getUnrolledDef() {
        return this.unrolledDef;
    }

    /**
     * Returns the unrolled ID column given at construction.
     *
     * @return the unrolled ID column.
     */
    public DataSetColumn getUnrolledIDColumn() {
        return this.unrolledIDColumn;
    }

    /**
     * Returns the unrolled name column given at construction.
     *
     * @return the unrolled name column.
     */
    public DataSetColumn getUnrolledNameColumn() {
        return this.unrolledNameColumn;
    }

    /**
     * Delegates the lookup to the previous unit, if there is one.
     *
     * @param column
     *            the column to look for.
     * @return the previous unit's answer, or <tt>null</tt> when this unit
     *         has no previous unit.
     */
    public DataSetColumn getDataSetColumnFor(final Column column) {
        return this.getPreviousUnit() == null ? null : this
                .getPreviousUnit().getDataSetColumnFor(column);
    }
}
}