// JoinEdge.java example

// Explorer
// tesora-dve-pub-master
package com.tesora.dve.sql.jg;

/*
 * #%L
 * Tesora Inc.
 * Database Virtualization Engine
 * %%
 * Copyright (C) 2011 - 2014 Tesora Inc.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */


import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import com.tesora.dve.sql.expression.TableKey;
import com.tesora.dve.sql.node.expression.ColumnInstance;
import com.tesora.dve.sql.node.expression.ExpressionNode;
import com.tesora.dve.sql.node.structural.JoinSpecification;
import com.tesora.dve.sql.node.structural.JoinedTable;
import com.tesora.dve.sql.schema.DistributionVector;
import com.tesora.dve.sql.schema.PEAbstractTable;
import com.tesora.dve.sql.schema.PEColumn;
import com.tesora.dve.sql.schema.PEKey;
import com.tesora.dve.sql.schema.PEStorageGroup;
import com.tesora.dve.sql.schema.SchemaContext;
import com.tesora.dve.sql.util.BinaryProcedure;
import com.tesora.dve.sql.util.Functional;
import com.tesora.dve.sql.util.ListOfPairs;
import com.tesora.dve.sql.util.ListSet;
import com.tesora.dve.sql.util.Pair;
import com.tesora.dve.sql.util.UnaryFunction;

/**
 * An edge of the join graph connecting two partitions ({@link DPart}) that take part
 * in a join ({@link DGJoin}).
 *
 * <p>The edge records the equijoin condition as a list of (left expression, right
 * expression) pairs and lazily computes, then caches:
 * <ul>
 * <li>whether the two sides are colocated ({@link #isColocated(SchemaContext)}), and</li>
 * <li>the best key wholly covered by the join columns on each side
 *     ({@link #getLeftKey(SchemaContext)}, {@link #getRightKey(SchemaContext)}).</li>
 * </ul>
 *
 * <p>NOTE(review): none of the cached values are invalidated by {@link #setFrom(DPart)},
 * {@link #setTo(DPart)} or {@link #addJoinExpression(ExpressionNode, ExpressionNode)};
 * callers presumably finish building the edge before querying it - confirm.
 */
public class JoinEdge extends DGEdge<DPart> {
	
	// the join this edge belongs to; the edge registers itself with it in the constructor
	DGJoin join;
	
	// only equijoins for now
	private final ListOfPairs<ExpressionNode, ExpressionNode> joinCondition =
		new ListOfPairs<ExpressionNode, ExpressionNode>();
	
	// key matches on each side, used during costing.  we prefer whole matches.
	// first = whether a key was found, second = the key (null when first is false);
	// the pair itself is null until the corresponding getter has been called once.
	private Pair<Boolean, PEKey> leftKey;
	private Pair<Boolean, PEKey> rightKey;
	
	// left column -> right column, built from the simple column pairs of the join
	// condition; stays null until colocation has been computed for the first time.
	Map<PEColumn, PEColumn> mapping = null;

	// cached colocation result; null means "not computed yet"
	private Boolean colocated = null;

	// first table of the original left/right partitions, captured at construction time
	private TableKey lhs;
	private TableKey rhs;
	
	
	/**
	 * Builds the edge between two partitions and registers it with the join.
	 *
	 * @param id      identifier passed through to the superclass
	 * @param lhs     partition on the left side; its first table becomes {@link #getLHSTab()}
	 * @param rhs     partition on the right side; its first table becomes {@link #getRHSTab()}
	 * @param theJoin join this edge represents; {@code addEdge(this)} is invoked on it
	 */
	public JoinEdge(int id,DunPart lhs, DunPart rhs, DGJoin theJoin) {
		super(lhs,rhs,id);
		// NOTE(review): the superclass presumably stores these already; the explicit
		// assignments are kept because DGEdge is not visible from this file.
		from = lhs;
		to = rhs;
		join = theJoin;
		this.lhs = from.getTables().get(0);
		this.rhs = to.getTables().get(0);
		// publishes 'this' to the join as the last step of construction
		join.addEdge(this);
	}
	
	/** @return the first table of the left partition this edge was created with */
	public TableKey getLHSTab() {
		return lhs;
	}
	
	/** @return the first table of the right partition this edge was created with */
	public TableKey getRHSTab() {
		return rhs;
	}
	
	/** @return the join this edge belongs to */
	public DGJoin getJoin() {
		return join;
	}
	
	/** @return whatever the underlying join reports for {@code isInnerJoin()} */
	public boolean isInnerJoin() {
		return join.isInnerJoin();
	}
	
	/** @return the join specification reported by the underlying join */
	public JoinSpecification getJoinType() {
		return join.getJoinType();
	}
	
	/**
	 * Reports whether both sides of this edge are colocated.  The answer is computed on
	 * the first call and cached; later calls return the cached value regardless of the
	 * context passed in.
	 *
	 * @param sc schema context used for the (first) computation
	 * @return the cached colocation verdict
	 */
	public boolean isColocated(SchemaContext sc) {
		if (colocated == null) {
			colocated = Boolean.valueOf(computeColocated(sc));
		}
		return colocated.booleanValue();
	}
	
	/**
	 * Appends one equality term (lhs = rhs) to the join condition.
	 *
	 * @param lhs expression from the left side
	 * @param rhs expression from the right side
	 */
	public void addJoinExpression(ExpressionNode lhs, ExpressionNode rhs) {
		joinCondition.add(lhs,rhs);
	}
		
	/** Replaces the source partition of this edge; cached values are left untouched. */
	public void setFrom(DPart dp) {
		from = dp;
	}
	
	/** Replaces the target partition of this edge; cached values are left untouched. */
	public void setTo(DPart dp) {
		to = dp;
	}
	
	/**
	 * Decides whether joining every table in {@code left} against {@code right} can be
	 * treated as colocated.
	 *
	 * <p>For each left table the method requires that its storage group is a subset of the
	 * right table's storage group (checked once per distinct group) and that its governing
	 * distribution vector is comparable with the right one.  It then classifies the pair:
	 * <ul>
	 * <li>both vectors broadcast,</li>
	 * <li>exactly one vector broadcast (rejected for a left outer join with a broadcast
	 *     left side, or a right outer join with a broadcast right side; for an FK test the
	 *     answer is returned immediately),</li>
	 * <li>both vectors distributed wholly on the tenant column (only counted for an FK test
	 *     or when the policy context is schema- or data-tenant),</li>
	 * <li>otherwise the left vector must have its join-required columns among the mapped
	 *     left columns.</li>
	 * </ul>
	 * If every left table falls into the same one of the first three classes the result is
	 * {@code true}; otherwise the right vector must have its join-required columns among
	 * the mapped right columns.
	 *
	 * <p>NOTE(review): with an empty {@code left} all counters equal {@code left.size()},
	 * so the method returns {@code true} - confirm that is intended.
	 *
	 * @param sc             schema context
	 * @param leftPartition  partition supplying the governing vector of each left table
	 * @param left           tables on the left side
	 * @param rightPartition partition supplying the governing vector of the right table
	 * @param right          table on the right side
	 * @param mapping        left column -> right column equalities; must not be null
	 * @param enclosingJoin  the syntactic join, or null when there is none
	 * @param isFKTest       true when the check is made for a foreign key relationship
	 * @return true when the join can be treated as colocated
	 */
	public static boolean computeColocated(SchemaContext sc, 
			DPart leftPartition, Collection<TableKey> left,
			DPart rightPartition, TableKey right,
			Map<PEColumn,PEColumn> mapping, JoinedTable enclosingJoin, boolean isFKTest) {
		PEAbstractTable<?> rtab = right.getAbstractTable();
		HashSet<PEStorageGroup> leftGroups = new HashSet<PEStorageGroup>();
		int nbcast = 0;    // left tables where both sides are broadcast
		int nojbcast = 0;  // left tables where exactly one side is broadcast and that is acceptable
		int nten = 0;      // left tables where both sides are distributed wholly on the tenant column

		ListSet<PEColumn> leftCols = new ListSet<PEColumn>();
		ListSet<PEColumn> rightCols = new ListSet<PEColumn>();
		leftCols.addAll(mapping.keySet());
		rightCols.addAll(mapping.values());

		DistributionVector rvect = rightPartition.getGoverningVector(sc, right);
		
		for(TableKey tk : left) {
			boolean skipReqdColumnsCheck = false;
			DistributionVector lvect = leftPartition.getGoverningVector(sc, tk);
			PEAbstractTable<?> ltab = tk.getAbstractTable();
			// only test each distinct left storage group once
			if (leftGroups.add(ltab.getStorageGroup(sc))) {
				if (!ltab.getStorageGroup(sc).isSubsetOf(sc, rtab.getStorageGroup(sc)))
					return false;
			}
			if (!lvect.comparableForDistribution(sc, rvect, mapping, isFKTest)) {
				return false;
			} else if (lvect.isBroadcast() && rvect.isBroadcast()) {
				nbcast++;
				skipReqdColumnsCheck = true;
			} else if (lvect.isBroadcast() || rvect.isBroadcast()) {
				// so, we might not be colocated if this is an outer join.  in that case even though we technically 
				// are colocated, we won't execute correctly because the rows from the bcast table will be repeated.
				if (enclosingJoin != null && 
						((enclosingJoin.getJoinType().isLeftOuterJoin() && lvect.isBroadcast())
								|| (enclosingJoin.getJoinType().isRightOuterJoin() && rvect.isBroadcast()))) {
					return false;
				} else if (isFKTest) {
					// early return, since fks are only between two tables
					return !lvect.isBroadcast();
				} else {
					nojbcast++;
					skipReqdColumnsCheck = true;
				}
			}
			if ((isFKTest || (sc.getPolicyContext().isSchemaTenant() || sc.getPolicyContext().isDataTenant())) 
					&& lvect.getDistributedWhollyOnTenantColumn(sc) != null &&
							rvect.getDistributedWhollyOnTenantColumn(sc) != null)
				nten++;
			else if (!skipReqdColumnsCheck && !lvect.hasJoinRequiredColumns(sc,leftCols))
				return false;
		}
		// every left table fell into the same "no column check needed" class
		if (nbcast == left.size() || nojbcast == left.size() || nten == left.size())
			return true;
		
		return rvect.hasJoinRequiredColumns(sc, rightCols);
	}
	
	// true when both halves of a join condition term are plain column references
	private static boolean isColumnPair(Pair<ExpressionNode, ExpressionNode> term) {
		return term.getFirst() instanceof ColumnInstance && term.getSecond() instanceof ColumnInstance;
	}
	
	/**
	 * @return true when every term of the join condition compares a column with a column
	 *         (vacuously true when there are no terms)
	 */
	public boolean isSimpleJoin() {
		for(Pair<ExpressionNode, ExpressionNode> term : joinCondition) {
			if (!isColumnPair(term))
				return false;
		}
		return true;
	}
	
	/**
	 * @return a new list holding, in condition order, only those terms whose two sides are
	 *         both column references; other terms are skipped
	 */
	public ListOfPairs<ColumnInstance,ColumnInstance> getSimpleColumns() {
		ListOfPairs<ColumnInstance, ColumnInstance> out = new ListOfPairs<ColumnInstance, ColumnInstance>();
		for(Pair<ExpressionNode,ExpressionNode> term : joinCondition) {
			if (isColumnPair(term))
				out.add((ColumnInstance)term.getFirst(), (ColumnInstance)term.getSecond());
		}
		return out;
	}
	
	// Rebuilds the column mapping from the simple column pairs and delegates to the
	// static colocation test using the join's left tables, right table and enclosing join.
	private boolean computeColocated(SchemaContext sc) {
		ListSet<TableKey> leftKeys = new ListSet<TableKey>();
		leftKeys.addAll(join.getLeftTables());
		TableKey right = join.getRightTable();
		mapping = new HashMap<PEColumn, PEColumn>();
		for(Pair<ColumnInstance, ColumnInstance> p : getSimpleColumns()) {
			mapping.put(p.getFirst().getPEColumn(), p.getSecond().getPEColumn());
		}
 		return computeColocated(sc,getFrom(),leftKeys,getTo(),right,mapping,join.enclosingJoin,false);
	}

	/**
	 * Picks, for every join condition term, the expression belonging to the side that
	 * contains {@code forTable}: the first expression when the source partition's tables
	 * contain it, the second expression otherwise.
	 *
	 * @param forTable table whose side of the condition is wanted
	 * @return the chosen expressions, in join condition order
	 */
	List<ExpressionNode> getRedistJoinExpressions(final TableKey forTable) {
		// using the joinCondition will guarantee that the dv is the same for both - same ordering of values
		return Functional.apply(joinCondition, new UnaryFunction<ExpressionNode, Pair<ExpressionNode, ExpressionNode>>() {

			@Override
			public ExpressionNode evaluate(Pair<ExpressionNode, ExpressionNode> object) {
				if (getFrom().getTables().contains(forTable))
					return object.getFirst();
				else
					return object.getSecond();
			}
			
		});
		
	}
	
	/**
	 * Best key on the left side that is wholly covered by the left join columns.  The
	 * lookup runs once and is cached.
	 *
	 * @param sc schema context used for the (first) lookup
	 * @return the key, or null when no key qualifies
	 */
	public PEKey getLeftKey(SchemaContext sc) {
		if (leftKey == null) {
			leftKey = computeBestKey(sc, new UnaryFunction<PEColumn,Pair<ColumnInstance,ColumnInstance>>(){

				@Override
				public PEColumn evaluate(
						Pair<ColumnInstance, ColumnInstance> object) {
					return object.getFirst().getPEColumn();
				}
				
			});
		}
		if (leftKey.getFirst().booleanValue())
			return leftKey.getSecond();
		return null;
	}

	/**
	 * Best key on the right side that is wholly covered by the right join columns.  The
	 * lookup runs once and is cached.
	 *
	 * @param sc schema context used for the (first) lookup
	 * @return the key, or null when no key qualifies
	 */
	public PEKey getRightKey(SchemaContext sc) {
		if (rightKey == null) {
			rightKey = computeBestKey(sc, new UnaryFunction<PEColumn,Pair<ColumnInstance,ColumnInstance>>() {

				@Override
				public PEColumn evaluate(
						Pair<ColumnInstance, ColumnInstance> object) {
					return object.getSecond().getPEColumn();
				}
				
			});
		}
		if (rightKey.getFirst().booleanValue())
			return rightKey.getSecond();
		return null;
	}
	
	// Chooses among the keys wholly covered by one side's join columns: the first unique
	// key encountered wins; failing that, the non-unique key with the smallest card ratio
	// (the earliest one on ties).  The Boolean in the result says whether anything was found.
	private Pair<Boolean,PEKey> computeBestKey(SchemaContext sc, UnaryFunction<PEColumn,Pair<ColumnInstance,ColumnInstance>> sideFilter) {
		List<PEColumn> cols = Functional.apply(getSimpleColumns(),sideFilter);
		ListSet<PEKey> match = buildMatching(sc, cols);
		PEKey uniq = null;
		PEKey index = null;
		for(PEKey candidate : match) {
			if (candidate.isUnique()) {
				if (uniq == null)
					uniq = candidate;
			} else if (index == null || candidate.getCardRatio(sc) < index.getCardRatio(sc)) {
				index = candidate;
			}
		}
		PEKey best = (uniq != null ? uniq : index);
		return new Pair<Boolean,PEKey>(best != null, best);
	}
	

	// return keys that wholly match one or more of the columns
	private ListSet<PEKey> buildMatching(SchemaContext sc, List<PEColumn> cols) {
		ListSet<PEKey> candidates = new ListSet<PEKey>();
		for(PEColumn pec : cols) {
			List<PEKey> refs = pec.getReferencedBy(sc);
			for(PEKey pek : refs) {
				// a key qualifies when none of its columns is left over after removing the join columns
				HashSet<PEColumn> uses = new HashSet<PEColumn>(pek.getColumns(sc));
				uses.removeAll(cols);
				if (uses.isEmpty()) 
					candidates.add(pek);
			}
		}
		return candidates;
	}

	/**
	 * Appends the join description, the graph ids of both end points and the join
	 * condition rendered as {@code a=b, c=d}.  The {@code indent} argument is not used.
	 */
	@Override
	protected void describeInternal(SchemaContext sc, String indent, StringBuilder buf) {
		join.describe(sc, buf);
		buf.append(" [").append(from.getGraphID()).append(",").append(to.getGraphID()).append("] on ");
		Functional.join(joinCondition, buf, ", ", new BinaryProcedure<Pair<ExpressionNode,ExpressionNode>,StringBuilder>() {

			@Override
			public void execute(Pair<ExpressionNode, ExpressionNode> aobj,
					StringBuilder bobj) {
				bobj.append(aobj.getFirst().toString()).append("=").append(aobj.getSecond().toString());
			}
			
		});
	}

	@Override
	public String getGraphRole() {
		return "Join";
	}

	/**
	 * Compares this edge with another one: the left and right tables must be equal and
	 * every term of this edge's join condition must have a term in {@code other} whose
	 * two sides carry equal rewrite keys.
	 *
	 * <p>NOTE(review): the check is one-directional (extra terms in {@code other} are
	 * ignored) and an edge without any join condition terms is never "the same" as
	 * anything, itself included.  Both behaviours are preserved from the original
	 * implementation - confirm they are intended.
	 *
	 * @param other edge to compare against; must not be null
	 * @return true when the tables match and all of this edge's terms are found in other
	 */
	public boolean isSame(JoinEdge other) {
		if (!this.lhs.equals(other.lhs) || !this.rhs.equals(other.rhs)) {
			return false;
		}
		
		boolean ret = false;
		for(Pair<ExpressionNode, ExpressionNode> thisPair : this.joinCondition) {
			ret = false;
			for(Pair<ExpressionNode, ExpressionNode> otherPair : other.joinCondition) {
				if (thisPair.getFirst().getRewriteKey().equals(otherPair.getFirst().getRewriteKey()) &&
					thisPair.getSecond().getRewriteKey().equals(otherPair.getSecond().getRewriteKey())) {
					ret = true;
					// one match is enough for this term; stop scanning the other edge
					break;
				}
			}
			
			if (!ret) { 
				break;
			}
		}
		
		return ret;
	}	
}