package com.taobao.tddl.executor.cursor.impl; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import com.taobao.tddl.common.exception.TddlException; import com.taobao.tddl.common.utils.GeneralUtil; import com.taobao.tddl.executor.cursor.IMergeSortJoinCursor; import com.taobao.tddl.executor.cursor.ISchematicCursor; import com.taobao.tddl.executor.rowset.ArrayRowSet; import com.taobao.tddl.executor.rowset.IRowSet; import com.taobao.tddl.executor.utils.ExecUtils; import com.taobao.tddl.optimizer.core.expression.IOrderBy; import com.taobao.tddl.optimizer.core.plan.query.IJoin; /** * <pre> * sort merge join. * 实现 支持inner join,left join,right join,outter join * 要求两边均按照join列完全有序 * <a>http://en.wikipedia.org/wiki/Sort-merge_join</a> * * * function sortMerge(relation left, relation right, attribute a) * var relation output * var list left_sorted := sort(left, a) // Relation left sorted on attribute a * var list right_sorted := sort(right, a) * var attribute left_key, right_key * var set left_subset, right_subset // These sets discarded except where join predicate is satisfied * advance(left_subset, left_sorted, left_key, a) * advance(right_subset, right_sorted, right_key, a) * while not empty(left_subset) and not empty(right_subset) * if left_key = right_key // Join predicate satisfied * add cross product of left_subset and right_subset to output * advance(left_subset, left_sorted, left_key, a) * advance(right_subset, right_sorted, right_key, a) * else if left_key < right_key * advance(left_subset, left_sorted, left_key, a) * else // left_key > right_key * advance(right_subset, right_sorted, right_key, a) * return output * * // Remove tuples from sorted to subset until the sorted[1].a value changes * function advance(subset out, sorted inout, key out, a in) * key := sorted[1].a * subset := emptySet * while not empty(sorted) and sorted[1].a = key * insert sorted[1] into subset * remove sorted[1] * </pre> * * @author mengshi.sunmengshi 2013-12-18 下午2:13:29 * @since 5.0.0 */ @SuppressWarnings("rawtypes") public class SortMergeJoinCursor extends JoinSchematicCursor implements IMergeSortJoinCursor { protected IRowSet current; private Iterator<IRowSet> resultsIter = null; private IRowSet left_key; private IRowSet right_key; private boolean needAdvanceLeft = true; private boolean needAdvanceRight = true; private List leftSubSet; private List rightSubSet; private List<List<IOrderBy>> joinOrderbys; public SortMergeJoinCursor(ISchematicCursor left_cursor, ISchematicCursor right_cursor, List leftJoinOnColumns, List rightJoinOnColumns) throws TddlException{ super(left_cursor, right_cursor, leftJoinOnColumns, rightJoinOnColumns); this.left_cursor = left_cursor; this.right_cursor = right_cursor; // 暂时以右表的顺序为准,因为目前选择sort merge join主要是针对outter右表存在排序字段 // 后续需要优化orderbys信息,针对sort merge join,左右表的顺序字段都是正确的 this.orderBys = right_cursor.getOrderBy(); this.joinOrderbys = new ArrayList<List<IOrderBy>>(); joinOrderbys.add(left_cursor.getOrderBy()); joinOrderbys.add(right_cursor.getOrderBy()); } public SortMergeJoinCursor(ISchematicCursor left_cursor, ISchematicCursor right_cursor, List leftJoinOnColumns, List rightJoinOnColumns, IJoin join) throws TddlException{ this(left_cursor, right_cursor, leftJoinOnColumns, rightJoinOnColumns); setLeftRightJoin(join); } @Override public IRowSet next() throws TddlException { if (resultsIter != null && resultsIter.hasNext()) { this.current = resultsIter.next(); return this.current; } // right join情况下,若上轮迭代没有匹配,则没有消耗leftSubSet,不需要前移 if (needAdvanceLeft) { this.leftSubSet = new LinkedList(); left_key = advance(leftSubSet, left_cursor, leftJoinOnColumns); } // left join情况下,若上轮迭代没有匹配,则没有消耗rightSubSet,不需要前移 if (needAdvanceRight) { this.rightSubSet = new LinkedList(); right_key = advance(rightSubSet, right_cursor, rightJoinOnColumns); } while (!leftSubSet.isEmpty() && !rightSubSet.isEmpty()) { int compare = compare(left_key, right_key, leftJoinOnColumns, rightJoinOnColumns); if (compare == 0) { this.needAdvanceLeft = true; this.needAdvanceRight = true; List<IRowSet> results = acrossProduct(leftSubSet, rightSubSet); resultsIter = results.iterator(); this.current = resultsIter.next(); return this.current; } else { // outter join情况下,没有消耗就不需要前移 if (this.isLeftOutJoin() || this.isRightOutJoin()) { needAdvanceLeft = false; needAdvanceRight = false; } if (compare < 0) { if (this.isLeftOutJoin()) { this.needAdvanceLeft = true; List<IRowSet> results = acrossProduct(leftSubSet, getNullSubSet(right_cursor.getReturnColumns())); resultsIter = results.iterator(); this.current = resultsIter.next(); return this.current; } left_key = advance(leftSubSet, left_cursor, leftJoinOnColumns); } else { if (this.isRightOutJoin()) { this.needAdvanceRight = true; List<IRowSet> results = acrossProduct(getNullSubSet(left_cursor.getReturnColumns()), rightSubSet); resultsIter = results.iterator(); this.current = resultsIter.next(); return this.current; } right_key = advance(rightSubSet, right_cursor, rightJoinOnColumns); } } } if (!(leftSubSet.isEmpty() && rightSubSet.isEmpty())) { // outter join情况下,要将两个cursor都取完 if (leftSubSet.isEmpty() && this.isRightOutJoin()) { this.needAdvanceRight = true; List<IRowSet> results = acrossProduct(getNullSubSet(left_cursor.getReturnColumns()), rightSubSet); resultsIter = results.iterator(); this.current = resultsIter.next(); return this.current; } if (rightSubSet.isEmpty() && this.isLeftOutJoin()) { this.needAdvanceLeft = true; List<IRowSet> results = acrossProduct(leftSubSet, getNullSubSet(right_cursor.getReturnColumns())); resultsIter = results.iterator(); this.current = resultsIter.next(); return this.current; } } current = null; return current; } private List<IRowSet> getNullSubSet(List columns) { List subSet = new ArrayList(1); List value = new ArrayList(columns.size()); for (int i = 0; i < columns.size(); i++) { value.add(null); } ArrayRowSet row = new ArrayRowSet(CursorMetaImp.buildNew(columns), value.toArray()); subSet.add(row); return subSet; } /** * 两边做笛卡尔积,返回 * * @param leftSubSet * @param rightSubSet * @return */ private List<IRowSet> acrossProduct(List<IRowSet> leftSubSet, List<IRowSet> rightSubSet) { List<IRowSet> results = new ArrayList<IRowSet>(leftSubSet.size() * rightSubSet.size()); for (IRowSet left : leftSubSet) { for (IRowSet right : rightSubSet) { results.add(joinRecord(left, right)); } } return results; } private int compare(IRowSet row1, IRowSet row2, List columns1, List columns2) { Comparator kvPairComparator = ExecUtils.getComp(columns1, columns2, row1.getParentCursorMeta(), row2.getParentCursorMeta()); return kvPairComparator.compare(row1, row2); } private IRowSet advance(List<IRowSet> subSet, ISchematicCursor cursor, List columns) throws TddlException { subSet.clear(); if (cursor.current() == null) { if (cursor.next() == null) { return null; } } // 这里的数据要固化下来,否则next之后数据就丢了 IRowSet key = ExecUtils.fromIRowSetToArrayRowSet(cursor.current()); subSet.add(key); while (cursor.next() != null && compare(key, cursor.current(), columns, columns) == 0) { subSet.add(ExecUtils.fromIRowSetToArrayRowSet(cursor.current())); } return key; } @Override public List<List<IOrderBy>> getJoinOrderBys() { return joinOrderbys; } @Override public String toString() { return toStringWithInden(0); } @Override public String toStringWithInden(int inden) { StringBuilder sb = new StringBuilder(); String subQueryTab = GeneralUtil.getTab(inden); ExecUtils.printMeta(joinCursorMeta, inden, sb); ExecUtils.printOrderBy(orderBys, inden, sb); sb.append(subQueryTab).append("【Sort Merge Join : ").append("\n"); sb.append(subQueryTab).append("leftCursor:").append("\n"); sb.append(left_cursor.toStringWithInden(inden + 1)); sb.append(subQueryTab).append("rightCursor:").append("\n"); sb.append(right_cursor.toStringWithInden(inden + 1)); return sb.toString(); } }