ColumnMapKeyPrune.java example

Explorer
spork-streaming-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.newplan.logical.rules;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.newplan.optimizer.Transformer;

/**
 * Rule that prunes columns and map keys for loaders. It relies on
 * MapKeysPruneHelper to work out which map keys a loader needs and on
 * ColumnPruneHelper to work out which columns it needs. The two results are
 * then merged per loader and handed to a ColumnPruneVisitor.
 */
public class ColumnMapKeyPrune extends WholePlanRule {
    /** Latched to true by the first transformer check(); later checks return false. */
    private boolean hasRun;

    /**
     * @param n rule name, forwarded to the WholePlanRule constructor together
     *          with the flag value {@code false}
     */
    public ColumnMapKeyPrune(String n) {
        super(n, false);
        hasRun = false;
    }

    /**
     * @return a fresh transformer; all transformers created by this rule share
     *         the rule's run-once flag
     */
    @Override
    public Transformer getNewTransformer() {
        return new ColumnMapKeyPruneTransformer();
    }

    /**
     * Transformer that asks both prune helpers whether anything can be pruned
     * and, if so, merges their per-source annotations and runs a
     * ColumnPruneVisitor over the current plan.
     */
    public class ColumnMapKeyPruneTransformer extends Transformer {
        private MapKeysPruneHelper mapKeyHelper;
        private ColumnPruneHelper columnHelper;
        private boolean columnPrune;
        private boolean mapKeyPrune;

        /*
         * Required map keys and required columns, recorded per LOLoad:
         *   first  --> required map keys, Map<Integer, Set<String>>
         *   second --> required columns,  Set<Integer>
         *
         * The Integer values are column indexes.
         */
        private Map<LOLoad,Pair<Map<Integer,Set<String>>,Set<Integer>>> requiredItems =
            new HashMap<LOLoad,Pair<Map<Integer,Set<String>>,Set<Integer>>>();

        /**
         * Decides whether the transformation should run. Only the first call
         * on the enclosing rule does any work; every later call returns false.
         *
         * @param matched plan handed to both prune helpers
         * @return true if map keys or columns (or both) can be pruned
         * @throws FrontendException propagated from the helpers
         */
        @Override
        public boolean check(OperatorPlan matched) throws FrontendException {
            if (hasRun) {
                // this rule is meant to fire a single time
                return false;
            }
            hasRun = true;

            mapKeyHelper = new MapKeysPruneHelper(matched);
            columnHelper = new ColumnPruneHelper(matched);

            // Both helpers are always consulted (no short-circuit), so each
            // one gets the chance to do its work on the plan.
            mapKeyPrune = mapKeyHelper.check();
            columnPrune = columnHelper.check();

            if (mapKeyPrune) {
                return true;
            }
            return columnPrune;
        }

        /**
         * @return the current plan of the enclosing rule
         */
        @Override
        public OperatorPlan reportChanges() {
            return currentPlan;
        }

        /**
         * Combines the map-key and column annotations found on every source of
         * the current plan and stores the outcome in {@code requiredItems}.
         * Sources that carry neither annotation are skipped.
         *
         * @throws FrontendException propagated from reading a source's schema
         */
        @SuppressWarnings("unchecked")
        private void merge() throws FrontendException {
            for (Operator src : currentPlan.getSources()) {
                Map<Integer, Set<String>> keysByColumn =
                    (Map<Integer, Set<String>>) src.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);

                // Work on a private copy so the annotation itself is never modified.
                Object annotatedColumns = src.getAnnotation(ColumnPruneHelper.REQUIREDCOLS);
                Set<Integer> columns = (annotatedColumns == null)
                    ? null
                    : new HashSet<Integer>((Set<Integer>) annotatedColumns);

                if (keysByColumn == null) {
                    if (columns == null) {
                        // neither helper left information on this source
                        continue;
                    }
                    // only column information is available: record it unchanged
                } else if (columns != null) {
                    // Columns already described through their map keys are
                    // dropped from the plain column set.
                    columns.removeAll(keysByColumn.keySet());
                } else {
                    // Only map keys can be pruned and no column can, so every
                    // column that is not listed in the map-key info is required.
                    columns = new HashSet<Integer>();
                    int fieldCount = ((LogicalRelationalOperator) src).getSchema().size();
                    for (int idx = 0; idx < fieldCount; idx++) {
                        if (!keysByColumn.containsKey(idx)) {
                            columns.add(idx);
                        }
                    }
                }

                // The cast expects every annotated source to be an LOLoad.
                Pair<Map<Integer,Set<String>>,Set<Integer>> entry =
                    new Pair<Map<Integer,Set<String>>,Set<Integer>>(keysByColumn, columns);
                requiredItems.put((LOLoad) src, entry);
            }
        }

        /**
         * Merges the helper annotations and then runs a ColumnPruneVisitor
         * over the current plan with the merged requirements.
         *
         * @param matched not read by this method; the current plan is used instead
         * @throws FrontendException propagated from merging or from the visitor
         */
        @Override
        public void transform(OperatorPlan matched) throws FrontendException {
            merge();

            new ColumnPruneVisitor(currentPlan, requiredItems, columnPrune).visit();
        }
    }
}