// AggregateImplementation.java example
//
// Explorer
// hbase-trunk-mttr-master
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.coprocessor;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateArgument;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateService;
import org.apache.hadoop.hbase.regionserver.InternalScanner;

import com.google.protobuf.ByteString;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;

/**
 * A concrete {@link AggregateService} implementation. It is a system-level
 * coprocessor endpoint that computes aggregate functions at the region level.
 * <p>
 * Every RPC method follows the same outline: build a {@link ColumnInterpreter}
 * from the request, open a region scanner for the request's {@link Scan},
 * fold the returned cells into an aggregate, close the scanner and hand the
 * (possibly {@code null}) response to the callback. An {@link IOException}
 * raised along the way is reported through the {@link RpcController} and the
 * callback is then invoked with a {@code null} response.
 *
 * @param <T> the cell value type produced by the {@link ColumnInterpreter}
 * @param <S> the promoted type the interpreter uses for sums
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AggregateImplementation<T, S> extends AggregateService implements
    CoprocessorService, Coprocessor {
  protected static Log log = LogFactory.getLog(AggregateImplementation.class);
  private RegionCoprocessorEnvironment env;

  /**
   * Returns the lowest qualifier of the given set without removing it.
   * <p>
   * The set is the live qualifier set held by the {@link Scan}, so a
   * destructive read ({@code pollFirst}) would alter what the scan selects if
   * it happened before the scanner is opened.
   *
   * @param qualifiers qualifiers requested for the column family, may be null
   * @return the first qualifier, or null when no qualifier was requested
   */
  private static byte[] firstQualifier(NavigableSet<byte[]> qualifiers) {
    if (qualifiers == null || qualifiers.isEmpty()) {
      return null;
    }
    return qualifiers.first();
  }

  /**
   * Closes the scanner if one was opened. A failure to close is deliberately
   * ignored: by then the aggregate has been computed (or the real error has
   * already been recorded on the controller).
   *
   * @param scanner the scanner to close, may be null
   */
  private static void closeQuietly(InternalScanner scanner) {
    if (scanner == null) {
      return;
    }
    try {
      scanner.close();
    } catch (IOException ignored) {
      // best effort only, see method comment
    }
  }

  /**
   * Gives the maximum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, maximum value for the
   * entire column family will be returned.
   * <p>
   * The callback receives {@code null} when no value was found or when an
   * {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getMax(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    InternalScanner scanner = null;
    AggregateResponse response = null;
    T max = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      List<KeyValue> results = new ArrayList<KeyValue>();
      byte[] colFamily = scan.getFamilies()[0];
      // qualifier can be null.
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      boolean hasMoreRows;
      do {
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          T temp = ci.getValue(colFamily, qualifier, kv);
          max = (max == null || (temp != null && ci.compare(temp, max) > 0)) ? temp : max;
        }
        results.clear();
      } while (hasMoreRows);
      if (max != null) {
        AggregateResponse.Builder builder = AggregateResponse.newBuilder();
        builder.addFirstPart(ci.getProtoForCellType(max));
        response = builder.build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    log.info("Maximum from this region is "
        + env.getRegion().getRegionNameAsString() + ": " + max);
    done.run(response);
  }

  /**
   * Gives the minimum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, minimum value for the
   * entire column family will be returned.
   * <p>
   * The callback receives {@code null} when no value was found or when an
   * {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getMin(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    T min = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      List<KeyValue> results = new ArrayList<KeyValue>();
      byte[] colFamily = scan.getFamilies()[0];
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      boolean hasMoreRows;
      do {
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          T temp = ci.getValue(colFamily, qualifier, kv);
          min = (min == null || (temp != null && ci.compare(temp, min) < 0)) ? temp : min;
        }
        results.clear();
      } while (hasMoreRows);
      if (min != null) {
        response = AggregateResponse.newBuilder().addFirstPart(
          ci.getProtoForCellType(min)).build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    log.info("Minimum from this region is "
        + env.getRegion().getRegionNameAsString() + ": " + min);
    done.run(response);
  }

  /**
   * Gives the sum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, sum for the entire column
   * family will be returned.
   * <p>
   * Cells for which the interpreter yields no value are skipped. The callback
   * receives {@code null} when nothing was summed or when an
   * {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getSum(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    // Declared outside the try block so the value actually computed can be logged.
    S sumVal = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      byte[] colFamily = scan.getFamilies()[0];
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean hasMoreRows;
      do {
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          T temp = ci.getValue(colFamily, qualifier, kv);
          if (temp != null) {
            sumVal = ci.add(sumVal, ci.castToReturnType(temp));
          }
        }
        results.clear();
      } while (hasMoreRows);
      if (sumVal != null) {
        response = AggregateResponse.newBuilder().addFirstPart(
          ci.getProtoForPromotedType(sumVal)).build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    log.debug("Sum from this region is "
        + env.getRegion().getRegionNameAsString() + ": " + sumVal);
    done.run(response);
  }

  /**
   * Gives the row count for the given column family and column qualifier, in
   * the given row range as defined in the Scan object.
   * <p>
   * When the scan has neither a filter nor a qualifier, a
   * {@link FirstKeyOnlyFilter} is installed before the scanner is opened. The
   * count is returned as an 8-byte long in the first part of the response.
   * The callback receives {@code null} when an {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan
   * @param done callback invoked with the response
   */
  @Override
  public void getRowNum(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    long counter = 0L;
    List<KeyValue> results = new ArrayList<KeyValue>();
    InternalScanner scanner = null;
    try {
      Scan scan = ProtobufUtil.toScan(request.getScan());
      byte[] colFamily = scan.getFamilies()[0];
      // Read the qualifier without removing it: the scanner is opened from this
      // same Scan below, and emptying its qualifier set here would change which
      // columns that scanner selects.
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      if (scan.getFilter() == null && qualifier == null) {
        scan.setFilter(new FirstKeyOnlyFilter());
      }
      scanner = env.getRegion().getScanner(scan);
      boolean hasMoreRows;
      do {
        hasMoreRows = scanner.next(results);
        if (!results.isEmpty()) {
          counter++;
        }
        results.clear();
      } while (hasMoreRows);
      ByteBuffer bb = ByteBuffer.allocate(8).putLong(counter);
      bb.rewind();
      response = AggregateResponse.newBuilder().addFirstPart(
          ByteString.copyFrom(bb)).build();
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    log.info("Row counter from this region is "
        + env.getRegion().getRegionNameAsString() + ": " + counter);
    done.run(response);
  }

  /**
   * Gives a Pair with first object as Sum and second object as row count,
   * computed for a given combination of column qualifier and column family in
   * the given row range as defined in the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). In case of null column qualifier, an aggregate sum over all the
   * entire column family will be returned.
   * <p>
   * The average is computed in
   * {@link AggregationClient#avg(byte[], ColumnInterpreter, Scan)} by
   * processing results from all regions, so its "ok" to pass sum and a Long
   * type.
   * <p>
   * Only scanner results that contain at least one cell are counted as rows.
   * The callback receives {@code null} when nothing was summed or when an
   * {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getAvg(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      S sumVal = null;
      long rowCountVal = 0L;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      byte[] colFamily = scan.getFamilies()[0];
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean hasMoreRows;

      do {
        results.clear();
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          sumVal = ci.add(sumVal, ci.castToReturnType(ci.getValue(colFamily,
              qualifier, kv)));
        }
        // A next() call that produced no cells is not a row; counting it would
        // inflate the divisor used for the average.
        if (!results.isEmpty()) {
          rowCountVal++;
        }
      } while (hasMoreRows);
      if (sumVal != null) {
        ByteString first = ci.getProtoForPromotedType(sumVal);
        AggregateResponse.Builder pair = AggregateResponse.newBuilder();
        pair.addFirstPart(first);
        ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal);
        bb.rewind();
        pair.setSecondPart(ByteString.copyFrom(bb));
        response = pair.build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    done.run(response);
  }

  /**
   * Gives a Pair with first object a List containing Sum and sum of squares,
   * and the second object as row count. It is computed for a given combination of
   * column qualifier and column family in the given row range as defined in the
   * Scan object. In its current implementation, it takes one column family and
   * one column qualifier (if provided). The idea is to get the value of the
   * variance first: the average of the squares less the square of the average;
   * the standard deviation is the square root of the variance.
   * <p>
   * Only scanner results that contain at least one cell are counted as rows.
   * The callback receives {@code null} when nothing was summed or when an
   * {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getStd(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    InternalScanner scanner = null;
    AggregateResponse response = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      S sumVal = null, sumSqVal = null, tempVal = null;
      long rowCountVal = 0L;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      byte[] colFamily = scan.getFamilies()[0];
      byte[] qualifier = firstQualifier(scan.getFamilyMap().get(colFamily));
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean hasMoreRows;

      do {
        // tempVal accumulates the values of one scanner result (one row).
        tempVal = null;
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily,
              qualifier, kv)));
        }
        // A next() call that produced no cells is not a row.
        if (!results.isEmpty()) {
          rowCountVal++;
        }
        results.clear();
        sumVal = ci.add(sumVal, tempVal);
        sumSqVal = ci.add(sumSqVal, ci.multiply(tempVal, tempVal));
      } while (hasMoreRows);
      if (sumVal != null) {
        ByteString first_sumVal = ci.getProtoForPromotedType(sumVal);
        ByteString first_sumSqVal = ci.getProtoForPromotedType(sumSqVal);
        AggregateResponse.Builder pair = AggregateResponse.newBuilder();
        pair.addFirstPart(first_sumVal);
        pair.addFirstPart(first_sumSqVal);
        ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal);
        bb.rewind();
        pair.setSecondPart(ByteString.copyFrom(bb));
        response = pair.build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    done.run(response);
  }

  /**
   * Gives a List containing sum of values and sum of weights.
   * It is computed for the combination of column
   * family and column qualifier(s) in the given row range as defined in the
   * Scan object. In its current implementation, it takes one column family and
   * two column qualifiers. The first qualifier is for values column and
   * the second qualifier (optional) is for weight column.
   * <p>
   * When no weight qualifier is given, the interpreter's minimum value (cast
   * to the promoted type) is sent in place of the sum of weights. The callback
   * receives {@code null} when an {@link IOException} occurred.
   *
   * @param controller receives the exception if the scan fails
   * @param request carries the serialized scan and the interpreter class name
   * @param done callback invoked with the response
   */
  @Override
  public void getMedian(RpcController controller, AggregateArgument request,
      RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    try {
      ColumnInterpreter<T, S> ci = constructColumnInterpreterFromRequest(request);
      S sumVal = null, sumWeights = null, tempVal = null, tempWeight = null;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      byte[] colFamily = scan.getFamilies()[0];
      NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
      byte[] valQualifier = null, weightQualifier = null;
      if (qualifiers != null && !qualifiers.isEmpty()) {
        // The destructive polls are intentional here: removing the value
        // qualifier first makes pollLast() return null when only one qualifier
        // was supplied, i.e. when no weight column was requested.
        valQualifier = qualifiers.pollFirst();
        // if weighted median is requested, get qualifier for the weight column
        weightQualifier = qualifiers.pollLast();
      }
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean hasMoreRows;

      do {
        tempVal = null;
        tempWeight = null;
        hasMoreRows = scanner.next(results);
        for (KeyValue kv : results) {
          tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily,
              valQualifier, kv)));
          if (weightQualifier != null) {
            tempWeight = ci.add(tempWeight,
                ci.castToReturnType(ci.getValue(colFamily, weightQualifier, kv)));
          }
        }
        results.clear();
        sumVal = ci.add(sumVal, tempVal);
        sumWeights = ci.add(sumWeights, tempWeight);
      } while (hasMoreRows);
      // NOTE(review): sumVal may still be null here if the scan produced no
      // usable values; whether getProtoForPromotedType accepts null depends on
      // the interpreter implementation - confirm.
      ByteString first_sumVal = ci.getProtoForPromotedType(sumVal);
      S s = sumWeights == null ? ci.castToReturnType(ci.getMinValue()) : sumWeights;
      ByteString first_sumWeights = ci.getProtoForPromotedType(s);
      AggregateResponse.Builder pair = AggregateResponse.newBuilder();
      pair.addFirstPart(first_sumVal);
      pair.addFirstPart(first_sumWeights);
      response = pair.build();
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      closeQuietly(scanner);
    }
    done.run(response);
  }

  /**
   * Instantiates the {@link ColumnInterpreter} named in the request through
   * its no-argument constructor and, if the request carries
   * interpreter-specific bytes, initializes it with them.
   *
   * @param request the aggregate request naming the interpreter class
   * @return the ready-to-use interpreter
   * @throws IOException if the class cannot be found, instantiated or
   *           accessed; the original exception is kept as the cause
   */
  @SuppressWarnings("unchecked")
  ColumnInterpreter<T,S> constructColumnInterpreterFromRequest(
      AggregateArgument request) throws IOException {
    String className = request.getInterpreterClassName();
    Class<?> cls;
    try {
      cls = Class.forName(className);
      // Unchecked: the request only carries a class name, so the type
      // parameters cannot be verified at runtime.
      ColumnInterpreter<T,S> ci = (ColumnInterpreter<T, S>) cls.newInstance();
      if (request.hasInterpreterSpecificBytes()) {
        ci.initialize(request.getInterpreterSpecificBytes());
      }
      return ci;
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    } catch (InstantiationException e) {
      throw new IOException(e);
    } catch (IllegalAccessException e) {
      throw new IOException(e);
    }
  }

  /**
   * Returns this instance as the protobuf service to expose.
   *
   * @return this endpoint
   */
  @Override
  public Service getService() {
    return this;
  }

  /**
   * Stores a reference to the coprocessor environment provided by the
   * {@link org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost} from the region where this
   * coprocessor is loaded.  Since this is a coprocessor endpoint, it always expects to be loaded
   * on a table region, so always expects this to be an instance of
   * {@link RegionCoprocessorEnvironment}.
   * @param env the environment provided by the coprocessor host
   * @throws IOException if the provided environment is not an instance of
   * {@code RegionCoprocessorEnvironment}
   */
  @Override
  public void start(CoprocessorEnvironment env) throws IOException {
    if (env instanceof RegionCoprocessorEnvironment) {
      this.env = (RegionCoprocessorEnvironment)env;
    } else {
      throw new CoprocessorException("Must be loaded on a table region!");
    }
  }

  /**
   * Nothing is allocated in {@link #start(CoprocessorEnvironment)}, so there
   * is nothing to release here.
   *
   * @param env the environment provided by the coprocessor host (unused)
   */
  @Override
  public void stop(CoprocessorEnvironment env) throws IOException {
    // nothing to do
  }

}