/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import java.io.Serializable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.SampleDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.LongWritable;
/**
 * Operator that forwards a random subset of its input rows.
 *
 * <p>For every incoming row a uniform random number is drawn; the row is
 * forwarded when the draw falls below the probability taken from the
 * operator's {@link SampleDesc}, and dropped otherwise. Passed and filtered
 * rows are tallied in the {@link Counter} statistics.
 **/
public class SamplingOperator extends Operator<SampleDesc> implements
    Serializable {

  private static final long serialVersionUID = 1L;

  /**
   * Statistics published by this operator: the number of rows dropped
   * ({@code FILTERED}) and the number of rows forwarded ({@code PASSED}).
   */
  public static enum Counter {
    FILTERED, PASSED
  }

  /** Running count of rows that were dropped. */
  private final transient LongWritable filteredCount;

  /** Running count of rows that were forwarded. */
  private final transient LongWritable passedCount;

  /**
   * Probability with which a row is forwarded, read from the descriptor in
   * {@link #initializeOp(Configuration)}. Presumably expected to lie in
   * [0, 1] -- values at or below 0 never pass, values at or above 1 always
   * pass.
   */
  private double probability;

  /** Number of rows dropped since the last forwarded row. */
  private transient int consecutiveFails;

  /**
   * Number of consecutive dropped rows after which a progress heartbeat is
   * sent; a non-positive value disables the heartbeat.
   */
  transient int heartbeatInterval;

  public SamplingOperator() {
    super();
    filteredCount = new LongWritable();
    passedCount = new LongWritable();
    consecutiveFails = 0;
  }

  /**
   * Reads the heartbeat interval from the configuration and the sampling
   * probability from the descriptor, registers the counters, and then
   * initializes the child operators.
   *
   * @param hconf configuration supplying the heartbeat interval
   * @throws HiveException wrapping any failure raised during setup
   */
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    try {
      heartbeatInterval = HiveConf.getIntVar(hconf,
          HiveConf.ConfVars.HIVESENDHEARTBEAT);
      probability = conf.getProbability();
      LOG.info("------------- " + probability + " ---------");
      statsMap.put(Counter.FILTERED, filteredCount);
      statsMap.put(Counter.PASSED, passedCount);
    } catch (Throwable e) {
      // Every setup failure is surfaced as a HiveException, cause preserved.
      throw new HiveException(e);
    }
    initializeChildren(hconf);
  }

  /**
   * Forwards the row with the configured probability and drops it otherwise,
   * updating the matching counter either way.
   *
   * @param row the incoming row
   * @param tag index selecting the object inspector that describes the row
   * @throws HiveException if forwarding the row fails
   */
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    ObjectInspector rowInspector = inputObjInspectors[tag];
    // Math.random() yields a value in [0.0, 1.0). The comparison must be
    // strict: with "<=" a probability of 0.0 would still let a row through
    // whenever the draw is exactly 0.0.
    if (Math.random() < probability) {
      forward(row, rowInspector);
      passedCount.set(passedCount.get() + 1);
      consecutiveFails = 0;
      return;
    }

    filteredCount.set(filteredCount.get() + 1);
    consecutiveFails++;
    // In case of a lot of consecutive failures, send a heartbeat in order to
    // avoid timeout. The interval is checked first so that a configured
    // value of 0 cannot trigger a division by zero in the modulo below.
    if (heartbeatInterval > 0
        && (consecutiveFails % heartbeatInterval) == 0
        && reporter != null) {
      reporter.progress();
    }
  }

  /**
   * @return the name of the operator
   */
  @Override
  public String getName() {
    return "SAMPLE";
  }

  /**
   * @return the operator type; this operator reports itself as
   *         {@link OperatorType#FILTER}
   */
  @Override
  public OperatorType getType() {
    // NOTE(review): reports FILTER rather than a sampling-specific type;
    // presumably no dedicated type exists -- confirm.
    return OperatorType.FILTER;
  }
}