/**
* Copyright 2011 LiveRamp
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.liveramp.hank.cascading;
import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tap.hadoop.io.RecordReaderIterator;
import cascading.tuple.*;
import com.liveramp.hank.hadoop.DomainBuilderProperties;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class PartitionMarkerTap extends Tap<JobConf, RecordReader, OutputCollector> {
private final String domainName;
public PartitionMarkerTap(String domainName, String keyFieldName, String valueFieldName) {
super(new PartitionMarkerScheme(keyFieldName, valueFieldName));
this.domainName = domainName;
}
@Override
public String getIdentifier() {
return "__hank_partition_markers_for_" + domainName;
}
@Override
public TupleEntryIterator openForRead(FlowProcess<JobConf> jobConfFlowProcess, RecordReader recordReader) throws IOException {
return new TupleEntrySchemeIterator(jobConfFlowProcess, this.getScheme(), new RecordReaderIterator(recordReader));
}
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector) throws IOException {
throw new RuntimeException("PartitionMarkerTap cannot be used as a sink."); }
@Override
public boolean createResource(JobConf entries) throws IOException {
return true;
}
@Override
public boolean deleteResource(JobConf entries) throws IOException {
return true;
}
@Override
public boolean resourceExists(JobConf entries) throws IOException {
return false;
}
@Override
public long getModifiedTime(JobConf entries) throws IOException {
return 0;
}
private static class PartitionMarkerRecordReader implements RecordReader<IntWritable, IntWritable> {
private int currentPartition = 0;
private final int numPartitions;
public PartitionMarkerRecordReader(int numPartitions) {
this.numPartitions = numPartitions;
}
@Override
public boolean next(IntWritable key, IntWritable value) throws IOException {
// value is not used
if (currentPartition < numPartitions) {
key.set(currentPartition);
currentPartition += 1;
return currentPartition <= numPartitions;
} else {
return false;
}
}
@Override
public IntWritable createKey() {
return new IntWritable();
}
@Override
public IntWritable createValue() {
return new IntWritable();
}
@Override
public long getPos() throws IOException {
return currentPartition;
}
@Override
public void close() throws IOException {
}
@Override
public float getProgress() throws IOException {
return (float) currentPartition / (float) numPartitions;
}
}
private static class PartitionMarkerInputFormat implements InputFormat<IntWritable, IntWritable> {
@Override
public InputSplit[] getSplits(JobConf conf, int ignored) throws IOException {
String domainName = DomainBuilderProperties.getDomainName(conf);
int numPartitions = DomainBuilderProperties.getNumPartitions(domainName, conf);
return new InputSplit[]{new PartitionMarkerInputSplit(numPartitions)};
}
@Override
public RecordReader<IntWritable, IntWritable> getRecordReader(InputSplit inputSplit,
JobConf conf,
Reporter reporter) throws IOException {
return new PartitionMarkerRecordReader(((PartitionMarkerInputSplit) inputSplit).getNumPartitions());
}
}
private static class PartitionMarkerScheme extends Scheme<JobConf, RecordReader, OutputCollector, Object[], Void> {
public PartitionMarkerScheme(String keyFieldName, String valueFieldName) {
super(new Fields(keyFieldName,
valueFieldName,
DomainBuilderAssembly.PARTITION_FIELD_NAME,
DomainBuilderAssembly.COMPARABLE_KEY_FIELD_NAME));
}
@Override
public void sourceConfInit(FlowProcess<JobConf> jobConfFlowProcess, Tap<JobConf, RecordReader, OutputCollector> jobConfOutputCollectorTap, JobConf entries) {
entries.setInputFormat(PartitionMarkerInputFormat.class);
}
@Override
public void sinkConfInit(FlowProcess<JobConf> jobConfFlowProcess, Tap<JobConf, RecordReader, OutputCollector> jobConfOutputCollectorTap, JobConf entries) {
throw new RuntimeException("PartitionMarkerScheme cannot be used as a sink.");
}
@Override
public boolean source(FlowProcess<JobConf> jobConfFlowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
Tuple tuple = sourceCall.getIncomingEntry().getTuple();
IntWritable partition = (IntWritable) sourceCall.getInput().createKey();
boolean result = sourceCall.getInput().next(partition, null);
if(!result){
return false;
}
tuple.set(0, null);
tuple.set(1, null);
tuple.set(2, partition);
tuple.set(3, null);
return true;
}
@Override
public void sink(FlowProcess<JobConf> jobConfFlowProcess, SinkCall<Void, OutputCollector> outputCollectorSinkCall) throws IOException {
throw new RuntimeException("PartitionMarkerScheme cannot be used as a sink.");
}
}
private static class PartitionMarkerInputSplit implements InputSplit {
private Integer numPartitions;
public PartitionMarkerInputSplit() {
this.numPartitions = null;
}
public PartitionMarkerInputSplit(int numPartitions) {
this.numPartitions = numPartitions;
}
@Override
public long getLength() throws IOException {
return 1;
}
@Override
public String[] getLocations() throws IOException {
return new String[]{};
}
@Override
public void write(DataOutput dataOutput) throws IOException {
WritableUtils.writeVInt(dataOutput, numPartitions);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
numPartitions = WritableUtils.readVInt(dataInput);
}
public int getNumPartitions() {
return numPartitions;
}
}
}