/** * Copyright 2011 LiveRamp * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.liveramp.hank.cascading; import cascading.flow.FlowProcess; import cascading.scheme.Scheme; import cascading.scheme.SinkCall; import cascading.scheme.SourceCall; import cascading.tap.Tap; import cascading.tap.hadoop.Hfs; import cascading.tuple.Fields; import cascading.tuple.TupleEntry; import com.liveramp.hank.hadoop.*; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.RecordReader; import java.io.IOException; /** * A sink-only tap to write tuples to Hank Domains. */ public class DomainBuilderTap extends Hfs { private static final long serialVersionUID = 1L; private final String domainName; private final Class<? extends DomainBuilderAbstractOutputFormat> outputFormatClass; public DomainBuilderTap(String keyFieldName, String valueFieldName, int versionNumber, DomainBuilderProperties properties) { // Set the output to the temporary output path super(new DomainBuilderScheme(DomainBuilderAssembly.PARTITION_FIELD_NAME, keyFieldName, valueFieldName), properties.getTmpOutputPath(versionNumber)); this.domainName = properties.getDomainName(); this.outputFormatClass = properties.getOutputFormatClass(); } public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf){ super.sinkConfInit(process, conf); // Output Format conf.setOutputFormat(this.outputFormatClass); // Output Committer conf.setOutputCommitter(DomainBuilderOutputCommitter.class); // Set this tap's Domain name locally in the conf if (conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) { throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName + " but it was previously set to " + conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME)); } else { conf.set(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName); } } @Override public void sourceConfInit(FlowProcess process, JobConf conf){ throw new RuntimeException("DomainBuilderTap cannot be used as a source"); } private static class DomainBuilderScheme extends Scheme<JobConf, RecordReader, OutputCollector, Object[], Void> { private static final long serialVersionUID = 1L; private final String partitionFieldName; private final String keyFieldName; private final String valueFieldName; public DomainBuilderScheme(String partitionFieldName, String keyFieldName, String valueFieldName) { super(new Fields(keyFieldName, valueFieldName), new Fields(partitionFieldName, keyFieldName, valueFieldName)); this.partitionFieldName = partitionFieldName; this.keyFieldName = keyFieldName; this.valueFieldName = valueFieldName; } @Override public void sourceConfInit(FlowProcess<JobConf> jobConfFlowProcess, Tap<JobConf, RecordReader, OutputCollector> jobConfRecordReaderOutputCollectorTap, JobConf entries) { throw new RuntimeException("DomainBuilderScheme cannot be used as a source."); } @Override public void sinkConfInit(FlowProcess<JobConf> jobConfFlowProcess, Tap<JobConf, RecordReader, OutputCollector> jobConfRecordReaderOutputCollectorTap, JobConf entries) { } @Override public boolean source(FlowProcess<JobConf> jobConfFlowProcess, SourceCall<Object[], RecordReader> recordReaderSourceCall) throws IOException { throw new RuntimeException("DomainBuilderScheme cannot be used as a source."); } @Override public void sink(FlowProcess<JobConf> jobConfFlowProcess, SinkCall<Void, OutputCollector> sinkCall) throws IOException { TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); IntWritable partition = new IntWritable(tupleEntry.getInteger(partitionFieldName)); BytesWritable key = (BytesWritable) tupleEntry.getObject(keyFieldName); BytesWritable value = (BytesWritable) tupleEntry.getObject(valueFieldName); KeyAndPartitionWritable keyAndPartitionWritable = new KeyAndPartitionWritable(key, partition); ValueWritable valueWritable = new ValueWritable(value); sinkCall.getOutput().collect(keyAndPartitionWritable, valueWritable); } } }