package com.twitter.elephantbird.cascading2.scheme; import com.twitter.elephantbird.mapreduce.input.combine.DelegateCombineFileInputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.RecordReader; import com.twitter.elephantbird.mapred.output.DeprecatedOutputFormatWrapper; import com.twitter.elephantbird.mapreduce.input.MultiInputFormat; import com.twitter.elephantbird.mapreduce.io.ThriftWritable; import com.twitter.elephantbird.mapreduce.output.LzoThriftBlockOutputFormat; import com.twitter.elephantbird.util.ThriftUtils; import com.twitter.elephantbird.util.TypeRef; import cascading.flow.FlowProcess; import cascading.tap.Tap; import org.apache.thrift.TBase; /** * Scheme for Thrift lzo compressed files. * * @author Argyris Zymnis */ public class LzoThriftScheme<M extends TBase<?,?>> extends LzoBinaryScheme<M, ThriftWritable<M>> { private static final long serialVersionUID = -5011096855302946109L; private Class thriftClass; public LzoThriftScheme(Class thriftClass) { this.thriftClass = thriftClass; } @Override public void sinkConfInit(FlowProcess<JobConf> hfp, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { LzoThriftBlockOutputFormat.setClassConf(thriftClass, conf); DeprecatedOutputFormatWrapper.setOutputFormat(LzoThriftBlockOutputFormat.class, conf); } protected ThriftWritable<M> prepareBinaryWritable() { TypeRef<M> typeRef = (TypeRef<M>) ThriftUtils.getTypeRef(thriftClass); return new ThriftWritable(typeRef); } @Override public void sourceConfInit(FlowProcess<JobConf> hfp, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { MultiInputFormat.setClassConf(thriftClass, conf); DelegateCombineFileInputFormat.setDelegateInputFormat(conf, MultiInputFormat.class); } }