package com.twitter.elephantbird.crunch;
import com.google.protobuf.Message;
import com.twitter.elephantbird.mapreduce.input.LzoProtobufBlockInputFormat;
import org.apache.crunch.ReadableData;
import org.apache.crunch.io.FormatBundle;
import org.apache.crunch.io.ReadableSource;
import org.apache.crunch.io.impl.FileSourceImpl;
import org.apache.crunch.types.PType;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.util.List;
/**
 * A Crunch {@code Source} that reads protocol buffer messages from files handled by
 * {@link LzoProtobufBlockInputFormat}.
 *
 * <p>This source also implements {@code ReadableSource}, so its records can be read directly
 * (for example, for in-memory joins) rather than only through a pipeline job.
 *
 * @param <T> the protocol buffer {@link Message} type produced by this source
 */
public class LzoProtobufSource<T extends Message> extends FileSourceImpl<T> implements ReadableSource<T> {

  /** Configuration key under which the name of the class being read is stored in the format bundle. */
  private static final String PROTO_CLASS_CONF_KEY = "elephantbird.class.for.MultiInputFormat";

  /**
   * Creates a source over a single path.
   *
   * @param path path to the data
   * @param ptype the {@code PType} describing the records; its type class name is recorded in the
   *     input format bundle
   */
  public LzoProtobufSource(Path path, PType<T> ptype) {
    super(path, ptype, inputBundleFor(ptype));
  }

  /**
   * Creates a source over several paths.
   *
   * @param paths paths to the data
   * @param ptype the {@code PType} describing the records; its type class name is recorded in the
   *     input format bundle
   */
  public LzoProtobufSource(List<Path> paths, PType<T> ptype) {
    super(paths, ptype, inputBundleFor(ptype));
  }

  /**
   * Builds an {@code LzoProtobufSource} for one path and a protocol buffer message class. The
   * {@code PType} is obtained from {@code EBTypes.protos(protoClass)}.
   *
   * @param path path to the data
   * @param protoClass the Message class to read
   * @return a new {@code LzoProtobufSource}
   */
  public static <M extends Message> LzoProtobufSource<M> at(Path path, Class<M> protoClass) {
    PType<M> protoType = EBTypes.protos(protoClass);
    return new LzoProtobufSource<M>(path, protoType);
  }

  /**
   * Builds an {@code LzoProtobufSource} for a list of paths and a protocol buffer message class. The
   * {@code PType} is obtained from {@code EBTypes.protos(protoClass)}.
   *
   * @param paths paths to the data
   * @param protoClass the Message class to read
   * @return a new {@code LzoProtobufSource}
   */
  public static <M extends Message> LzoProtobufSource<M> at(List<Path> paths, Class<M> protoClass) {
    PType<M> protoType = EBTypes.protos(protoClass);
    return new LzoProtobufSource<M>(paths, protoType);
  }

  /**
   * Reads this source's records using the given configuration, delegating to the inherited
   * two-argument {@code read} with a {@code ProtobufFileReaderFactory} built from this source's
   * {@code PType}.
   *
   * @param conf the Hadoop configuration to read with
   * @return the records of this source
   * @throws IOException if the delegated read fails with an I/O error
   */
  @Override
  public Iterable<T> read(Configuration conf) throws IOException {
    ProtobufFileReaderFactory<T> readerFactory = new ProtobufFileReaderFactory<T>(ptype);
    return read(conf, readerFactory);
  }

  /**
   * Exposes this source as a {@code ReadableData}, backed by a {@code ProtobufReadableData} created
   * from this source's paths and {@code PType}.
   *
   * @return a new {@code ProtobufReadableData} over this source's paths
   */
  @Override
  public ReadableData<T> asReadable() {
    ProtobufReadableData<T> readable = new ProtobufReadableData<T>(paths, ptype);
    return readable;
  }

  /**
   * Creates the input {@code FormatBundle} for {@link LzoProtobufBlockInputFormat} and records the
   * name of the {@code PType}'s type class under {@link #PROTO_CLASS_CONF_KEY}.
   *
   * @param ptype the {@code PType} whose type class name is stored in the bundle
   * @return the value returned by {@code FormatBundle.set} for the configured bundle
   */
  private static <V> FormatBundle<LzoProtobufBlockInputFormat> inputBundleFor(PType<V> ptype) {
    // Create the bundle before touching ptype so the evaluation order matches a chained call.
    FormatBundle<LzoProtobufBlockInputFormat> bundle =
        FormatBundle.forInput(LzoProtobufBlockInputFormat.class);
    String typeClassName = ptype.getTypeClass().getName();
    return bundle.set(PROTO_CLASS_CONF_KEY, typeClassName);
  }
}