package com.manning.hip.ch13;

import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.mapreduce.split.JobSplit;

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Debugging utility that reads a MapReduce task split-index file
 * ({@code split.info}/{@code *.split}) from the local filesystem, then
 * resolves and deserializes the corresponding {@link org.apache.hadoop.mapreduce.InputSplit}
 * from the job's split file, dumping its class name, {@code toString()}
 * output, and reflective field contents to stdout.
 */
public class TaskSplitReader {

  /**
   * Entry point.
   *
   * @param args args[0] is the local path to a serialized
   *             {@link JobSplit.TaskSplitIndex} file
   * @throws IOException if the split-index file or the split file it points
   *                     to cannot be read or deserialized
   */
  public static void main(String... args) throws IOException {
    // Fail fast with a usage message instead of an opaque
    // ArrayIndexOutOfBoundsException when the argument is missing.
    if (args.length < 1) {
      System.err.println("Usage: TaskSplitReader <task-split-index-file>");
      System.exit(1);
    }
    String taskSplitFile = args[0];

    Configuration conf = new Configuration();

    // Read the split index: the location of the job split file plus the
    // byte offset of this task's split within it. try-with-resources
    // guarantees the stream closes even if readFields() throws.
    JobSplit.TaskSplitIndex taskSplitIndex = new JobSplit.TaskSplitIndex();
    try (DataInputStream is =
             new DataInputStream(new FileInputStream(taskSplitFile))) {
      taskSplitIndex.readFields(is);
    }

    Object split = getSplitDetails(conf,
        new Path(taskSplitIndex.getSplitLocation()),
        taskSplitIndex.getStartOffset());

    System.out.println(
        "InputSplit instance class = " + split.getClass().getName());
    System.out.println("ToString on split = " + split);
    System.out.println("Reflection fields = " + ToStringBuilder
        .reflectionToString(split, ToStringStyle.SHORT_PREFIX_STYLE));
  }

  /**
   * Opens the job split file, seeks to {@code offset}, reads the split's
   * class name, and deserializes the split instance using Hadoop's
   * {@link SerializationFactory}.
   *
   * @param conf   configuration used to resolve the filesystem, the split
   *               class, and the serialization framework
   * @param file   path to the job split file
   * @param offset byte offset of this task's split record within the file
   * @param <T>    the concrete split type recorded in the file
   * @return the deserialized split object
   * @throws IOException if the file cannot be read, the recorded class is
   *                     not on the classpath, or deserialization fails
   */
  public static <T> T getSplitDetails(Configuration conf, Path file,
                                      long offset) throws IOException {
    FileSystem fs = file.getFileSystem(conf);
    // Close the stream on every path (the original leaked it if seek,
    // readString, class resolution, or deserialization threw).
    FSDataInputStream inFile = fs.open(file);
    try {
      inFile.seek(offset);

      // Each split record is prefixed with its class name as a Hadoop Text.
      String className = Text.readString(inFile);

      Class<T> cls;
      try {
        // Safe in practice: the file records the exact class that was
        // serialized, and the deserializer below enforces compatibility.
        @SuppressWarnings("unchecked")
        Class<T> resolved = (Class<T>) conf.getClassByName(className);
        cls = resolved;
      } catch (ClassNotFoundException ce) {
        // Preserve the cause via the (String, Throwable) constructor
        // instead of the verbose initCause() idiom.
        throw new IOException("Split class " + className + " not found", ce);
      }

      SerializationFactory factory = new SerializationFactory(conf);
      Deserializer<T> deserializer = factory.getDeserializer(cls);
      deserializer.open(inFile);
      try {
        return deserializer.deserialize(null);
      } finally {
        // The original never closed the deserializer; closing it also
        // releases any buffers it wrapped around the stream.
        deserializer.close();
      }
    } finally {
      inFile.close();
    }
  }
}