package com.cloudera.sa.hcu.io.get;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;

/**
 * Downloads an RC (Record Columnar) file from HDFS and writes it to the local
 * filesystem as delimited text, one output line per row.
 *
 * <p>Usage: {@code GetRcFile <hdfs input file path> <local output file path> [delimiter]}
 * The delimiter defaults to {@code "|"} when not supplied.
 *
 * <p>Progress hooks {@code onWritenRecord()} / {@code onFinishedWriting()} are
 * inherited from {@link AbstractGetter}.
 */
public class GetRcFile extends AbstractGetter {

    /** Default column separator used when no third argument is given. */
    private static final String DEFAULT_DELIMITER = "|";

    public static void main(String[] args) throws Exception {
        new GetRcFile().getFile(args);
    }

    /**
     * Reads every row of the RC file at {@code args[0]} and writes it as
     * delimited UTF-8 text to the local file at {@code args[1]}.
     *
     * @param args {@code [hdfsInputPath, localOutputPath, optionalDelimiter]}
     * @throws Exception on any HDFS, decode, or local I/O failure
     */
    @Override
    public void getFile(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Get RC File:");
            System.out.println();
            System.out.println("Parameter: <hdfs input file path> <local output data file path> <optionally define a delimiter>");
            System.out.println();
            System.out.println("Note: default delimiter is a '|'.");
            // BUG FIX: previously fell through and threw
            // ArrayIndexOutOfBoundsException on args[0]/args[1].
            return;
        }
        String inputLocation = args[0];
        String outputLocation = args[1];
        String delimiter = (args.length > 2) ? args[2] : DEFAULT_DELIMITER;

        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        Path inputFilePath = new Path(inputLocation);

        // try-with-resources guarantees both resources are closed even if one
        // constructor or close() throws (the original could leak the reader).
        try (RCFile.Reader reader = new RCFile.Reader(hdfs, inputFilePath, config);
             BufferedWriter localDataWriter =
                     new BufferedWriter(new FileWriter(new File(outputLocation)))) {

            LongWritable rowId = new LongWritable(1);
            BytesRefArrayWritable row = new BytesRefArrayWritable();

            while (reader.next(rowId)) {
                reader.getCurrentRow(row);
                for (int j = 0; j < row.size(); j++) {
                    BytesRefWritable byteWritable = row.get(j);
                    if (byteWritable.getStart() >= 0 && byteWritable.getLength() > 0) {
                        // BUG FIX: decode exactly the column's byte range with an
                        // explicit charset. The original decoded the whole backing
                        // buffer with the platform-default charset and then applied
                        // BYTE offsets as CHAR offsets, corrupting multi-byte data.
                        localDataWriter.write(new String(
                                byteWritable.getData(),
                                byteWritable.getStart(),
                                byteWritable.getLength(),
                                StandardCharsets.UTF_8));
                    }
                    if (j < row.size() - 1) {
                        localDataWriter.write(delimiter);
                    }
                }
                localDataWriter.newLine();
                onWritenRecord();
            }
        }
        onFinishedWriting();
    }
}