package org.apache.hadoop.hbase.regionserver.pbase.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.pfile.PFileReader;
import org.apache.hadoop.hbase.regionserver.RecordScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import parquet.schema.MessageType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * Created by wangxiaoyi on 15/6/11.
 *
 * Helper for parquet file read tests: lists the files under a directory,
 * opens a {@link RecordScanner} for each of them, and checks the row keys
 * that the scanners return.
 */
public class ParquetReadUtil {

    /** Root directory of the parquet test data. */
    public static Path rootPath = new Path("hdfs://localhost:9000/parquet");

    /** Number of rows {@link #testReadWriteValue()} compares against the expected keys. */
    private static final int EXPECTED_ROW_COUNT = 500;

    /**
     * Lists the entries directly under a directory.
     *
     * Every entry returned by {@code FileSystem#listStatus} is included; no
     * filtering by name or type is applied.
     *
     * @param path directory to list
     * @return the paths of the entries under {@code path}, or an empty list
     *         when listing fails with an {@link IOException}
     */
    public static List<Path> loadParquetFiles(Path path) {
        FileSystem fs = HDFSUtil.loadFileSystem(path);
        try {
            FileStatus[] statuses = fs.listStatus(path);
            List<Path> paths = new ArrayList<>(statuses.length);
            for (FileStatus status : statuses) {
                paths.add(status.getPath());
            }
            return paths;
        } catch (IOException ioe) {
            // Best effort: report which directory failed and fall through to
            // an empty result so callers do not have to handle the exception.
            System.err.println("failed to list files under " + path + ": " + ioe);
        }
        return new ArrayList<>();
    }

    /**
     * Opens one scanner per parquet file.
     *
     * A file whose reader cannot be created is reported on stderr and
     * skipped, so the returned list may be shorter than {@code paths}.
     *
     * @param paths  parquet files to open
     * @param schema schema handed to each {@link PFileReader}
     * @return the scanners that were opened successfully, in the order of {@code paths}
     */
    public static List<RecordScanner> getParquetFileScanner(List<Path> paths, MessageType schema) {
        List<RecordScanner> scanners = new ArrayList<>(paths.size());
        for (Path file : paths) {
            try {
                PFileReader reader = new PFileReader(file, new Configuration(), schema);
                scanners.add(reader.getScanner());
            } catch (IOException ioe) {
                // Previously swallowed silently; keep the skip-and-continue
                // behavior but make the failure visible.
                System.err.println("failed to open parquet scanner for " + file + ": " + ioe);
            }
        }
        return scanners;
    }

    /**
     * Reads every row from the files under {@code rootPath/pfile} and checks
     * that the first {@value #EXPECTED_ROW_COUNT} row keys are the numbers
     * 1..{@value #EXPECTED_ROW_COUNT} formatted with {@code "%10d"}.
     */
    @Test
    public void testReadWriteValue() {
        List<Path> paths = loadParquetFiles(new Path(rootPath, "pfile"));
        List<RecordScanner> scanners = getParquetFileScanner(paths, null);

        // ArrayList: the keys are accessed by index below.
        List<String> rowkeys = new ArrayList<>();
        for (RecordScanner scanner : scanners) {
            while (scanner.hasNext()) {
                List<Cell> cells = scanner.next();
                if (!cells.isEmpty()) {
                    rowkeys.add(Bytes.toString(cells.get(0).getRow()));
                }
            }
        }

        // Fail with a readable message instead of an IndexOutOfBoundsException
        // when fewer rows were read than the loop below expects.
        org.junit.Assert.assertTrue(
                "expected at least " + EXPECTED_ROW_COUNT + " row keys but read " + rowkeys.size(),
                rowkeys.size() >= EXPECTED_ROW_COUNT);

        for (int i = 0; i < EXPECTED_ROW_COUNT; ++i) {
            String row = String.format("%10d", i + 1);
            org.junit.Assert.assertEquals("read row key is not equals to the wrote row key", row, rowkeys.get(i));
        }
    }
}