package apps;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.example.data.Group;
import parquet.hadoop.ParquetReader;
import parquet.hadoop.api.ReadSupport;
import parquet.hadoop.example.GroupReadSupport;

import java.io.IOException;

/**
 * Created by wangxiaoyi on 15/4/20.
 * <p/>
 * Reads all the data from a Parquet file by default.
 * A read schema can be added to the read configuration to project
 * specific columns only.
 */
public class TestParquetRead {

    public static void main(String[] args) throws IOException {

        Path root = new Path("hdfs://localhost:9000/parquet/"); // directory path
        Configuration configuration = new Configuration();

        String schema = "message test { "
                + "required binary name; "
                + "required int32 age; "
                + "}";

        // Project specific columns: the read schema can be customized; the query
        // schema may be built on the client side and sent to the server for execution.
        //configuration.set(ReadSupport.PARQUET_READ_SCHEMA, schema);

        Path file = new Path(root, "people1.parquet");

        try {
            ParquetReader<Group> reader = ParquetReader
                    .builder(new GroupReadSupport(), file)
                    .withConf(configuration)
                    .build();

            Group group;
            int rowCount = 0;
            while ((group = reader.read()) != null) {
                rowCount++;
                String name = new String(group.getBinary("name", 0).getBytes(), "UTF-8");
                int age = group.getInteger("age", 0);
                System.out.println("name : " + name + " age : " + age);
            }
            System.out.println("row count " + rowCount);
            reader.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}
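
/*
 * A minimal sketch of the column-projection variant mentioned in the class
 * comment above: setting ReadSupport.PARQUET_READ_SCHEMA on the configuration
 * before building the reader restricts which columns are materialized.
 * The file path and the single-column schema below are illustrative
 * assumptions reusing the layout from TestParquetRead, not part of the
 * original program.
 */
class TestParquetProjectedRead {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Project only the "name" column; "age" is not read.
        conf.set(ReadSupport.PARQUET_READ_SCHEMA,
                "message test { required binary name; }");

        // Hypothetical path, matching the directory used by TestParquetRead.
        Path file = new Path("hdfs://localhost:9000/parquet/people1.parquet");

        ParquetReader<Group> reader = ParquetReader
                .builder(new GroupReadSupport(), file)
                .withConf(conf)
                .build();

        Group group;
        while ((group = reader.read()) != null) {
            // Only the projected column is available on each Group.
            String name = new String(group.getBinary("name", 0).getBytes(), "UTF-8");
            System.out.println("name : " + name);
        }
        reader.close();
    }
}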