package apps;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.column.ParquetProperties;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Created by wangxiaoyi on 15/4/20.
 */
public class TestParquetWrite {

    public static void main(String[] args) throws IOException {

        // Target directory on HDFS
        Path root = new Path("hdfs://10.214.208.11:9000/parquet/");

        Configuration configuration = new Configuration();

        // Schema of the Parquet file
        MessageType schema = MessageTypeParser.parseMessageType(
                "message people { "
                        + "required binary rowkey; "
                        + "required binary cf:name; "
                        + "required binary cf:age; "
                        + "required int64 timestamp; "
                        + "}");
        GroupWriteSupport.setSchema(schema, configuration);

        SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
        Path file = new Path(root, "people002.parquet");

        // Extra key/value metadata to be stored in the file footer.
        // Note: passing the map to GroupWriteSupport assumes a constructor that
        // accepts it; the stock class in most parquet-mr releases only exposes a
        // no-arg constructor, so this may rely on a customized GroupWriteSupport.
        Map<String, String> meta = new HashMap<String, String>();
        meta.put("startkey", "1");
        meta.put("endkey", "2");

        ParquetWriter<Group> writer = new ParquetWriter<Group>(
                file,
                new GroupWriteSupport(meta),
                CompressionCodecName.UNCOMPRESSED,
                1024,   // block (row group) size in bytes -- very small, for testing only
                1024,   // page size in bytes
                512,    // dictionary page size in bytes
                true,   // enable dictionary encoding
                false,  // disable record validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                configuration);

        // Every written group must contain exactly the fields declared in the schema,
        // otherwise SimpleGroup.append() fails with an InvalidRecordException.
        for (int i = 0; i < 10000; ++i) {
            writer.write(sfg.newGroup()
                    .append("rowkey", String.valueOf(i))
                    .append("cf:name", "wangxiaoyi" + i)
                    .append("cf:age", "24")
                    .append("timestamp", System.currentTimeMillis()));
        }
        writer.close();
    }
}
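/*
 * Companion sketch (not part of the original example): reads people002.parquet
 * back to verify the write above. It assumes the same pre-Apache parquet.*
 * packages, the deprecated ParquetReader(Path, ReadSupport) constructor, and
 * GroupReadSupport from parquet-hadoop's example API; the class name
 * TestParquetRead is made up for illustration.
 */
package apps;

import org.apache.hadoop.fs.Path;
import parquet.example.data.Group;
import parquet.hadoop.ParquetReader;
import parquet.hadoop.example.GroupReadSupport;

import java.io.IOException;

public class TestParquetRead {

    public static void main(String[] args) throws IOException {
        Path file = new Path("hdfs://10.214.208.11:9000/parquet/people002.parquet");

        // GroupReadSupport picks the schema up from the file footer, so no extra
        // configuration is needed for a plain full-row scan.
        ParquetReader<Group> reader = new ParquetReader<Group>(file, new GroupReadSupport());

        Group group;
        int count = 0;
        while ((group = reader.read()) != null) {
            if (count < 5) {
                // getString/getLong(field, index) read the first value of each field
                System.out.println(group.getString("rowkey", 0) + "\t"
                        + group.getString("cf:name", 0) + "\t"
                        + group.getString("cf:age", 0) + "\t"
                        + group.getLong("timestamp", 0));
            }
            count++;
        }
        reader.close();
        System.out.println("total rows: " + count);
    }
}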