package org.apache.hadoop.hbase.regionserver;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.io.pfile.PFile;
import org.apache.hadoop.hbase.io.pfile.PFileReader;
import org.apache.hadoop.hbase.io.pfile.PFileWriter;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import parquet.example.data.Group;
import parquet.example.data.GroupFactory;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetFileInfo;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import java.io.Closeable;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.HashMap;
import java.util.Map;
/**
 * Created by wangxiaoyi on 15/5/26.
 *
 * Represents one Parquet file of a store and provides the reader and the
 * writer used to access it.
 *
 * Accessors that rely on file info ({@link #getSchema()},
 * {@link #getStartKey()}, {@link #getEndKey()}) only work after the info has
 * been loaded, either by {@link #initStoreFile()} or by
 * {@link Reader#getFileInfo()}.
 */
public class PStoreFile {
  private static final Log LOG = LogFactory.getLog(PStoreFile.class);

  /** Metadata key under which the start key of the file is stored. */
  private static final String START_KEY = "startkey";
  /** Metadata key under which the end key of the file is stored. */
  private static final String END_KEY = "endkey";

  /** File info; stays null until loaded from a reader. */
  private ParquetFileInfo fileInfo;
  private final FileSystem fs;
  private Path filePath;
  private final Configuration conf;

  public PStoreFile(FileSystem fs, Path filePath, Configuration conf) {
    this.fs = fs;
    this.filePath = filePath;
    this.conf = conf;
  }

  /**
   * Loads the file info of this store file into memory. Intended to be called
   * once before the info based accessors are used.
   *
   * A short-lived reader is opened only to fetch the info and is always
   * closed again, even when fetching the info fails.
   *
   * @return true when the file exists and its info was loaded, false when the
   *         file is missing or no reader could be created
   * @throws IOException if checking the file, reading the info or closing the
   *         reader fails
   */
  protected boolean initStoreFile() throws IOException {
    if (!fs.exists(filePath)) {
      LOG.error(filePath + " does not exist!");
      return false;
    }
    PFileReader infoReader = createReader();
    if (infoReader == null) {
      LOG.error("init Store File error!");
      return false;
    }
    try {
      fileInfo = infoReader.getFileInfo();
    } finally {
      // Local reader only: never leave a closed reader reachable from a field.
      infoReader.close();
    }
    return true;
  }

  /**
   * @return the schema recorded in the loaded file info
   * @throws NullPointerException if the file info has not been loaded yet
   */
  public MessageType getSchema() {
    return fileInfo.getFileSchema();
  }

  /**
   * Creates a reader for this file using a schema given as text.
   *
   * @param schema schema text, parsed with {@link MessageTypeParser}
   * @return the reader, or null when it could not be opened
   */
  public PFileReader createReader(String schema) {
    return createReader(MessageTypeParser.parseMessageType(schema));
  }

  /**
   * Creates a reader for this file using the given schema.
   *
   * @param schema schema handed to the reader
   * @return the reader, or null when it could not be opened
   */
  public PFileReader createReader(MessageType schema) {
    return openReader(schema);
  }

  /**
   * Creates a reader for this file without passing a schema.
   *
   * @return the reader, or null when it could not be opened
   */
  public PFileReader createReader() {
    return openReader(null);
  }

  /**
   * Opens a reader and converts an I/O failure into a logged null result.
   */
  private PFileReader openReader(MessageType schema) {
    try {
      return new PFileReader(filePath, conf, schema);
    } catch (IOException ioe) {
      // Log the exception itself so the stack trace and the path are kept.
      LOG.error("Failed to create reader for " + filePath, ioe);
      return null;
    }
  }

  /**
   * @return the value stored under the start key metadata entry
   * @throws NullPointerException if the file info has not been loaded yet
   */
  public String getStartKey() {
    return fileInfo.getMetaData(START_KEY);
  }

  /**
   * @return the value stored under the end key metadata entry
   * @throws NullPointerException if the file info has not been loaded yet
   */
  public String getEndKey() {
    return fileInfo.getMetaData(END_KEY);
  }

  /**
   * @return the path of this file; when no path was supplied it is taken from
   *         the loaded file info and remembered
   */
  public Path getPath() {
    if (filePath == null) {
      filePath = fileInfo.getFilePath();
    }
    return filePath;
  }

  /**
   * Reader for the Parquet file of the enclosing store file.
   *
   * The underlying reader is null when it could not be created; in that case
   * {@link #getReader()} returns null and {@link #close()} does nothing.
   */
  public class Reader implements Closeable {
    private PFileReader reader = null;

    /**
     * @param schema schema text used to create the underlying reader
     */
    public Reader(String schema) {
      reader = createReader(schema);
    }

    /**
     * Fetches the file info from the underlying reader and also stores it in
     * the enclosing store file.
     *
     * @return the file info
     */
    public ParquetFileInfo getFileInfo() {
      fileInfo = reader.getFileInfo();
      return fileInfo;
    }

    /**
     * @return the underlying reader, possibly null
     */
    public PFileReader getReader() {
      return reader;
    }

    /**
     * @return a scanner obtained from the underlying reader
     */
    public PFileReader.PFileScanner getScanner() {
      return reader.getScanner();
    }

    /**
     * Not implemented yet.
     *
     * @return currently always 0
     */
    public int getLength() {
      return 0;
    }

    /**
     * Closes the underlying reader. Does nothing when no reader could be
     * created.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
      if (reader != null) {
        reader.close();
      }
    }
  }

  /**
   * Builder for {@link Writer}. A file path is mandatory; it is supplied
   * either through the four argument constructor or through
   * {@link #withFilePath(Path)}.
   */
  public static class WriterBuilder {
    private final Configuration conf;
    //private final CacheConfig cacheConf;
    private final FileSystem fs;
    private MessageType schema;
    private Path filePath;
    private Map<String, String> meta;
    // Recorded by addFavoredNodes but not used by build().
    private InetSocketAddress[] favoredNodes;

    public WriterBuilder(Configuration conf,
        FileSystem fs, MessageType schema) {
      this.conf = conf;
      this.fs = fs;
      this.schema = schema;
    }

    public WriterBuilder(Configuration conf,
        FileSystem fs, MessageType schema, Path path) {
      this.conf = conf;
      this.fs = fs;
      this.schema = schema;
      this.filePath = path;
    }

    /**
     * @param schema schema the writer will use
     * @return this (for chained invocation)
     */
    public WriterBuilder addSchema(MessageType schema) {
      this.schema = schema;
      return this;
    }

    /**
     * @param filePath path of the file to write
     * @return this (for chained invocation)
     */
    public WriterBuilder withFilePath(Path filePath) {
      this.filePath = filePath;
      return this;
    }

    /**
     * @param favoredNodes an array of favored nodes or possibly null
     * @return this (for chained invocation)
     */
    public WriterBuilder addFavoredNodes(InetSocketAddress[] favoredNodes) {
      this.favoredNodes = favoredNodes;
      return this;
    }

    /**
     * @param meta metadata handed to the write support of the writer
     * @return this (for chained invocation)
     */
    public WriterBuilder addMetaData(Map<String, String> meta) {
      this.meta = meta;
      return this;
    }

    /**
     * Create a store file writer. Client is responsible for closing file when
     * done. If metadata, add BEFORE closing using
     * {@link org.apache.hadoop.hbase.regionserver.PStoreFile.Writer}.
     *
     * The parent directory of the file is created when it is missing. The
     * builder itself is not modified, so it can be used to build again.
     *
     * @return a new writer for the configured file path
     * @throws IllegalArgumentException if no file path was specified
     * @throws IOException if the parent directory cannot be checked or created
     */
    public Writer build() throws IOException {
      if (filePath == null) {
        throw new IllegalArgumentException(
            "A file path must be specified before building a writer");
      }
      // Keep the parent in a local so repeated build() calls stay valid.
      Path parent = filePath.getParent();
      if (parent != null && !fs.exists(parent) && !fs.mkdirs(parent)) {
        throw new IOException("Failed to create parent directory " + parent);
      }
      return new Writer(conf, filePath, schema, meta);
    }
  }

  /**
   * Writer in Store for Parquet File
   */
  public static class Writer implements Compactor.CellSink {
    protected PFile.Writer writer;
    private MessageType schema;
    private GroupFactory gf;
    private Path filePath;

    /**
     * Registers the schema in the configuration and builds the underlying
     * file writer for the given path.
     */
    private Writer(Configuration conf, Path file, MessageType schema, Map<String, String> meta) {
      this.filePath = file;
      this.schema = schema;
      gf = new SimpleGroupFactory(schema);
      GroupWriteSupport.setSchema(schema, conf);
      writer = new PFileWriter(conf, schema, new GroupWriteSupport(meta))
          .addPath(file)
          .build();
    }

    /**
     * Appends a mutation after converting it to a group with this writer's
     * group factory.
     */
    public void append(Mutation m) {
      writer.append(m.asGroup(gf));
    }

    /**
     * Appends a group to the file.
     */
    public void append(Group group) {
      writer.append(group);
    }

    /**
     * Intentionally a no-op: cells passed through the
     * {@link Compactor.CellSink} interface are not written.
     */
    @Override
    public void append(Cell cell) throws IOException {
      //do nothing
    }

    /**
     * Closes the underlying file writer.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
      this.writer.close();
    }

    public Path getFilePath() {
      return this.filePath;
    }
  }

  /**
   * Manual read benchmark: scans every row of a Parquet file and prints the
   * elapsed time in milliseconds.
   *
   * @param args optional; args[0] overrides the default file location
   */
  public static void main(String[] args) throws IOException {
    String location = (args != null && args.length > 0)
        ? args[0]
        : "hdfs://10.214.208.11:9000/parquet/wangxiaoyi3.parquet";
    Configuration configuration = new Configuration();
    Path path = new Path(location);
    FileSystem fs = FileSystem.get(path.toUri(), configuration);
    PStoreFile storeFile = new PStoreFile(fs, path, configuration);

    String schemaStr = " message people { " +
        " required binary name; " +
        " required int32 age; " +
        " }";

    long start = System.currentTimeMillis();
    //read with schema
    PFileReader reader = storeFile.createReader(schemaStr);
    if (reader == null) {
      // createReader already logged the cause.
      System.err.println("unable to open reader for " + path);
      return;
    }
    try {
      PFileReader.PFileScanner scanner = reader.getScanner();
      while (scanner.hasNext()) {
        scanner.nextRow();
      }
      long end = System.currentTimeMillis();
      System.out.println("total time : "+ (end - start));
    } finally {
      reader.close();
    }
  }
}