/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.hadoop;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import parquet.column.ParquetProperties.WriterVersion;
import parquet.hadoop.CodecFactory.BytesCompressor;
import parquet.hadoop.api.WriteSupport;
import parquet.schema.MessageType;

import static parquet.Preconditions.checkNotNull;

/**
 * Writes records to a Parquet file
 *
 * @param <T> the type of the materialized records
 * @author Julien Le Dem
 * @see ParquetOutputFormat
 */
public class ParquetRecordWriter<T> extends RecordWriter<Void, T> {

  private InternalParquetRecordWriter<T> internalWriter;
  private MemoryManager memoryManager;

  /**
   * @param w the file to write to
   * @param writeSupport the class to convert incoming records
   * @param schema the schema of the records
   * @param extraMetaData extra metadata to write in the footer of the file
   * @param blockSize the size of a block in the file (this will be approximate)
   * @param pageSize the size of a page in the file (this will be approximate)
   * @param compressor the compressor used to compress the pages
   * @param dictionaryPageSize the threshold for dictionary size
   * @param enableDictionary to enable the dictionary
   * @param validating if schema validation should be turned on
   * @param writerVersion the Parquet writer version to use
   */
  @Deprecated
  public ParquetRecordWriter(
      ParquetFileWriter w,
      WriteSupport<T> writeSupport,
      MessageType schema,
      Map<String, String> extraMetaData,
      int blockSize, int pageSize,
      BytesCompressor compressor,
      int dictionaryPageSize,
      boolean enableDictionary,
      boolean validating,
      WriterVersion writerVersion) {
    internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
        extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize,
        enableDictionary, validating, writerVersion);
  }

  /**
   * @param w the file to write to
   * @param writeSupport the class to convert incoming records
   * @param schema the schema of the records
   * @param extraMetaData extra metadata to write in the footer of the file
   * @param blockSize the size of a block in the file (this will be approximate)
   * @param pageSize the size of a page in the file (this will be approximate)
   * @param compressor the compressor used to compress the pages
   * @param dictionaryPageSize the threshold for dictionary size
   * @param enableDictionary to enable the dictionary
   * @param validating if schema validation should be turned on
   * @param writerVersion the Parquet writer version to use
   * @param memoryManager the memory manager that balances memory across open writers
   */
  public ParquetRecordWriter(
      ParquetFileWriter w,
      WriteSupport<T> writeSupport,
      MessageType schema,
      Map<String, String> extraMetaData,
      long blockSize, int pageSize,
      BytesCompressor compressor,
      int dictionaryPageSize,
      boolean enableDictionary,
      boolean validating,
      WriterVersion writerVersion,
      MemoryManager memoryManager) {
    internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
        extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize,
        enableDictionary, validating,
        writerVersion);
    this.memoryManager = checkNotNull(memoryManager, "memoryManager");
    memoryManager.addWriter(internalWriter, blockSize);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    internalWriter.close();
    if (memoryManager != null) {
      memoryManager.removeWriter(internalWriter);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void write(Void key, T value) throws IOException, InterruptedException {
    internalWriter.write(value);
  }
}
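
// A minimal usage sketch (not part of the original source): this writer is normally obtained
// through ParquetOutputFormat rather than constructed directly. The GroupWriteSupport, the
// taskAttemptContext, and the group record below are assumed here for illustration only.
//
//   ParquetOutputFormat<Group> outputFormat = new ParquetOutputFormat<Group>(new GroupWriteSupport());
//   RecordWriter<Void, Group> writer = outputFormat.getRecordWriter(taskAttemptContext);
//   writer.write(null, group);            // the key is Void; only the value is materialized
//   writer.close(taskAttemptContext);     // flushes row groups and writes the footer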