/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage.parquet;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.Log;
import org.apache.parquet.hadoop.api.InitContext;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.storage.Tuple;
import java.util.Map;
/**
* Tajo implementation of {@link parquet.hadoop.api.ReadSupport} for {@link org.apache.tajo.storage.Tuple}s.
* Users should use {@link org.apache.tajo.storage.parquet.ParquetScanner} and not this class directly.
*/
public class TajoReadSupport extends ReadSupport<Tuple> {
private static final Log LOG = Log.getLog(TajoReadSupport.class);
private Schema readSchema;
private Schema requestedSchema;
/**
* Creates a new TajoReadSupport.
*
* @param requestedSchema The Tajo schema of the requested projection passed
* down by ParquetScanner.
*/
public TajoReadSupport(Schema readSchema, Schema requestedSchema) {
super();
this.readSchema = readSchema;
this.requestedSchema = requestedSchema;
}
/**
* Creates a new TajoReadSupport.
*
* @param readSchema The schema of the table.
*/
public TajoReadSupport(Schema readSchema) {
super();
this.readSchema = readSchema;
this.requestedSchema = readSchema;
}
/**
* Initializes the ReadSupport.
*
* @param context The InitContext.
* @return A ReadContext that defines how to read the file.
*/
@Override
public ReadSupport.ReadContext init(InitContext context) {
if (requestedSchema == null) {
throw new RuntimeException("requestedSchema is null.");
}
MessageType requestedParquetSchema =
new TajoSchemaConverter().convert(requestedSchema);
LOG.debug("Reading data with projection:\n" + requestedParquetSchema);
return new ReadContext(requestedParquetSchema);
}
/**
* Prepares for read.
*
* @param configuration The job configuration.
* @param keyValueMetaData App-specific metadata from the file.
* @param fileSchema The schema of the Parquet file.
* @param readContext Returned by the init method.
*/
@Override
public RecordMaterializer<Tuple> prepareForRead(
Configuration configuration,
Map<String, String> keyValueMetaData,
MessageType fileSchema,
ReadContext readContext) {
MessageType parquetRequestedSchema = readContext.getRequestedSchema();
return new TajoRecordMaterializer(parquetRequestedSchema, requestedSchema, readSchema);
}
}