/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.thrift;
import static parquet.schema.ConversionPatterns.listType;
import static parquet.schema.ConversionPatterns.mapType;
import static parquet.schema.OriginalType.ENUM;
import static parquet.schema.OriginalType.UTF8;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static parquet.schema.Type.Repetition.OPTIONAL;
import static parquet.schema.Type.Repetition.REPEATED;
import static parquet.schema.Type.Repetition.REQUIRED;
import static parquet.schema.Types.primitive;
import java.util.ArrayList;
import java.util.List;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type;
import parquet.schema.Types.PrimitiveBuilder;
import parquet.thrift.projection.FieldProjectionFilter;
import parquet.thrift.projection.FieldsPath;
import parquet.thrift.projection.ThriftProjectionException;
import parquet.thrift.struct.ThriftField;
import parquet.thrift.struct.ThriftType;
/**
* Visitor class for converting a thrift definition to a parquet message type.
* Projection can be done by providing a {@link FieldProjectionFilter}
*
* @author Tianshuo Deng
*/
public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {

  /** Consulted for every primitive leaf to decide whether it is kept in the converted schema. */
  FieldProjectionFilter fieldProjectionFilter;

  /** Result of the most recent visit; {@code null} when nothing under the visited type was kept. */
  Type currentType;

  /** Thrift fields pushed on the way down to the type currently being visited. */
  FieldsPath currentFieldPath = new FieldsPath();

  /** Repetition given to the type currently being converted. */
  Type.Repetition currentRepetition = Type.Repetition.REPEATED;//MessageType is repeated GroupType

  /** Name given to the type currently being converted. */
  String currentName = "ParquetSchema";

  /**
   * @param fieldProjectionFilter filter queried with the current field path for each primitive leaf
   */
  public ThriftSchemaConvertVisitor(FieldProjectionFilter fieldProjectionFilter) {
    this.fieldProjectionFilter = fieldProjectionFilter;
  }

  /**
   * Converts a thrift map. The key is converted as a REQUIRED type named "key" and the value as an
   * OPTIONAL type named "value". The result is {@code null} when neither key nor value is kept.
   *
   * @throws ThriftProjectionException when the value is kept but the key is not
   */
  @Override
  public void visit(ThriftType.MapType mapType) {
    final ThriftField mapKeyField = mapType.getKey();
    final ThriftField mapValueField = mapType.getValue();

    // save env for map
    final String mapName = currentName;
    final Type.Repetition mapRepetition = currentRepetition;

    final Type keyType = convertMapEntryField(mapKeyField, "key", REQUIRED);
    final Type valueType = convertMapEntryField(mapValueField, "value", OPTIONAL);

    // restore env before any exit so that "value"/OPTIONAL never leak out of this visit
    currentName = mapName;
    currentRepetition = mapRepetition;

    if (keyType == null) {
      if (valueType == null) {
        // nothing under this map was kept
        currentType = null;
        return;
      }
      throw new ThriftProjectionException("key of map is not specified in projection: " + currentFieldPath);
    }

    // valueType may still be null here; it is handed to mapType as-is
    currentType = mapType(currentRepetition, currentName,
        keyType,
        valueType);
  }

  /**
   * Converts the key or value field of a map under the given name and repetition, with the field
   * pushed on {@link #currentFieldPath} for the duration of the conversion.
   *
   * @return the converted type, or {@code null} when nothing was kept
   */
  private Type convertMapEntryField(ThriftField field, String name, Type.Repetition repetition) {
    currentFieldPath.push(field);
    currentName = name;
    currentRepetition = repetition;
    field.getType().accept(this);
    Type converted = currentType;//currentType is the already converted type
    currentFieldPath.pop();
    return converted;
  }

  /** Converts a thrift set; it is built with the same list conversion pattern as a thrift list. */
  @Override
  public void visit(ThriftType.SetType setType) {
    convertCollection(setType.getValues());
  }

  /** Converts a thrift list. */
  @Override
  public void visit(ThriftType.ListType listType) {
    convertCollection(listType.getValues());
  }

  /**
   * Shared conversion for sets and lists. The element is converted as a REPEATED type named
   * {@code <collection name>_tuple} and then wrapped with the collection's own name and
   * repetition. {@link #currentType} is left {@code null} when the element is not kept.
   */
  private void convertCollection(ThriftField elemField) {
    // save env for the collection
    final String collectionName = currentName;
    final Type.Repetition collectionRepetition = currentRepetition;

    currentName = collectionName + "_tuple";
    currentRepetition = REPEATED;
    elemField.getType().accept(this);
    //after conversion, currentType is the nested type
    final Type elementType = currentType;

    // restore env so the "_tuple" name and REPEATED do not leak out of this visit
    currentName = collectionName;
    currentRepetition = collectionRepetition;

    if (elementType != null) {
      currentType = listType(collectionRepetition, collectionName, elementType);
    }
  }

  /**
   * @return a message type named after {@link #currentName} holding the fields of the converted
   *         root group, or an empty message type when nothing was kept
   */
  public MessageType getConvertedMessageType() {
    // the root should be a GroupType
    if (currentType == null) {
      return new MessageType(currentName, new ArrayList<Type>());
    }
    GroupType rootType = currentType.asGroupType();
    return new MessageType(currentName, rootType.getFields());
  }

  /**
   * Converts a thrift struct into a group of its kept children; the result is {@code null} when
   * no child is kept.
   */
  @Override
  public void visit(ThriftType.StructType structType) {
    List<ThriftField> fields = structType.getChildren();

    // converting the children overwrites name and repetition, so remember ours
    final String oldName = currentName;
    final Type.Repetition oldRepetition = currentRepetition;

    List<Type> types = getFieldsTypes(fields);

    currentName = oldName;
    currentRepetition = oldRepetition;

    if (types.isEmpty()) {
      currentType = null;
    } else {
      currentType = new GroupType(currentRepetition, currentName, types);
    }
  }

  /**
   * Converts each field in order and collects the ones that were kept, tagged with their thrift
   * field id. Overwrites {@link #currentName} and {@link #currentRepetition}.
   */
  private List<Type> getFieldsTypes(List<ThriftField> fields) {
    List<Type> types = new ArrayList<Type>();
    for (ThriftField field : fields) {
      currentRepetition = getRepetition(field);
      currentName = field.getName();
      currentFieldPath.push(field);
      field.getType().accept(this);
      if (currentType != null) {
        // currentType is converted with the currentName(fieldName)
        types.add(currentType.withId(field.getFieldId()));
      }
      currentFieldPath.pop();
    }
    return types;
  }

  /**
   * Asks the projection filter about the current field path.
   * Side effect: clears {@link #currentType} when the path is not matched.
   */
  private boolean isCurrentlyMatchedFilter() {
    if (!fieldProjectionFilter.isMatched(currentFieldPath)) {
      currentType = null;
      return false;
    }
    return true;
  }

  /** Converts the current field to a primitive without an original type annotation. */
  private void primitiveType(PrimitiveTypeName type) {
    primitiveType(type, null);
  }

  /**
   * Converts the current field to a primitive of the given type, using the current name and
   * repetition, when the projection filter matches the current path.
   *
   * @param type primitive type to build
   * @param orig original type annotation to attach, or {@code null} for none
   */
  private void primitiveType(PrimitiveTypeName type, OriginalType orig) {
    if (isCurrentlyMatchedFilter()) {
      PrimitiveBuilder<PrimitiveType> b = primitive(type, currentRepetition);
      if (orig != null) {
        b = b.as(orig);
      }
      currentType = b.named(currentName);
    }
  }

  /** Enums become BINARY annotated as ENUM. */
  @Override
  public void visit(ThriftType.EnumType enumType) {
    primitiveType(BINARY, ENUM);
  }

  /** Booleans become BOOLEAN. */
  @Override
  public void visit(ThriftType.BoolType boolType) {
    primitiveType(BOOLEAN);
  }

  /** Bytes become INT32. */
  @Override
  public void visit(ThriftType.ByteType byteType) {
    primitiveType(INT32);
  }

  /** Doubles become DOUBLE. */
  @Override
  public void visit(ThriftType.DoubleType doubleType) {
    primitiveType(DOUBLE);
  }

  /** 16-bit integers become INT32. */
  @Override
  public void visit(ThriftType.I16Type i16Type) {
    primitiveType(INT32);
  }

  /** 32-bit integers become INT32. */
  @Override
  public void visit(ThriftType.I32Type i32Type) {
    primitiveType(INT32);
  }

  /** 64-bit integers become INT64. */
  @Override
  public void visit(ThriftType.I64Type i64Type) {
    primitiveType(INT64);
  }

  /** Strings become BINARY annotated as UTF8. */
  @Override
  public void visit(ThriftType.StringType stringType) {
    primitiveType(BINARY, UTF8);
  }

  /**
   * Maps a thrift field requirement to a parquet repetition. By default we can make everything
   * optional: only REQUIRED fields stay required.
   *
   * @param thriftField the field to inspect, may be {@code null}
   * @return REQUIRED for a REQUIRED field; OPTIONAL for an OPTIONAL or DEFAULT field and for
   *         {@code null}
   * @throws IllegalArgumentException when the requirement is none of the above
   */
  private Type.Repetition getRepetition(ThriftField thriftField) {
    if (thriftField == null) {
      return OPTIONAL;
    }

    switch (thriftField.getRequirement()) {
      case REQUIRED:
        return REQUIRED;
      case OPTIONAL:
        return OPTIONAL;
      case DEFAULT:
        return OPTIONAL;
      default:
        throw new IllegalArgumentException("unknown requirement type: " + thriftField.getRequirement());
    }
  }
}