/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.zebra.pig;
import java.io.IOException;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.ColumnType;
import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.ResourceSchema;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
/**
 * Utility conversions between Zebra table schemas
 * ({@link org.apache.hadoop.zebra.schema.Schema}) and the two Pig schema
 * representations: the logical-layer {@link Schema} and the load/store
 * {@link ResourceSchema}.
 */
class SchemaConverter {

  /**
   * Maps a Pig {@link DataType} byte constant to the corresponding Zebra
   * {@link ColumnType}.
   *
   * @param ptype one of the {@code DataType} byte constants
   * @return the matching {@code ColumnType}, or {@code null} when the Pig
   *         type has no Zebra equivalent
   */
  public static ColumnType toTableType(byte ptype) {
    switch (ptype) {
      case DataType.INTEGER:   return ColumnType.INT;
      case DataType.LONG:      return ColumnType.LONG;
      case DataType.FLOAT:     return ColumnType.FLOAT;
      case DataType.DOUBLE:    return ColumnType.DOUBLE;
      case DataType.BOOLEAN:   return ColumnType.BOOL;
      case DataType.DATETIME:  return ColumnType.DATETIME;
      case DataType.BAG:       return ColumnType.COLLECTION;
      case DataType.MAP:       return ColumnType.MAP;
      case DataType.TUPLE:     return ColumnType.RECORD;
      case DataType.CHARARRAY: return ColumnType.STRING;
      case DataType.BYTEARRAY: return ColumnType.BYTES;
      default:                 return null;
    }
  }

  /**
   * Converts a Zebra schema to a Pig logical-layer {@link Schema}.
   * RECORD and COLLECTION columns are converted recursively; a missing
   * (null) Zebra column becomes an unnamed, untyped Pig field.
   *
   * @param tschema the Zebra schema to convert
   * @return the equivalent Pig schema
   * @throws FrontendException if Pig rejects a generated field schema
   */
  public static Schema toPigSchema(
      org.apache.hadoop.zebra.schema.Schema tschema)
      throws FrontendException {
    Schema ret = new Schema();
    for (String col : tschema.getColumns()) {
      ColumnSchema columnSchema = tschema.getColumn(col);
      if (columnSchema == null) {
        ret.add(new FieldSchema(null, null));
        continue;
      }
      ColumnType ct = columnSchema.getType();
      if (ct == ColumnType.RECORD || ct == ColumnType.COLLECTION) {
        // Nested types carry their own schema; convert it recursively.
        ret.add(new FieldSchema(col, toPigSchema(columnSchema.getSchema()),
            ct.pigDataType()));
      } else {
        ret.add(new FieldSchema(col, ct.pigDataType()));
      }
    }
    return ret;
  }

  /**
   * Converts a Pig logical-layer {@link Schema} to a Zebra schema.
   * TUPLE/BAG fields are converted recursively; MAP fields are given a
   * default BYTES value schema (Pig maps are untyped, Zebra maps are not);
   * a null Pig field becomes an anonymous column of type ANY.
   *
   * @param pschema the Pig schema to convert
   * @return the equivalent Zebra schema
   * @throws FrontendException if a Pig field cannot be read
   * @throws ParseException    if Zebra rejects a generated column
   */
  public static org.apache.hadoop.zebra.schema.Schema fromPigSchema(
      Schema pschema) throws FrontendException, ParseException {
    org.apache.hadoop.zebra.schema.Schema tschema =
        new org.apache.hadoop.zebra.schema.Schema();
    for (int i = 0; i < pschema.size(); i++) {
      Schema.FieldSchema columnSchema = pschema.getField(i);
      if (columnSchema == null) {
        // Unknown field: record it as an anonymous column of type ANY.
        tschema.add(new ColumnSchema(null, ColumnType.ANY));
      } else if (DataType.isSchemaType(columnSchema.type)) {
        // TUPLE/BAG: convert the nested schema recursively.
        tschema.add(new ColumnSchema(columnSchema.alias,
            fromPigSchema(columnSchema.schema),
            toTableType(columnSchema.type)));
      } else if (columnSchema.type == DataType.MAP) {
        // Pig maps carry no value schema; Zebra requires one, so default
        // the map value type to BYTES.
        tschema.add(new ColumnSchema(columnSchema.alias,
            new org.apache.hadoop.zebra.schema.Schema(
                new ColumnSchema(null, ColumnType.BYTES)),
            toTableType(columnSchema.type)));
      } else {
        tschema.add(new ColumnSchema(columnSchema.alias,
            toTableType(columnSchema.type)));
      }
    }
    return tschema;
  }

  /**
   * Converts a Pig {@link ResourceSchema} to a Zebra schema, recursing into
   * nested field schemas. MAP fields with no schema are given a default
   * BYTES value schema.
   *
   * @param rSchema the Pig resource schema, may be null
   * @return the equivalent Zebra schema, or null if {@code rSchema} is null
   * @throws ParseException if Zebra rejects a generated column
   */
  public static org.apache.hadoop.zebra.schema.Schema convertFromResourceSchema(
      ResourceSchema rSchema) throws ParseException {
    if (rSchema == null)
      return null;
    org.apache.hadoop.zebra.schema.Schema schema =
        new org.apache.hadoop.zebra.schema.Schema();
    for (ResourceFieldSchema field : rSchema.getFields()) {
      String name = field.getName();
      ColumnType type = toTableType(field.getType());
      org.apache.hadoop.zebra.schema.Schema cSchema =
          convertFromResourceSchema(field.getSchema());
      if (type == ColumnType.MAP && cSchema == null) {
        // Pig maps have no schema; Zebra needs one — default value to BYTES.
        cSchema = new org.apache.hadoop.zebra.schema.Schema();
        cSchema.add(new ColumnSchema("", ColumnType.BYTES));
      }
      schema.add(new ColumnSchema(name, cSchema, type));
    }
    return schema;
  }

  /**
   * Converts a Zebra schema to a Pig {@link ResourceSchema}. A missing
   * (null) Zebra column becomes an empty field schema.
   *
   * @param tSchema the Zebra schema, may be null
   * @return the equivalent resource schema, or null if {@code tSchema} is null
   * @throws IOException if a field schema cannot be built
   */
  public static ResourceSchema convertToResourceSchema(
      org.apache.hadoop.zebra.schema.Schema tSchema) throws IOException {
    if (tSchema == null)
      return null;
    ResourceSchema rSchema = new ResourceSchema();
    int fieldCount = tSchema.getNumColumns();
    ResourceFieldSchema[] rFields = new ResourceFieldSchema[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
      ColumnSchema cSchema = tSchema.getColumn(i);
      rFields[i] = (cSchema != null)
          ? convertToResourceFieldSchema(cSchema)
          : new ResourceFieldSchema();
    }
    rSchema.setFields(rFields);
    return rSchema;
  }

  /**
   * Converts a single Zebra column to a Pig {@link ResourceFieldSchema}.
   * Anonymous ANY columns surface as nameless BYTEARRAY fields; MAP fields
   * get no schema (Pig requires this); a bare COLLECTION element schema is
   * wrapped in a synthetic tuple, since Pig requires a tuple as the schema
   * of a BAG field.
   *
   * @param cSchema the Zebra column to convert, must not be null
   * @return the equivalent Pig resource field schema
   * @throws IOException if a nested schema cannot be built
   */
  private static ResourceFieldSchema convertToResourceFieldSchema(
      ColumnSchema cSchema) throws IOException {
    ResourceFieldSchema field = new ResourceFieldSchema();
    String name = cSchema.getName();
    // For anonymous columns: fromPigSchema produces ColumnSchema(null, ANY)
    // for untyped fields, so the name may be null as well as empty — check
    // both to avoid an NPE on round-trip.
    if (cSchema.getType() == ColumnType.ANY
        && (name == null || name.isEmpty())) {
      field.setName(null);
      field.setType(DataType.BYTEARRAY);
      field.setSchema(null);
      return field;
    }
    field.setName(name);
    field.setType(cSchema.getType().pigDataType());
    if (cSchema.getType() == ColumnType.MAP) {
      // Pig doesn't want any schema for a map field.
      field.setSchema(null);
      return field;
    }
    org.apache.hadoop.zebra.schema.Schema fs = cSchema.getSchema();
    ResourceSchema rs = convertToResourceSchema(fs);
    // Guard fs != null: a COLLECTION without a schema previously NPE'd here;
    // now it simply yields a null field schema, like other schema-less types.
    if (cSchema.getType() == ColumnType.COLLECTION && fs != null) {
      int count = fs.getNumColumns();
      if (count > 1
          || (count == 1 && fs.getColumn(0).getType() != ColumnType.RECORD)) {
        // Pig requires a record (tuple) as the schema for a BAG field, so
        // wrap the bare element schema in a synthetic tuple.
        ResourceFieldSchema tuple = new ResourceFieldSchema();
        tuple.setSchema(rs);
        tuple.setType(ColumnType.RECORD.pigDataType());
        rs = new ResourceSchema();
        rs.setFields(new ResourceFieldSchema[] { tuple });
      }
    }
    field.setSchema(rs);
    return field;
  }
}