/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.utils;

import java.util.ArrayList;
import java.util.List;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;

/**
 * Utility methods for converting between Pig schemas ({@link Schema},
 * {@link FieldSchema}) and Cubert schemas ({@link BlockSchema},
 * {@link ColumnType}).
 */
public class SchemaUtils
{
    /**
     * Creates a Cubert {@link ColumnType} for a single Pig field, recursing into
     * the nested schema (for tuples and bags) when one is present.
     */
    public static ColumnType coltypeFromFieldSchema(String colName, FieldSchema colSchema)
    {
        ColumnType t = new ColumnType();
        t.setName(colName);
        t.setType(convertoRCFTypeName(DataType.findTypeName(colSchema.type)));

        if (colSchema.schema != null)
        {
            try
            {
                t.setColumnSchema(convertToBlockSchema(colSchema.schema));
            }
            catch (FrontendException e)
            {
                // A malformed nested schema indicates a programming error; fail fast
                // rather than silently swallowing the exception.
                throw new RuntimeException(e);
            }
        }

        return t;
    }

    /**
     * Wraps a single Pig {@link FieldSchema} into a one-column {@link BlockSchema}.
     */
    public static BlockSchema fieldSchemaToBlockSchema(FieldSchema fschema)
    {
        ColumnType[] ctypes = new ColumnType[1];
        ctypes[0] = coltypeFromFieldSchema(fschema.alias, fschema);
        return new BlockSchema(ctypes);
    }

    /**
     * Converts a Pig {@link Schema} to a Cubert {@link BlockSchema}, recursing
     * into nested schemas.
     */
    public static BlockSchema convertToBlockSchema(Schema schema) throws FrontendException
    {
        ColumnType[] ctypes = new ColumnType[schema.size()];
        for (int i = 0; i < ctypes.length; i++)
        {
            ColumnType ct = new ColumnType();
            FieldSchema fs = schema.getField(i);

            ct.setName(fs.alias);
            ct.setType(convertoRCFTypeName(DataType.findTypeName(fs.type)));
            if (fs.schema != null)
            {
                ct.setColumnSchema(convertToBlockSchema(fs.schema));
            }

            ctypes[i] = ct;
        }

        return new BlockSchema(ctypes);
    }

    /**
     * Converts a Cubert {@link BlockSchema} back to a Pig {@link Schema}, recursing
     * into nested schemas.
     */
    public static Schema convertFromBlockSchema(BlockSchema blockSchema) throws FrontendException
    {
        List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
        for (int i = 0; i < blockSchema.getNumColumns(); i++)
        {
            ColumnType ctype = blockSchema.getColumnType(i);
            byte pigtype = convertToPigType(ctype.getType().toString());

            if (ctype.getColumnSchema() != null)
            {
                Schema nestedSchema = convertFromBlockSchema(ctype.getColumnSchema());
                fieldSchemas.add(new FieldSchema(ctype.getName(), nestedSchema, pigtype));
            }
            else
            {
                fieldSchemas.add(new FieldSchema(ctype.getName(), pigtype));
            }
        }
        return new Schema(fieldSchemas);
    }

    /**
     * Maps an RCFile type name to the corresponding Pig {@link DataType} constant.
     */
    public static byte convertToPigType(String rcfTypeName)
    {
        final String pigTypeString;
        if (rcfTypeName.equals("STRING"))
            pigTypeString = "CHARARRAY";
        else if (rcfTypeName.equals("INT"))
            pigTypeString = "INTEGER";
        else if (rcfTypeName.equals("RECORD"))
            pigTypeString = "TUPLE";
        else if (rcfTypeName.equals("ARRAY"))
            pigTypeString = "BAG";
        else if (rcfTypeName.equals("BYTES"))
            pigTypeString = "BYTEARRAY";
        else
            pigTypeString = rcfTypeName;

        return DataType.genNameToTypeMap().get(pigTypeString);
    }

    /**
     * Maps a Pig type name to the corresponding RCFile type name. Only
     * "chararray" and "bytearray" need renaming; all other names pass through.
     */
    public static String convertoRCFTypeName(String pigTypeName)
    {
        if (pigTypeName.equals("chararray"))
            return "STRING";
        if (pigTypeName.equals("bytearray"))
            return "BYTES";
        return pigTypeName;
    }

    /**
     * Returns a schema in which each numeric column is widened to the wider of
     * the two input column types. Requires that the schemas are "consistent":
     * schema1.equalsIgnoreNumeric(schema2) must hold.
     */
    public static BlockSchema getWiderSchema(BlockSchema schema1, BlockSchema schema2)
    {
        ColumnType[] ctypes = new ColumnType[schema1.getNumColumns()];
        for (int i = 0; i < ctypes.length; i++)
        {
            ColumnType type1 = schema1.getColumnType(i);
            ColumnType type2 = schema2.getColumnType(i);

            if (type1.getType().isNumerical() && type2.getType().isNumerical())
            {
                ctypes[i] =
                        new ColumnType(type1.getName(),
                                       com.linkedin.cubert.block.DataType.getWiderType(type1.getType(),
                                                                                       type2.getType()));
            }
            else
            {
                ctypes[i] = type1;
            }
        }
        return new BlockSchema(ctypes);
    }
}
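
/*
 * Hypothetical usage sketch (not part of the original file): round-trips a small
 * Pig schema through BlockSchema, then widens two numeric schemas. The class
 * name, field names, and the expected widening noted in the comments are
 * illustrative assumptions, not part of the Cubert API.
 */
class SchemaUtilsExample
{
    public static void main(String[] args) throws FrontendException
    {
        // Build a simple Pig schema: (memberId: int, score: float)
        Schema pigSchema = new Schema();
        pigSchema.add(new FieldSchema("memberId", DataType.INTEGER));
        pigSchema.add(new FieldSchema("score", DataType.FLOAT));

        // Pig -> Cubert -> Pig round trip
        BlockSchema blockSchema = SchemaUtils.convertToBlockSchema(pigSchema);
        Schema roundTripped = SchemaUtils.convertFromBlockSchema(blockSchema);
        System.out.println(blockSchema + " <-> " + roundTripped);

        // A consistent schema differing only in numeric width:
        // (memberId: long, score: double)
        Schema pigSchema2 = new Schema();
        pigSchema2.add(new FieldSchema("memberId", DataType.LONG));
        pigSchema2.add(new FieldSchema("score", DataType.DOUBLE));

        // Assuming getWiderType behaves as its name suggests, each numeric
        // column widens: INT vs LONG -> LONG, FLOAT vs DOUBLE -> DOUBLE.
        BlockSchema wider =
                SchemaUtils.getWiderSchema(blockSchema,
                                           SchemaUtils.convertToBlockSchema(pigSchema2));
        System.out.println(wider);
    }
}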