package org.apache.hadoop.hive.mastiff; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.mastiff.MastiffHandlerUtil.MTableDesc; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.Writable; import cn.ac.ncic.mastiff.etl.ETLUtils; import cn.ac.ncic.mastiff.hive.serde.lazy.Row; /** * MastiffSerDe is used for column family based storage supported by SegmentFile. */ public class MastiffSerDe extends AbstractSerDe { public static final String MASTIFF_CF_DEFINITION_PATH = "cf.config.path"; public static final Log LOG = LogFactory.getLog(MastiffSerDe.class.getName()); private Configuration job; private Properties tbl; private ObjectInspector cachedObjectInspector; ArrayList<OIG> resuse = new ArrayList<OIG>(); private SerDeParameters serdeParams = null; private MTableDesc mtbl; private LazyMastiffRow cachedMastiffRow; // private final boolean isRowInit=false; private final boolean isRowInit = false; protected long serializedSize; private Row row; public boolean isRowMapInit = false; private RowMap rowMap = null; public static int count = 0; public MastiffSerDe() throws SerDeException { } private void initMastiffSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException, IOException { serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName); this.job = job; this.tbl = tbl; } @Override public void initialize(Configuration job, Properties tbl) throws SerDeException { try { initMastiffSerDeParameters(job, tbl, getClass().getName()); } catch (IOException e) { e.printStackTrace(); } // Create the ObjectInspectors from the fields. cachedObjectInspector = createMastiffRowObjectInspector( serdeParams.getColumnNames(), serdeParams.getColumnTypes()); cachedMastiffRow = new LazyMastiffRow( (LazyMastiffRowObjectInspector) cachedObjectInspector); LOG.debug("MastiffSerDe initialized with: columnNames=" + serdeParams.getColumnNames() + " columnTypes=" + serdeParams.getColumnTypes()); } //SegmentFile /** * Get the ObjectInspector from serdeParams<br/> * Different from HbaseSerDe in field's object inspector, * since SegmentFile's fields was serialized from primitive java types, * we use PrimitiveJavaObjectInspector instead * * @param columnNames * @param columnTypes * @return */ public static ObjectInspector createMastiffRowObjectInspector(List<String> columnNames, List<TypeInfo> columnTypes) { ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>( columnTypes.size()); for (int i = 0; i < columnTypes.size(); i++) { TypeInfo ti = columnTypes.get(i); if (ti instanceof PrimitiveTypeInfo) { PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti; columnObjectInspectors.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory())); } else { columnObjectInspectors.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(PrimitiveCategory.VOID)); } } LazyMastiffRowObjectInspector result = new LazyMastiffRowObjectInspector(columnNames, columnObjectInspectors); return result; } @Override public Object deserialize(Writable blob) throws SerDeException { if (mtbl == null) { getMTableDesc(job, tbl); } if (!(blob instanceof RowWritable)) { throw new SerDeException(getClass().toString() + ": expects RowWritable!"); } RowWritable rw = (RowWritable) blob; cachedMastiffRow.init(rw, mtbl); return cachedMastiffRow; } @Override public ObjectInspector getObjectInspector() throws SerDeException { return cachedObjectInspector; } @Override public SerDeStats getSerDeStats() { // TODO Auto-generated method stub return null; } @Override public Class<? extends Writable> getSerializedClass() { return RowWritable.class; } @Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { if (objInspector.getCategory() != Category.STRUCT) { throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); final List<Object> list = soi.getStructFieldsDataAsList(obj); if (isRowMapInit == false) { rowMap = new RowMap(SerializeUtil.desc.clusterTypes.size(), ETLUtils.getSchema(SerializeUtil.desc.clusterTypes), ETLUtils.getSchema(ETLUtils .getSchema(SerializeUtil.desc.clusterTypes))); isRowMapInit = true; // RowMap rowMap=new RowMap() ; // ArrayList resuse =new ArrayList<PrimitiveCategory >(); // VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, UNKNOWN for (int i = 0; i < SerializeUtil.desc.clusterTypes.size(); i++) { int j = 0; for (int col : SerializeUtil.desc.columnsMapping[i]) { final PrimitiveObjectInspector foi = (PrimitiveObjectInspector) fields.get(col) .getFieldObjectInspector(); switch (foi.getPrimitiveCategory()) { case BOOLEAN: // row.parseValue(i, ((BooleanObjectInspector) foi).get(list.get(i)) + ""); rowMap.row[i].setValue(j, ((BooleanObjectInspector) foi).get(list.get(col))); // resuse.add(((BooleanObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((BooleanObjectInspector) foi).get(x); } }); break; case BYTE: // byte b = ((ByteObjectInspector) foi).get(list.get(i)); rowMap.row[i].setValue(j, ((ByteObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((ByteObjectInspector) foi).get(x); } }); break; case SHORT: // row.parseValue(i, ((ShortObjectInspector) foi).get(list.get(i)) + ""); rowMap.row[i].setValue(j, ((ShortObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((ShortObjectInspector) foi).get(x); } }); break; case INT: rowMap.row[i].setValue(j, ((IntObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((IntObjectInspector) foi).get(x); } }); break; case LONG: // row.parseValue(i, ((LongObjectInspector) foi).get(list.get(i)) + ""); rowMap.row[i].setValue(j, ((LongObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((LongObjectInspector) foi).get(x); } }); break; case FLOAT: // row.parseValue(i, ((FloatObjectInspector) foi).get(list.get(i)) + ""); rowMap.row[i].setValue(j, ((FloatObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((FloatObjectInspector) foi).get(x); } }); break; case DOUBLE: // row.parseValue(i, ((DoubleObjectInspector) foi).get(list.get(i)) + ""); rowMap.row[i].setValue(j, ((DoubleObjectInspector) foi).get(list.get(col))); resuse.add(new OIG() { public Object get(Object x) { return ((DoubleObjectInspector) foi).get(x); } }); break; case STRING: rowMap.row[i].setValue(j, ((StringObjectInspector) foi).getPrimitiveWritableObject(list.get(col)).toString()); resuse.add(new OIG() { public Object get(Object x) { return ((StringObjectInspector) foi).getPrimitiveWritableObject(x).toString(); } }); break; case DATE: rowMap.row[i].setValue(j, ((DateObjectInspector) foi).getPrimitiveWritableObject(list.get(col)).getTimeInSeconds()); resuse.add(new OIG() { public Object get(Object x) { return ((DateObjectInspector) foi).getPrimitiveWritableObject(x).getTimeInSeconds(); } }); break; case TIMESTAMP: rowMap.row[i].setValue(j, (long) ((TimestampObjectInspector) foi) .getPrimitiveWritableObject(list.get(col)).getSeconds()); resuse.add(new OIG() { public Object get(Object x) { return (long) ((TimestampObjectInspector) foi).getPrimitiveWritableObject(x) .getSeconds(); } }); break; default: try { throw new RuntimeException("not supported type"); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } j++; } } } else { count = 0; for (int i = 0; i < SerializeUtil.desc.clusterTypes.size(); i++) { int j = 0; for (int col : SerializeUtil.desc.columnsMapping[i]) { PrimitiveObjectInspector foi = (PrimitiveObjectInspector) fields.get(col) .getFieldObjectInspector(); // switch (foi.getPrimitiveCategory()){ // case STRING: // // rowMap.row[i].setValue(j, ((StringObjectInspector) // foi).getPrimitiveWritableObject(list.get(col)).toString()); // break ; // case TIMESTAMP: // try { // rowMap.row[i].setValue(j, ((java.util.Date) // dateFormatter.parse(((TimestampObjectInspector) // foi).getPrimitiveWritableObject(list.get(col)).toString())).getTime() / 1000); // } catch (ParseException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // break ; // // default : // rowMap.row[i].setValue(j, resuse.get(count).get(list.get(col))); // count++; // break ; // } rowMap.row[i].setValue(j, resuse.get(count).get(list.get(col))); count++; j++; } } } return rowMap; } // VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, UNKNOWN // if (isRowInit == false) { // List<DataType> cols = new ArrayList<DataType>(); // for (int i = 0; i < fields.size(); i++) { // PrimitiveObjectInspector foi = (PrimitiveObjectInspector) fields.get(i) // .getFieldObjectInspector(); // // switch (foi.getPrimitiveCategory()) { // case BOOLEAN: // cols.add(DataType.BOOLEAN); // break; // case BYTE: // cols.add(DataType.BYTE); // break; // // case SHORT: // cols.add(DataType.SHORT); // break; // case INT: // cols.add(DataType.INT); // break; // case LONG: // cols.add(DataType.LONG); // break; // case FLOAT: // cols.add(DataType.FLOAT); // break; // case DOUBLE: // cols.add(DataType.DOUBLE); // break; // case STRING: // cols.add(DataType.STRING); // break; // case TIMESTAMP: // cols.add(DataType.LONG); // break; // default: // try { // throw new Exception("not supported type"); // } catch (Exception e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // } // } // row = new Row(cols); // isRowInit = true; // } // for (int i = 0; i < fields.size(); i++) { // PrimitiveObjectInspector foi = (PrimitiveObjectInspector) fields.get(i) // .getFieldObjectInspector(); // switch (foi.getPrimitiveCategory()) { // case BOOLEAN: // // row.parseValue(i, ((BooleanObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((BooleanObjectInspector) foi).get(list.get(i))); // break; // case BYTE: // // byte b = ((ByteObjectInspector) foi).get(list.get(i)); // row.setValue(i, ((ByteObjectInspector) foi).get(list.get(i))); // break; // case SHORT: // //row.parseValue(i, ((ShortObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((ShortObjectInspector) foi).get(list.get(i))); // break; // case INT: // // row.parseValue(i, ((IntObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((IntObjectInspector) foi).get(list.get(i))); // break; // case LONG: // // row.parseValue(i, ((LongObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((LongObjectInspector) foi).get(list.get(i))); // break; // case FLOAT: // // row.parseValue(i, ((FloatObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((FloatObjectInspector) foi).get(list.get(i))); // break; // case DOUBLE: // // row.parseValue(i, ((DoubleObjectInspector) foi).get(list.get(i)) + ""); // row.setValue(i, ((DoubleObjectInspector) foi).get(list.get(i))); // break; // case STRING: // // row.parseValue(i, ((StringObjectInspector) foi).getPrimitiveWritableObject(list.get(i)) // // .toString()); // row.setValue(i, ((StringObjectInspector) // foi).getPrimitiveWritableObject(list.get(i)).toString()); // break; // case TIMESTAMP: // try { // dateFormatter = new SimpleDateFormat("yyyy-MM-dd"); // java.util.Date date = (java.util.Date) dateFormatter // .parse(((TimestampObjectInspector) foi).getPrimitiveWritableObject(list.get(i)) // .toString()); // // long time = date.getTime() / 1000; // row.setValue(i, date.getTime() / 1000); // // row.parseValue(i,time+""); // // break; // } catch (ParseException e) { // e.printStackTrace(); // } // default : // try { // throw new Exception("not supported type"); // } catch (Exception e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // } // } // return row; // } @Override public String toString() { return getClass().toString() + "[" + Arrays.asList(serdeParams.getSeparators()) + ":" + ((StructTypeInfo) serdeParams.getRowTypeInfo()) .getAllStructFieldNames() + ":" + ((StructTypeInfo) serdeParams.getRowTypeInfo()) .getAllStructFieldTypeInfos() + "]"; } private void getMTableDesc(Configuration job, Properties tbl) { String tableName = (String) tbl.get("name"); try { MastiffHandlerUtil.setCFMeta(job, tableName); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } mtbl = MastiffHandlerUtil.getMTableDesc(job); MastiffHandlerUtil.getColumnInfos(mtbl, tbl); MastiffHandlerUtil.getCFTypes(mtbl); } } interface OIG { Object get(Object x); }