/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.hbase; import java.io.IOException; import java.util.List; import java.util.Map; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.Writable; public class HBaseRowSerializer { private final HBaseKeyFactory keyFactory; private final HBaseSerDeParameters hbaseParam; private final LazySerDeParameters serdeParam; private final int keyIndex; private final int timestampIndex; private final ColumnMapping keyMapping; private final ColumnMapping timestampMapping; private final ColumnMapping[] columnMappings; private final byte[] separators; // the separators array private final boolean escaped; // whether we need to escape the data when writing out private final byte escapeChar; // which char to use as the escape char, e.g. '\\' private final boolean[] needsEscape; // which chars need to be escaped. private final long putTimestamp; private final ByteStream.Output output = new ByteStream.Output(); public HBaseRowSerializer(HBaseSerDeParameters hbaseParam) { this.hbaseParam = hbaseParam; this.keyFactory = hbaseParam.getKeyFactory(); this.serdeParam = hbaseParam.getSerdeParams(); this.separators = serdeParam.getSeparators(); this.escaped = serdeParam.isEscaped(); this.escapeChar = serdeParam.getEscapeChar(); this.needsEscape = serdeParam.getNeedsEscape(); this.keyIndex = hbaseParam.getKeyIndex(); this.timestampIndex = hbaseParam.getTimestampIndex(); this.columnMappings = hbaseParam.getColumnMappings().getColumnsMapping(); this.keyMapping = hbaseParam.getColumnMappings().getKeyMapping(); this.timestampMapping = hbaseParam.getColumnMappings().getTimestampMapping(); this.putTimestamp = hbaseParam.getPutTimestamp(); } public Writable serialize(Object obj, ObjectInspector objInspector) throws Exception { if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) { throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } // Prepare the field ObjectInspectors StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); List<Object> values = soi.getStructFieldsDataAsList(obj); StructField field = fields.get(keyIndex); Object value = values.get(keyIndex); byte[] key = keyFactory.serializeKey(value, field); if (key == null) { throw new SerDeException("HBase row key cannot be NULL"); } long timestamp = putTimestamp; if (timestamp < 0 && timestampIndex >= 0) { ObjectInspector inspector = fields.get(timestampIndex).getFieldObjectInspector(); value = values.get(timestampIndex); if (inspector instanceof LongObjectInspector) { timestamp = ((LongObjectInspector)inspector).get(value); } else { PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) inspector; timestamp = PrimitiveObjectInspectorUtils.getTimestamp(value, primitive).getTime(); } } Put put = timestamp >= 0 ? new Put(key, timestamp) : new Put(key); // Serialize each field for (int i = 0; i < fields.size(); i++) { if (i == keyIndex || i == timestampIndex) { continue; } field = fields.get(i); value = values.get(i); serializeField(value, field, columnMappings[i], put); } return new PutWritable(put); } byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException { if (keyValue == null) { throw new IOException("HBase row key cannot be NULL"); } ObjectInspector keyFieldOI = keyField.getFieldObjectInspector(); if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) { // we always serialize the String type using the escaped algorithm for LazyString return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false); } // use the serialization option switch to write primitive values as either a variable // length UTF8 string or a fixed width bytes if serializing in binary format boolean writeBinary = keyMapping.binaryStorage.get(0); return serialize(keyValue, keyFieldOI, 1, writeBinary); } private void serializeField( Object value, StructField field, ColumnMapping colMap, Put put) throws IOException { if (value == null) { // a null object, we do not serialize it return; } // Get the field objectInspector and the field object. ObjectInspector foi = field.getFieldObjectInspector(); // If the field corresponds to a column family in HBase if (colMap.qualifierName == null) { MapObjectInspector moi = (MapObjectInspector) foi; Map<?, ?> map = moi.getMap(value); if (map == null) { return; } ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); for (Map.Entry<?, ?> entry: map.entrySet()) { // Get the Key // Map keys are required to be primitive and may be serialized in binary format byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0)); if (columnQualifierBytes == null) { continue; } // Map values may be serialized in binary format when they are primitive and binary // serialization is the option selected byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1)); if (bytes == null) { continue; } put.add(colMap.familyNameBytes, columnQualifierBytes, bytes); } } else { byte[] bytes; // If the field that is passed in is NOT a primitive, and either the // field is not declared (no schema was given at initialization), or // the field is declared as a primitive in initialization, serialize // the data to JSON string. Otherwise serialize the data in the // delimited way. if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) { // we always serialize the String type using the escaped algorithm for LazyString bytes = serialize(SerDeUtils.getJSONString(value, foi), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false); } else { // use the serialization option switch to write primitive values as either a variable // length UTF8 string or a fixed width bytes if serializing in binary format bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0)); } if (bytes == null) { return; } put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes); } } /* * Serialize the row into a ByteStream. * * @param obj The object for the current field. * @param objInspector The ObjectInspector for the current Object. * @param level The current level of separator. * @param writeBinary Whether to write a primitive object as an UTF8 variable length string or * as a fixed width byte array onto the byte stream. * @throws IOException On error in writing to the serialization stream. * @return true On serializing a non-null object, otherwise false. */ private byte[] serialize(Object obj, ObjectInspector objInspector, int level, boolean writeBinary) throws IOException { output.reset(); if (objInspector.getCategory() == ObjectInspector.Category.PRIMITIVE && writeBinary) { LazyUtils.writePrimitive(output, obj, (PrimitiveObjectInspector) objInspector); } else { if (!serialize(obj, objInspector, level, output)) { return null; } } return output.toByteArray(); } private boolean serialize( Object obj, ObjectInspector objInspector, int level, ByteStream.Output ss) throws IOException { switch (objInspector.getCategory()) { case PRIMITIVE: LazyUtils.writePrimitiveUTF8(ss, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape); return true; case LIST: char separator = (char) separators[level]; ListObjectInspector loi = (ListObjectInspector)objInspector; List<?> list = loi.getList(obj); ObjectInspector eoi = loi.getListElementObjectInspector(); if (list == null) { return false; } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { ss.write(separator); } serialize(list.get(i), eoi, level + 1, ss); } } return true; case MAP: char sep = (char) separators[level]; char keyValueSeparator = (char) separators[level+1]; MapObjectInspector moi = (MapObjectInspector) objInspector; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); Map<?, ?> map = moi.getMap(obj); if (map == null) { return false; } else { boolean first = true; for (Map.Entry<?, ?> entry: map.entrySet()) { if (first) { first = false; } else { ss.write(sep); } serialize(entry.getKey(), koi, level+2, ss); if ( entry.getValue() != null) { ss.write(keyValueSeparator); serialize(entry.getValue(), voi, level+2, ss); } } } return true; case STRUCT: sep = (char)separators[level]; StructObjectInspector soi = (StructObjectInspector)objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); list = soi.getStructFieldsDataAsList(obj); if (list == null) { return false; } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { ss.write(sep); } serialize(list.get(i), fields.get(i).getFieldObjectInspector(), level + 1, ss); } } return true; case UNION: { // union type currently not totally supported. See HIVE-2390 return false; } default: throw new RuntimeException("Unknown category type: " + objInspector.getCategory()); } } }