/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazy;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Properties;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
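/**
 * LazyUtils: static helper methods for the lazy SerDe code. Covers digit
 * parsing, byte-range comparison and hashing, UTF-8 decoding, writing
 * primitive values as text or binary, and reading column names and types
 * from table properties.
 */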
public final class LazyUtils {
/**
 * Maps an ASCII character code to the value of the digit it denotes.
 * '0'-'9' map to 0-9; 'A'-'Z' and 'a'-'z' both map to 10-35.
 *
 * @param b
 *          The ascii code of the character
 * @param radix
 *          The radix; a digit value is only valid when it is below the radix
 * @return the digit value, or -1 if b is not a valid digit in the given radix
 */
public static int digit(int b, int radix) {
  final int value;
  if ('0' <= b && b <= '9') {
    value = b - '0';
  } else if ('A' <= b && b <= 'Z') {
    value = 10 + (b - 'A');
  } else if ('a' <= b && b <= 'z') {
    value = 10 + (b - 'a');
  } else {
    // not an alphanumeric ASCII character
    value = -1;
  }
  // A digit that does not fit in the radix is reported as invalid.
  return value < radix ? value : -1;
}
/**
 * Lexicographically compares two byte ranges. Bytes are compared position by
 * position as signed values; when one range is a prefix of the other, the
 * shorter range sorts first.
 *
 * @param b1
 *          array holding the first sequence
 * @param start1
 *          offset of the first sequence inside b1
 * @param length1
 *          number of bytes in the first sequence
 * @param b2
 *          array holding the second sequence
 * @param start2
 *          offset of the second sequence inside b2
 * @param length2
 *          number of bytes in the second sequence
 * @return -1 if the first sequence is less than the second, +1 if the second
 *         is less than the first, otherwise 0
 */
public static int compare(byte[] b1, int start1, int length1, byte[] b2,
    int start2, int length2) {
  int common = length1 < length2 ? length1 : length2;
  for (int offset = 0; offset < common; offset++) {
    byte left = b1[start1 + offset];
    byte right = b2[start2 + offset];
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  // All shared positions match: the length decides.
  if (length1 == length2) {
    return 0;
  }
  return length1 < length2 ? -1 : 1;
}
/**
 * Decodes a range of a UTF-8 byte array into a String using Text.decode.
 *
 * @param bytes
 *          The byte[] containing the UTF-8 String.
 * @param start
 *          The start position inside the bytes.
 * @param length
 *          The length of the data, starting from "start"
 * @return The decoded String, or null when decoding fails with a
 *         CharacterCodingException
 */
public static String convertToString(byte[] bytes, int start, int length) {
  String decoded;
  try {
    decoded = Text.decode(bytes, start, length);
  } catch (CharacterCodingException malformed) {
    // Decoding failures are reported to the caller as null.
    decoded = null;
  }
  return decoded;
}
// ASCII bytes of the boolean literals written by writePrimitiveUTF8.
// Declared final so the shared references cannot be reassigned.
private static final byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'};
private static final byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'};
/**
 * Write a range of bytes, optionally prefixing special characters with an
 * escape character.
 *
 * @param out
 *          the stream to write to
 * @param bytes
 *          the array holding the data
 * @param start
 *          offset of the first byte to write
 * @param len
 *          number of bytes to write, starting from "start"
 * @param escaped
 *          Whether the data should be written out in an escaped way.
 * @param escapeChar
 *          if escaped, the char for prefixing special characters.
 * @param needsEscape
 *          if escaped, whether a specific character needs escaping. This
 *          array should have size of 128. Bytes with a negative value (high
 *          bit set) are never escaped.
 * @throws IOException
 *           if writing to the stream fails
 */
private static void writeEscaped(OutputStream out, byte[] bytes, int start,
    int len, boolean escaped, byte escapeChar, boolean[] needsEscape)
    throws IOException {
  if (!escaped) {
    // Honor the requested offset instead of always writing from index 0.
    out.write(bytes, start, len);
    return;
  }
  int end = start + len;
  // Index of the first byte that has not been written yet.
  int pending = start;
  for (int i = start; i < end; i++) {
    byte current = bytes[i];
    if (current >= 0 && needsEscape[current]) {
      // Flush the run of ordinary bytes that precedes the special character.
      if (i > pending) {
        out.write(bytes, pending, i - pending);
      }
      out.write(escapeChar);
      // The special character itself is written with the next run.
      pending = i;
    }
  }
  if (end > pending) {
    out.write(bytes, pending, end - pending);
  }
}
/**
 * Writes the textual form of a primitive value to a UTF-8 byte stream. The
 * representation is chosen from the primitive category reported by the
 * object inspector. STRING values go through writeEscaped; BINARY values
 * are written Base64-encoded.
 *
 * @param out
 *          The UTF8 byte OutputStream
 * @param o
 *          The primitive Object
 * @param oi
 *          The inspector used to read the value out of o
 * @param escaped
 *          Whether STRING data should be written out in an escaped way
 * @param escapeChar
 *          if escaped, the char for prefixing special characters
 * @param needsEscape
 *          Whether a character needs escaping. This array should have size of
 *          128.
 * @throws IOException
 *           if writing to the stream fails
 * @throws RuntimeException
 *           if the primitive category is not handled here
 */
public static void writePrimitiveUTF8(OutputStream out, Object o,
    PrimitiveObjectInspector oi, boolean escaped, byte escapeChar,
    boolean[] needsEscape) throws IOException {
  switch (oi.getPrimitiveCategory()) {
  case BOOLEAN: {
    byte[] literal = ((BooleanObjectInspector) oi).get(o) ? trueBytes : falseBytes;
    out.write(literal, 0, literal.length);
    break;
  }
  case BYTE: {
    byte byteValue = ((ByteObjectInspector) oi).get(o);
    LazyInteger.writeUTF8(out, byteValue);
    break;
  }
  case SHORT: {
    short shortValue = ((ShortObjectInspector) oi).get(o);
    LazyInteger.writeUTF8(out, shortValue);
    break;
  }
  case INT: {
    int intValue = ((IntObjectInspector) oi).get(o);
    LazyInteger.writeUTF8(out, intValue);
    break;
  }
  case LONG: {
    long longValue = ((LongObjectInspector) oi).get(o);
    LazyLong.writeUTF8(out, longValue);
    break;
  }
  case FLOAT: {
    ByteBuffer encoded = Text.encode(Float.toString(((FloatObjectInspector) oi).get(o)));
    out.write(encoded.array(), 0, encoded.limit());
    break;
  }
  case DOUBLE: {
    ByteBuffer encoded = Text.encode(Double.toString(((DoubleObjectInspector) oi).get(o)));
    out.write(encoded.array(), 0, encoded.limit());
    break;
  }
  case STRING: {
    Text text = ((StringObjectInspector) oi).getPrimitiveWritableObject(o);
    byte[] raw = text.getBytes();
    int rawLength = text.getLength();
    writeEscaped(out, raw, 0, rawLength, escaped, escapeChar, needsEscape);
    break;
  }
  case BINARY: {
    BytesWritable writable = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
    // Only the first getLength() bytes of the backing array are encoded.
    byte[] payload = Arrays.copyOf(writable.getBytes(), writable.getLength());
    byte[] base64 = Base64.encodeBase64(payload);
    out.write(base64, 0, base64.length);
    break;
  }
  case DATE: {
    LazyDate.writeUTF8(out,
        ((DateObjectInspector) oi).getPrimitiveWritableObject(o));
    break;
  }
  case TIMESTAMP: {
    LazyTimestamp.writeUTF8(out,
        ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o));
    break;
  }
  default: {
    throw new RuntimeException("Hive internal error.");
  }
  }
}
/**
 * Writes the binary representation of a primitive value to a byte stream,
 * using the DataOutputStream encoding for the value's primitive category.
 * Only BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT and DOUBLE are handled.
 *
 * The stream is wrapped in a DataOutputStream that is closed before
 * returning, which also closes the wrapped stream.
 *
 * @param out ByteStream.Output, an unsynchronized version of ByteArrayOutputStream, used as a
 *            backing buffer for the DataOutputStream
 * @param o the PrimitiveObject
 * @param oi the PrimitiveObjectInspector
 * @throws IOException on error during the write operation
 * @throws RuntimeException if the primitive category is not handled here
 */
public static void writePrimitive(
    OutputStream out,
    Object o,
    PrimitiveObjectInspector oi) throws IOException {
  DataOutputStream dataOut = new DataOutputStream(out);
  try {
    switch (oi.getPrimitiveCategory()) {
    case BOOLEAN: {
      dataOut.writeBoolean(((BooleanObjectInspector) oi).get(o));
      break;
    }
    case BYTE: {
      dataOut.writeByte(((ByteObjectInspector) oi).get(o));
      break;
    }
    case SHORT: {
      dataOut.writeShort(((ShortObjectInspector) oi).get(o));
      break;
    }
    case INT: {
      dataOut.writeInt(((IntObjectInspector) oi).get(o));
      break;
    }
    case LONG: {
      dataOut.writeLong(((LongObjectInspector) oi).get(o));
      break;
    }
    case FLOAT: {
      dataOut.writeFloat(((FloatObjectInspector) oi).get(o));
      break;
    }
    case DOUBLE: {
      dataOut.writeDouble(((DoubleObjectInspector) oi).get(o));
      break;
    }
    default: {
      throw new RuntimeException("Hive internal error.");
    }
    }
  } finally {
    // closing the underlying ByteStream should have no effect, the data should still be
    // accessible
    dataOut.close();
  }
}
/**
 * Computes a hash over a range of bytes: starting from 1, each byte folds in
 * as {@code hash = 31 * hash + b}. The result depends only on the bytes in
 * the range, not on where the range sits inside the array.
 *
 * @param data
 *          the array holding the bytes
 * @param start
 *          offset of the first byte to hash
 * @param len
 *          number of bytes to hash, starting from "start"
 * @return the hash of the range; 1 for an empty range
 */
public static int hashBytes(byte[] data, int start, int len) {
  int hash = 1;
  // The range is [start, start + len); bounding the loop by len alone would
  // skip bytes whenever start > 0.
  int end = start + len;
  for (int i = start; i < end; i++) {
    hash = (31 * hash) + data[i];
  }
  return hash;
}
/**
 * Fills in the column names and column types of the given SerDe parameters
 * from the table properties.
 *
 * Column names come from the Constants.LIST_COLUMNS property, split on
 * commas; a missing or empty property gives an empty list. Column types come
 * from Constants.LIST_COLUMN_TYPES; when that property is missing, every
 * column defaults to Constants.STRING_TYPE_NAME.
 *
 * @param tbl the table properties to read from
 * @param serdeParams the parameters object whose columnNames and columnTypes are set
 * @param serdeName name of the SerDe, used as the prefix of the error message
 * @throws SerDeException if the number of column names differs from the number of column types
 */
public static void extractColumnInfo(Properties tbl, SerDeParameters serdeParams,
    String serdeName) throws SerDeException {
  String names = tbl.getProperty(Constants.LIST_COLUMNS);
  // NOTE: if "columns.types" is missing, all columns will be of String type
  String types = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

  if (names == null || names.length() == 0) {
    serdeParams.columnNames = new ArrayList<String>();
  } else {
    serdeParams.columnNames = Arrays.asList(names.split(","));
  }

  if (types == null) {
    // Build a ":"-separated list with one string type per column.
    int columnCount = serdeParams.columnNames.size();
    StringBuilder defaults = new StringBuilder();
    String separator = "";
    for (int n = 0; n < columnCount; n++) {
      defaults.append(separator).append(Constants.STRING_TYPE_NAME);
      separator = ":";
    }
    types = defaults.toString();
  }

  serdeParams.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(types);

  int nameCount = serdeParams.columnNames.size();
  int typeCount = serdeParams.columnTypes.size();
  if (nameCount != typeCount) {
    throw new SerDeException(serdeName + ": columns has "
        + nameCount
        + " elements while columns.types has "
        + typeCount + " elements!");
  }
}
/**
 * Private constructor: code outside this class cannot create instances.
 */
private LazyUtils() {
// prevent instantiation
}
}