/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hive.hcatalog.streaming.mutate.worker; import java.util.List; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; /** * Implementation of a {@link BucketIdResolver} that includes the logic required to calculate a bucket id from a record * that is consistent with Hive's own internal computation scheme. */ public class BucketIdResolverImpl implements BucketIdResolver { private static final long INVALID_TRANSACTION_ID = -1L; private static final long INVALID_ROW_ID = -1L; private final SettableStructObjectInspector structObjectInspector; private final StructField[] bucketFields; private final int totalBuckets; private final StructField recordIdentifierField; /** * Note that all column indexes are with respect to your record structure, not the Hive table structure. Bucket column * indexes must be presented in the same order as they are in the Hive table definition. */ public BucketIdResolverImpl(ObjectInspector objectInspector, int recordIdColumn, int totalBuckets, int[] bucketColumns) { this.totalBuckets = totalBuckets; if (!(objectInspector instanceof SettableStructObjectInspector)) { throw new IllegalArgumentException("Serious problem, expected a StructObjectInspector, " + "but got a " + objectInspector.getClass().getName()); } if (bucketColumns.length < 1) { throw new IllegalArgumentException("No bucket column indexes set."); } structObjectInspector = (SettableStructObjectInspector) objectInspector; List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs(); recordIdentifierField = structFields.get(recordIdColumn); bucketFields = new StructField[bucketColumns.length]; for (int i = 0; i < bucketColumns.length; i++) { int bucketColumnsIndex = bucketColumns[i]; bucketFields[i] = structFields.get(bucketColumnsIndex); } } @Override public Object attachBucketIdToRecord(Object record) { int bucketId = computeBucketId(record); RecordIdentifier recordIdentifier = new RecordIdentifier(INVALID_TRANSACTION_ID, bucketId, INVALID_ROW_ID); structObjectInspector.setStructFieldData(record, recordIdentifierField, recordIdentifier); return record; } @Override public int computeBucketId(Object record) { Object[] bucketFieldValues = new Object[bucketFields.length]; ObjectInspector[] bucketFiledInspectors = new ObjectInspector[bucketFields.length]; for (int columnIndex = 0; columnIndex < bucketFields.length; columnIndex++) { bucketFieldValues[columnIndex] = structObjectInspector.getStructFieldData(record, bucketFields[columnIndex]); bucketFiledInspectors[columnIndex] = bucketFields[columnIndex].getFieldObjectInspector(); } return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketFiledInspectors, totalBuckets); } }