/*
* Licensed to Crate.IO GmbH ("Crate") under one or more contributor
* license agreements. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/
package io.crate.executor.transport.distributed;
import com.google.common.base.Throwables;
import io.crate.Streamer;
import io.crate.data.Bucket;
import io.crate.data.Row;
import io.crate.executor.transport.StreamBucket;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * A {@link MultiBucketBuilder} that spreads rows over a fixed number of buckets; the number of
 * buckets is passed to the constructor.
 * <p>
 * Every row handed to {@link #add(Row)} is routed to exactly one bucket: the value of the
 * distribution column is hashed and the hash is taken modulo the number of buckets.
 * <p>
 * {@link #add(Row)} and {@link #build(Bucket[])} modify the underlying builders while holding
 * the monitor of this instance; {@link #size()} reads a volatile counter without locking.
 */
public class ModuloBucketBuilder implements MultiBucketBuilder {

    private final int numBuckets;
    private final List<StreamBucket.Builder> bucketBuilders;
    private final int distributedByColumnIdx;
    private volatile int size = 0;

    /**
     * Creates one {@link StreamBucket.Builder} per bucket, all sharing the same streamers.
     *
     * @param streamers              streamers passed to every {@link StreamBucket.Builder}
     * @param numBuckets             number of buckets rows are distributed over
     * @param distributedByColumnIdx index of the row column whose value decides the bucket
     */
    public ModuloBucketBuilder(Streamer<?>[] streamers, int numBuckets, int distributedByColumnIdx) {
        this.numBuckets = numBuckets;
        this.distributedByColumnIdx = distributedByColumnIdx;

        final List<StreamBucket.Builder> builders = new ArrayList<>(numBuckets);
        while (builders.size() < numBuckets) {
            builders.add(new StreamBucket.Builder(streamers));
        }
        this.bucketBuilders = builders;
    }

    /**
     * Appends the row to the builder of the bucket selected by {@link #getBucket(Row)} and
     * increments the row counter. An {@link IOException} raised by the builder is rethrown
     * via {@link Throwables#propagate(Throwable)}.
     */
    @Override
    public void add(Row row) {
        // the target builder is looked up before the lock is taken; the list itself is never
        // modified after construction
        final int bucketIdx = getBucket(row);
        final StreamBucket.Builder target = bucketBuilders.get(bucketIdx);
        synchronized (this) {
            try {
                target.add(row);
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
            // only counted when the row was actually added
            size++;
        }
    }

    /**
     * Returns the row counter: it is incremented by each successful {@link #add(Row)} and set
     * back to zero at the end of {@link #build(Bucket[])}.
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * Builds every bucket into the matching slot of {@code buckets}, resets each builder right
     * after it was built and finally sets the row counter back to zero. An {@link IOException}
     * aborts the loop and is rethrown via {@link Throwables#propagate(Throwable)}; in that case
     * the counter is left untouched.
     *
     * @param buckets target array; its length is asserted to be equal to the number of buckets
     */
    @Override
    public synchronized void build(Bucket[] buckets) {
        assert buckets.length == numBuckets : "length of the provided array must match numBuckets";
        try {
            for (int idx = 0; idx < numBuckets; idx++) {
                final StreamBucket.Builder current = bucketBuilders.get(idx);
                buckets[idx] = current.build();
                current.reset();
            }
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        size = 0;
    }

    /**
     * Determines the bucket number of a row: the hash of the distribution column value,
     * made non-negative, modulo the number of buckets.
     */
    private int getBucket(Row row) {
        final int hash = hashCode(row.get(distributedByColumnIdx));
        // Math.abs(Integer.MIN_VALUE) == Integer.MIN_VALUE, so that single value is mapped to 0
        final int nonNegative = (hash == Integer.MIN_VALUE) ? 0 : Math.abs(hash);
        return nonNegative % numBuckets;
    }

    /**
     * Hash used for routing: a fixed-seed murmur3 hash for {@link BytesRef} values, 0 for
     * {@code null} and {@link Object#hashCode()} for everything else.
     */
    private static int hashCode(@Nullable Object value) {
        if (value instanceof BytesRef) {
            // Since lucene 4.8 BytesRef.hashCode() uses a seed that is random per jvm, so the
            // hashCode / routing would differ on each node. That breaks the group by
            // redistribution logic - a fixed seed is needed here to stay consistent.
            return StringHelper.murmurhash3_x86_32(((BytesRef) value), 1);
        }
        return value == null ? 0 : value.hashCode();
    }
}