/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.task.source; import javax.annotation.Nonnull; import java.util.NoSuchElementException; import java.nio.file.Path; import com.addthis.bundle.core.Bundle; import com.addthis.codec.annotations.FieldConfig; import com.addthis.hydra.common.hash.PluggableHashFunction; import com.addthis.hydra.task.run.TaskRunConfig; import com.google.common.collect.ImmutableList; /** * This data source <span class="hydra-summary">shards the input source by hashing on a bundle field</span>. * * @user-reference */ public class DataSourceHashed extends TaskDataSource { /** * Underlying data source from which data is fetched. This field is required. */ @FieldConfig(codable = true, required = true) private TaskDataSource stream; /** * Name of the bundle field whose values are used as input to a hash function. This field is required. */ @FieldConfig(codable = true, required = true) private String hashKey; /** * Total number of shards. This field is required. */ @FieldConfig(codable = true, required = true) private int shardTotal; @FieldConfig private TaskRunConfig config; private Bundle peek; private Integer[] shards; @Override public void init() { shards = config.calcShardList(shardTotal); stream.init(); } @Override public void close() { stream.close(); } @Override public Bundle peek() { Bundle tmp; while (peek == null && (tmp = stream.peek()) != null) { int hash = Math.abs(PluggableHashFunction.hash(tmp.getValue(tmp.getFormat().getField(hashKey)).asString().toString()) % shardTotal); for (Integer shard : shards) { if (hash == shard) { return tmp; } } stream.next(); } return null; } @Override public Bundle next() { Bundle ret; if ((ret = peek()) == null) { throw new NoSuchElementException(); } if (stream.next() == null) { throw new RuntimeException("next() return null after non-null peek"); } peek = null; return ret; } @Nonnull @Override public ImmutableList<Path> writableRootPaths() { return stream.writableRootPaths(); } }