/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.writer; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import com.google.common.base.Optional; import lombok.extern.slf4j.Slf4j; import gobblin.writer.partitioner.WriterPartitioner; /** * A {@link DataWriterBuilder} used with a {@link WriterPartitioner}. When provided with a partitioner, Gobblin will create a * {@link gobblin.writer.DataWriter} per partition. All partitions will be build with identical builders, except * that {@link #forPartition} will specify the partition. * * <p> * The contract with the {@link PartitionAwareDataWriterBuilder} is as follows: * * Gobblin will call {@link #validatePartitionSchema(Schema)} before calling build(). * * Gobblin is guaranteed to call {@link #validatePartitionSchema(Schema)} for some instance of * {@link PartitionAwareDataWriterBuilder} with the same class, but not necessarily for the specific instance * that will be used to build the {@link DataWriter}. * * If !partition1.equals(partition2), then Gobblin may call build a writer for partition1 and a writer for * partition2 in the same job. This should not cause an exception. * * If partition1.equals(partition2), a single fork will not build writers for both partitions. * </p> * * <p> * The summary is: * * Make sure {@link #validatePartitionSchema} returns false if the writer can't handle the schema. * * {@link #validatePartitionSchema} should not have any side effects on the {@link PartitionAwareDataWriterBuilder}. * * Different partitions should generate non-colliding writers. * </p> */ @Slf4j public abstract class PartitionAwareDataWriterBuilder<S, D> extends DataWriterBuilder<S, D> { protected Optional<GenericRecord> partition = Optional.absent(); /** * Sets the partition that the build {@link DataWriter} will handle. * @param partition A {@link GenericRecord} specifying the partition. * @return A {@link PartitionAwareDataWriterBuilder}. */ public PartitionAwareDataWriterBuilder<S, D> forPartition(GenericRecord partition) { this.partition = Optional.fromNullable(partition); log.debug("For partition {}", this.partition); return this; } /** * Checks whether the {@link PartitionAwareDataWriterBuilder} is compatible with a given partition {@link Schema}. * If this method returns false, the execution will crash with an error. If this method returns true, the * {@link DataWriterBuilder} is expected to be able to understand the partitioning schema and handle it correctly. * @param partitionSchema {@link Schema} of {@link GenericRecord} objects that will be passed to {@link #forPartition}. * @return true if the {@link DataWriterBuilder} can understand the schema and is able to generate partitions from * this schema. */ public abstract boolean validatePartitionSchema(Schema partitionSchema); }