/* * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package gobblin.source; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.SourceState; import gobblin.util.DatePartitionType; /** * Implementation of {@link gobblin.source.Source} that reads over date-partitioned Avro data. * * <p> * * For example, if {@link ConfigurationKeys#SOURCE_FILEBASED_DATA_DIRECTORY} is set to /my/data/, then the class assumes * folders following the pattern /my/data/daily/[year]/[month]/[day] are present. It will iterate through all the data * under these folders starting from the date specified by {@link #DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE} until * either {@link #DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB} files have been processed, or until there is no more data * to process. For example, if {@link #DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE} is set to 2015/01/01, then the job * will read from the folder /my/data/daily/2015/01/01/, /my/data/daily/2015/01/02/, /my/data/2015/01/03/ etc. * * <p> * * The class will only process data in Avro format. */ public class DatePartitionedDailyAvroSource extends DatePartitionedAvroFileSource { @Override protected void init(SourceState state) { state.setProp(DATE_PARTITIONED_SOURCE_PARTITION_PATTERN, DatePartitionType.DAY.getDateTimePattern()); super.init(state); } }