/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.task.source; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.Arrays; import java.util.LinkedList; import java.util.NoSuchElementException; import java.nio.file.Path; import java.nio.file.Paths; import com.addthis.bundle.core.Bundle; import com.addthis.codec.annotations.FieldConfig; import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * This data source <span class="hydra-summary">aggregates data from one or more data sources</span>. * <p/> * <p>The user specifies an ordered sequence of data sources. The aggregate * data source first retrieves all the data from the first source. When the first * source has been exhausted, then the data is retrieved from the * second source. And so on and so forth until all the data has been retrieved. * <p/> * <p><b>CAUTION:</b> If the individual data sources have mark directories * then you must set these mark directories to be different locations for * each data source. Otherwise the mark information will end up in * an inconsistent state. The data team is working on enforcing this requirement * automatically.</p> * <p/> * <p>Example:</p> * <pre> * source: [ * {mesh2 { * markDir:"markSource1" * mesh: { * files: "/path/to/files1" * } * }} * {mesh2 { * markDir:"markSource2" * mesh: { * files: "/path/to/files2" * } * }} * ] * </pre> * * @user-reference */ public class AggregateTaskDataSource extends TaskDataSource { private static final Logger log = LoggerFactory.getLogger(AggregateTaskDataSource.class); /** * an ordered sequence of data sources. This field is required. */ @FieldConfig(codable = true, required = true) private TaskDataSource[] sources; private TaskDataSource currentSource; private final LinkedList<TaskDataSource> sourceList = new LinkedList<>(); // to support test cases protected void setSources(TaskDataSource[] sources) { this.sources = sources; } @Override public void init() { for (TaskDataSource source : sources) { if (source.isEnabled()) { log.debug("init {}", source); source.init(); sourceList.add(source); } else { log.debug("disabled {}", source); } } requireValidSource(); } @Override public void close() { for (TaskDataSource source : sources) { if ((source != null) && source.isEnabled()) { log.debug("close {}", source); source.close(); } } } private void resetCurrentSource() { log.debug("resetCurrentSource {}", currentSource); currentSource = null; } private boolean requireValidSource() { while ((currentSource == null) && !sourceList.isEmpty()) { currentSource = sourceList.removeFirst(); log.debug("nextSource = {}", currentSource); if (currentSource.peek() != null) { log.debug("setSource {}", currentSource); return true; } currentSource = null; } return currentSource != null; } @Nullable @Override public Bundle next() { while (requireValidSource()) { @Nullable Bundle next; try { next = currentSource.next(); } catch (NoSuchElementException ignored) { // some legacy sources throw this exception instead of returning null next = null; } log.debug("next {}", next); if (next != null) { return next; } resetCurrentSource(); } return null; } @Nullable @Override public Bundle peek() { while (requireValidSource()) { Bundle peek = currentSource.peek(); log.debug("peek {}", peek); if (peek != null) { return peek; } resetCurrentSource(); } return null; } @Nonnull @Override public ImmutableList<Path> writableRootPaths() { return ImmutableList.copyOf( Arrays.stream(sources).flatMap( output -> output.writableRootPaths().stream()).iterator()); } }