/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.spec;
import co.cask.cdap.api.artifact.ArtifactId;
import co.cask.cdap.api.artifact.ArtifactScope;
import co.cask.cdap.api.artifact.ArtifactVersion;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.FileSet;
import co.cask.cdap.etl.api.PipelineConfigurable;
import co.cask.cdap.etl.api.PipelineConfigurer;
import co.cask.cdap.etl.api.Transform;
import co.cask.cdap.etl.api.batch.BatchSink;
import co.cask.cdap.etl.api.batch.BatchSource;
import co.cask.cdap.etl.batch.BatchPipelineSpec;
import co.cask.cdap.etl.batch.BatchPipelineSpecGenerator;
import co.cask.cdap.etl.common.MockPluginConfigurer;
import co.cask.cdap.etl.proto.v2.ETLBatchConfig;
import co.cask.cdap.etl.proto.v2.ETLConfig;
import co.cask.cdap.etl.proto.v2.ETLPlugin;
import co.cask.cdap.etl.proto.v2.ETLStage;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.Map;
import java.util.Set;
/**
 * Tests for converting a {@link ETLConfig} into a {@link PipelineSpec}.
 *
 * <p>All tests share one {@link BatchPipelineSpecGenerator} that is backed by a {@link MockPluginConfigurer}
 * populated with a mock source, two mock transforms and a mock sink.
 */
public class PipelineSpecGeneratorTest {
  // schedule string handed to every ETLBatchConfig builder used by these tests
  private static final String SCHEDULE = "* * * * *";

  private static final Schema SCHEMA_A = singleStringFieldSchema("a");
  private static final Schema SCHEMA_B = singleStringFieldSchema("b");

  private static final ETLPlugin MOCK_SOURCE = pluginWithoutProperties("mocksource", BatchSource.PLUGIN_TYPE);
  private static final ETLPlugin MOCK_TRANSFORM_A = pluginWithoutProperties("mockA", Transform.PLUGIN_TYPE);
  private static final ETLPlugin MOCK_TRANSFORM_B = pluginWithoutProperties("mockB", Transform.PLUGIN_TYPE);
  private static final ETLPlugin MOCK_SINK = pluginWithoutProperties("mocksink", BatchSink.PLUGIN_TYPE);

  private static final ArtifactId ARTIFACT_ID =
    new ArtifactId("plugins", new ArtifactVersion("1.0.0"), ArtifactScope.USER);

  private static BatchPipelineSpecGenerator specGenerator;

  /**
   * Registers the mock plugins and creates the generator used by every test.
   *
   * <p>The source and "mockA" set {@link #SCHEMA_A} as their output schema, "mockB" sets {@link #SCHEMA_B},
   * and the sink sets no output schema at all.
   */
  @BeforeClass
  public static void setupTests() {
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);

    mockConfigurer.addMockPlugin(BatchSource.PLUGIN_TYPE, "mocksource", new MockPlugin(SCHEMA_A), artifacts);
    mockConfigurer.addMockPlugin(Transform.PLUGIN_TYPE, "mockA", new MockPlugin(SCHEMA_A), artifacts);
    mockConfigurer.addMockPlugin(Transform.PLUGIN_TYPE, "mockB", new MockPlugin(SCHEMA_B), artifacts);
    mockConfigurer.addMockPlugin(BatchSink.PLUGIN_TYPE, "mocksink", new MockPlugin(), artifacts);

    specGenerator = new BatchPipelineSpecGenerator(mockConfigurer,
                                                   ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                                   ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                                   FileSet.class, DatasetProperties.EMPTY);
  }

  /**
   * Two stages are both named "t1"; generating the spec is expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testUniqueStageNames() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("t1", MOCK_TRANSFORM_A))
      .addStage(stage("t1", MOCK_TRANSFORM_B))
      .addStage(stage("sink", MOCK_SINK))
      .addConnection("source", "sink")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * A connection points at "stage2", which was never added as a stage; generating the spec is expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testConnectionWithMissingStage() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addConnection("source", "sink")
      .addConnection("source", "stage2")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * A transform is connected into the source stage; generating the spec is expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testConnectionIntoSource() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("transform", MOCK_TRANSFORM_A))
      .addConnection("source", "sink")
      .addConnection("transform", "source")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * The sink stage is connected out to a transform; generating the spec is expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testConnectionOutOfSink() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("transform", MOCK_TRANSFORM_A))
      .addConnection("source", "sink")
      .addConnection("sink", "transform")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * The transform feeds the sink but nothing feeds the transform; generating the spec is expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testUnreachableStage() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("transform", MOCK_TRANSFORM_A))
      .addConnection("source", "sink")
      .addConnection("transform", "sink")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * The source feeds the transform but the transform has no outgoing connection; generating the spec is
   * expected to fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testDeadEndStage() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("transform", MOCK_TRANSFORM_A))
      .addConnection("source", "sink")
      .addConnection("source", "transform")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * "t1" and "t2" are connected to each other in both directions; generating the spec is expected to fail with
   * an {@link IllegalStateException}.
   */
  @Test(expected = IllegalStateException.class)
  public void testCycle() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("t1", MOCK_TRANSFORM_A))
      .addStage(stage("t2", MOCK_TRANSFORM_A))
      .addConnection("source", "t1")
      .addConnection("t1", "t2")
      .addConnection("t2", "t1")
      .addConnection("t2", "sink")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * Generates the spec for a valid pipeline and compares it against a hand-built expectation, covering input
   * schemas, output schemas, stage inputs/outputs and artifact information.
   *
   * <p>Connections in the pipeline under test:
   * <pre>
   *   source -> t1, t2, sink2
   *   t1     -> t2, t3, sink2
   *   t2     -> t3, sink2
   *   t3     -> sink1
   * </pre>
   */
  @Test
  public void testGenerateSpec() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink1", MOCK_SINK))
      .addStage(stage("sink2", MOCK_SINK))
      .addStage(stage("t1", MOCK_TRANSFORM_A))
      .addStage(stage("t2", MOCK_TRANSFORM_A))
      .addStage(stage("t3", MOCK_TRANSFORM_B))
      .addConnection("source", "t1")
      .addConnection("source", "t2")
      .addConnection("source", "sink2")
      .addConnection("t1", "t2")
      .addConnection("t1", "t3")
      .addConnection("t1", "sink2")
      .addConnection("t2", "sink2")
      .addConnection("t2", "t3")
      .addConnection("t3", "sink1")
      .build();

    BatchPipelineSpec actual = specGenerator.generateSpec(config);

    // expected per-stage specs, listed in the same order the stages were added to the config
    StageSpec sourceSpec = StageSpec.builder("source", expectedPlugin(BatchSource.PLUGIN_TYPE, "mocksource"))
      .setOutputSchema(SCHEMA_A)
      .addOutputs("t1", "t2", "sink2")
      .build();
    StageSpec sink1Spec = StageSpec.builder("sink1", expectedPlugin(BatchSink.PLUGIN_TYPE, "mocksink"))
      .setInputSchema(SCHEMA_B)
      .addInputs("t3")
      .build();
    StageSpec sink2Spec = StageSpec.builder("sink2", expectedPlugin(BatchSink.PLUGIN_TYPE, "mocksink"))
      .setInputSchema(SCHEMA_A)
      .addInputs("t1", "t2", "source")
      .build();
    StageSpec t1Spec = StageSpec.builder("t1", expectedPlugin(Transform.PLUGIN_TYPE, "mockA"))
      .setInputSchema(SCHEMA_A)
      .setOutputSchema(SCHEMA_A)
      .addInputs("source")
      .addOutputs("t2", "t3", "sink2")
      .build();
    StageSpec t2Spec = StageSpec.builder("t2", expectedPlugin(Transform.PLUGIN_TYPE, "mockA"))
      .setInputSchema(SCHEMA_A)
      .setOutputSchema(SCHEMA_A)
      .addInputs("source", "t1")
      .addOutputs("t3", "sink2")
      .build();
    StageSpec t3Spec = StageSpec.builder("t3", expectedPlugin(Transform.PLUGIN_TYPE, "mockB"))
      .setInputSchema(SCHEMA_A)
      .setOutputSchema(SCHEMA_B)
      .addInputs("t1", "t2")
      .addOutputs("sink1")
      .build();

    PipelineSpec expected = BatchPipelineSpec.builder()
      .addStage(sourceSpec)
      .addStage(sink1Spec)
      .addStage(sink2Spec)
      .addStage(t1Spec)
      .addStage(t2Spec)
      .addStage(t3Spec)
      .addConnections(config.getConnections())
      .setResources(config.getResources())
      .setStageLoggingEnabled(config.isStageLoggingEnabled())
      .build();

    Assert.assertEquals(expected, actual);
  }

  /**
   * The source fans out to "tA" (outputs schema A) and "tB" (outputs schema B), and both feed the same sink.
   * The sink therefore receives schema A and schema B as input, which should fail.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testConflictingInputSchemas() {
    ETLBatchConfig config = ETLBatchConfig.builder(SCHEDULE)
      .addStage(stage("source", MOCK_SOURCE))
      .addStage(stage("sink", MOCK_SINK))
      .addStage(stage("tA", MOCK_TRANSFORM_A))
      .addStage(stage("tB", MOCK_TRANSFORM_B))
      .addConnection("source", "tA")
      .addConnection("source", "tB")
      .addConnection("tA", "sink")
      .addConnection("tB", "sink")
      .build();

    specGenerator.generateSpec(config);
  }

  /**
   * Builds a record schema called {@code name} that holds a single string field, also called {@code name}.
   */
  private static Schema singleStringFieldSchema(String name) {
    return Schema.recordOf(name, Schema.Field.of(name, Schema.of(Schema.Type.STRING)));
  }

  /**
   * Builds an {@link ETLPlugin} with the given name and type, an empty property map and a null last argument.
   */
  private static ETLPlugin pluginWithoutProperties(String name, String type) {
    return new ETLPlugin(name, type, ImmutableMap.<String, String>of(), null);
  }

  /**
   * Builds an {@link ETLStage} with the given stage name and plugin.
   */
  private static ETLStage stage(String name, ETLPlugin plugin) {
    return new ETLStage(name, plugin);
  }

  /**
   * Builds the {@link PluginSpec} expected for a mock plugin: the given type and name, no properties, and
   * {@link #ARTIFACT_ID} as its artifact.
   */
  private static PluginSpec expectedPlugin(String type, String name) {
    Map<String, String> noProperties = ImmutableMap.of();
    return new PluginSpec(type, name, noProperties, ARTIFACT_ID);
  }

  /**
   * Plugin stand-in whose only behavior is to optionally set an output schema on the stage configurer.
   */
  private static class MockPlugin implements PipelineConfigurable {
    // output schema to set during configuration; null means no output schema is set
    private final Schema schema;

    /**
     * Creates a mock plugin that sets no output schema.
     */
    public MockPlugin() {
      this(null);
    }

    /**
     * Creates a mock plugin that sets the given output schema when configured.
     */
    public MockPlugin(Schema schema) {
      this.schema = schema;
    }

    @Override
    public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
      if (schema == null) {
        // nothing to set for plugins created without a schema
        return;
      }
      pipelineConfigurer.getStageConfigurer().setOutputSchema(schema);
    }
  }
}