/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.lineage; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.common.app.RunIds; import co.cask.cdap.data2.datafabric.dataset.DatasetsUtil; import co.cask.cdap.data2.dataset2.DatasetFrameworkTestUtil; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.ProgramType; import com.google.common.base.Predicates; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.twill.api.RunId; import org.junit.Assert; import org.junit.ClassRule; import org.junit.Test; import java.util.Set; /** * Tests storage and retrieval of Dataset accesses by Programs in {@link LineageDataset}. */ public class LineageDatasetTest { @ClassRule public static DatasetFrameworkTestUtil dsFrameworkUtil = new DatasetFrameworkTestUtil(); @Test public void testOneRelation() throws Exception { LineageDataset lineageDataset = getLineageDataset("testOneRelation"); Assert.assertNotNull(lineageDataset); RunId runId = RunIds.generate(10000); Id.DatasetInstance datasetInstance = Id.DatasetInstance.from("default", "dataset1"); Id.Program program = Id.Program.from("default", "app1", ProgramType.FLOW, "flow1"); Id.Flow.Flowlet flowlet = Id.Flow.Flowlet.from(program.getApplication(), program.getId(), "flowlet1"); Id.Run run = new Id.Run(program, runId.getId()); long accessTimeMillis = System.currentTimeMillis(); lineageDataset.addAccess(run, datasetInstance, AccessType.READ, accessTimeMillis, flowlet); Relation expected = new Relation(datasetInstance, program, AccessType.READ, runId, ImmutableSet.of(flowlet)); Set<Relation> relations = lineageDataset.getRelations(datasetInstance, 0, 100000, Predicates.<Relation>alwaysTrue()); Assert.assertEquals(1, relations.size()); Assert.assertEquals(expected, relations.iterator().next()); Assert.assertEquals(toSet(datasetInstance, program), lineageDataset.getEntitiesForRun(run)); Assert.assertEquals(ImmutableList.of(accessTimeMillis), lineageDataset.getAccessTimesForRun(run)); } @Test public void testMultipleRelations() throws Exception { LineageDataset lineageDataset = getLineageDataset("testMultipleRelations"); Assert.assertNotNull(lineageDataset); RunId runId1 = RunIds.generate(10000); RunId runId2 = RunIds.generate(20000); RunId runId3 = RunIds.generate(30000); RunId runId4 = RunIds.generate(40000); Id.DatasetInstance datasetInstance1 = Id.DatasetInstance.from("default", "dataset1"); Id.DatasetInstance datasetInstance2 = Id.DatasetInstance.from("default", "dataset2"); Id.Stream stream1 = Id.Stream.from("default", "stream1"); Id.Stream stream2 = Id.Stream.from("default", "stream2"); Id.Program program1 = Id.Program.from("default", "app1", ProgramType.FLOW, "flow1"); Id.Flow.Flowlet flowlet1 = Id.Flow.Flowlet.from(program1.getApplication(), program1.getId(), "flowlet1"); Id.Program program2 = Id.Program.from("default", "app2", ProgramType.WORKER, "worker2"); Id.Program program3 = Id.Program.from("default", "app3", ProgramType.SERVICE, "service3"); Id.Run run11 = new Id.Run(program1, runId1.getId()); Id.Run run22 = new Id.Run(program2, runId2.getId()); Id.Run run23 = new Id.Run(program2, runId3.getId()); Id.Run run34 = new Id.Run(program3, runId4.getId()); long now = System.currentTimeMillis(); //noinspection UnnecessaryLocalVariable long run11Data1AccessTime = now; lineageDataset.addAccess(run11, datasetInstance1, AccessType.READ, run11Data1AccessTime, flowlet1); long run22Data2AccessTime = now + 1; lineageDataset.addAccess(run22, datasetInstance2, AccessType.WRITE, run22Data2AccessTime); long run22Stream1AccessTime = now + 2; lineageDataset.addAccess(run22, stream1, AccessType.READ, run22Stream1AccessTime); long run23Stream2AccessTime = now + 1; lineageDataset.addAccess(run23, stream2, AccessType.READ, run23Stream2AccessTime); long run23Data2AccessTime = now + 3; lineageDataset.addAccess(run23, datasetInstance2, AccessType.WRITE, run23Data2AccessTime); lineageDataset.addAccess(run34, datasetInstance2, AccessType.READ_WRITE, System.currentTimeMillis()); lineageDataset.addAccess(run34, stream2, AccessType.UNKNOWN, System.currentTimeMillis()); Assert.assertEquals( ImmutableSet.of(new Relation(datasetInstance1, program1, AccessType.READ, runId1, ImmutableSet.of(flowlet1))), lineageDataset.getRelations(datasetInstance1, 0, 100000, Predicates.<Relation>alwaysTrue()) ); Assert.assertEquals( ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(datasetInstance2, program3, AccessType.READ_WRITE, runId4) ), lineageDataset.getRelations(datasetInstance2, 0, 100000, Predicates.<Relation>alwaysTrue()) ); Assert.assertEquals( ImmutableSet.of(new Relation(stream1, program2, AccessType.READ, runId2)), lineageDataset.getRelations(stream1, 0, 100000, Predicates.<Relation>alwaysTrue()) ); Assert.assertEquals( ImmutableSet.of(new Relation(stream2, program2, AccessType.READ, runId3), new Relation(stream2, program3, AccessType.UNKNOWN, runId4)), lineageDataset.getRelations(stream2, 0, 100000, Predicates.<Relation>alwaysTrue()) ); Assert.assertEquals( ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(stream1, program2, AccessType.READ, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(stream2, program2, AccessType.READ, runId3) ), lineageDataset.getRelations(program2, 0, 100000, Predicates.<Relation>alwaysTrue()) ); // Reduced time range Assert.assertEquals( ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3) ), lineageDataset.getRelations(datasetInstance2, 0, 35000, Predicates.<Relation>alwaysTrue()) ); Assert.assertEquals(toSet(program1, datasetInstance1), lineageDataset.getEntitiesForRun(run11)); Assert.assertEquals(ImmutableList.of(run11Data1AccessTime), lineageDataset.getAccessTimesForRun(run11)); Assert.assertEquals(toSet(program2, datasetInstance2, stream1), lineageDataset.getEntitiesForRun(run22)); Assert.assertEquals(ImmutableList.of(run22Data2AccessTime, run22Stream1AccessTime), lineageDataset.getAccessTimesForRun(run22)); Assert.assertEquals(toSet(program2, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run23)); Assert.assertEquals(ImmutableList.of(run23Data2AccessTime, run23Stream2AccessTime), lineageDataset.getAccessTimesForRun(run23)); Assert.assertEquals(toSet(program3, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run34)); } private static LineageDataset getLineageDataset(String instanceId) throws Exception { Id.DatasetInstance id = Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, instanceId); return DatasetsUtil.getOrCreateDataset(dsFrameworkUtil.getFramework(), id, LineageDataset.class.getName(), DatasetProperties.EMPTY, null, null); } @SafeVarargs private static <T> Set<T> toSet(T... elements) { return ImmutableSet.copyOf(elements); } }