/* * Copyright © 2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.metadata; import co.cask.cdap.app.store.Store; import co.cask.cdap.common.NotFoundException; import co.cask.cdap.common.app.RunIds; import co.cask.cdap.common.entity.EntityExistenceVerifier; import co.cask.cdap.data2.dataset2.DatasetFramework; import co.cask.cdap.data2.metadata.lineage.AccessType; import co.cask.cdap.data2.metadata.lineage.Lineage; import co.cask.cdap.data2.metadata.lineage.LineageStore; import co.cask.cdap.data2.metadata.lineage.Relation; import co.cask.cdap.data2.metadata.store.MetadataStore; import co.cask.cdap.internal.app.services.http.AppFabricTestBase; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.ProgramType; import co.cask.cdap.proto.id.EntityId; import co.cask.cdap.proto.id.NamespaceId; import co.cask.cdap.proto.metadata.MetadataRecord; import co.cask.cdap.proto.metadata.MetadataScope; import co.cask.tephra.TransactionExecutorFactory; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.twill.api.RunId; import org.junit.After; import org.junit.Assert; import org.junit.Test; import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; /** * Tests lineage computation. */ public class LineageAdminTest extends AppFabricTestBase { // Define data private final Id.Stream stream1 = Id.Stream.from("default", "stream1"); private final Id.DatasetInstance dataset1 = Id.DatasetInstance.from("default", "dataset1"); private final Id.DatasetInstance dataset2 = Id.DatasetInstance.from("default", "dataset2"); private final Id.DatasetInstance dataset3 = Id.DatasetInstance.from("default", "dataset3"); private final Id.DatasetInstance dataset4 = Id.DatasetInstance.from("default", "dataset4"); private final Id.DatasetInstance dataset5 = Id.DatasetInstance.from("default", "dataset5"); private final Id.DatasetInstance dataset6 = Id.DatasetInstance.from("default", "dataset6"); private final Id.DatasetInstance dataset7 = Id.DatasetInstance.from("default", "dataset7"); // Define programs and runs private final Id.Program program1 = Id.Program.from("default", "app1", ProgramType.FLOW, "flow1"); private final Id.Flow.Flowlet flowlet1 = Id.Flow.Flowlet.from(program1.getApplication(), program1.getId(), "flowlet1"); private final Id.Run run1 = new Id.Run(program1, RunIds.generate(10000).getId()); private final Id.Program program2 = Id.Program.from("default", "app2", ProgramType.FLOW, "flow2"); private final Id.Flow.Flowlet flowlet2 = Id.Flow.Flowlet.from(program2.getApplication(), program2.getId(), "flowlet2"); private final Id.Run run2 = new Id.Run(program2, RunIds.generate(900).getId()); private final Id.Program program3 = Id.Worker.from("default", "app3", ProgramType.WORKER, "worker3"); private final Id.Run run3 = new Id.Run(program3, RunIds.generate(800).getId()); private final Id.Program program4 = Id.Program.from("default", "app4", ProgramType.SERVICE, "service4"); private final Id.Run run4 = new Id.Run(program4, RunIds.generate(800).getId()); private final Id.Program program5 = Id.Program.from("default", "app5", ProgramType.SERVICE, "service5"); private final Id.Run run5 = new Id.Run(program5, RunIds.generate(700).getId()); @After public void cleanup() throws Exception { deleteNamespace(NamespaceId.DEFAULT.getNamespace()); } @Test public void testSimpleLineage() throws Exception { // Lineage for D3 -> P2 -> D2 -> P1 -> D1 LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testSimpleLineage")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Define metadata MetadataRecord run1AppMeta = new MetadataRecord(program1.getApplication(), MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1")); MetadataRecord run1ProgramMeta = new MetadataRecord(program1, MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1")); MetadataRecord run1Data1Meta = new MetadataRecord(dataset1, MetadataScope.USER, toMap("dk1", "dk1"), toSet("dt1")); MetadataRecord run1Data2Meta = new MetadataRecord(dataset2, MetadataScope.USER, toMap("dk2", "dk2"), toSet("dt2")); // Add metadata metadataStore.setProperties(MetadataScope.USER, program1.getApplication(), run1AppMeta.getProperties()); //noinspection ToArrayCallWithZeroLengthArrayArgument metadataStore.addTags(MetadataScope.USER, program1.getApplication(), run1AppMeta.getTags().toArray(new String[0])); metadataStore.setProperties(MetadataScope.USER, program1, run1ProgramMeta.getProperties()); //noinspection ToArrayCallWithZeroLengthArrayArgument metadataStore.addTags(MetadataScope.USER, program1, run1ProgramMeta.getTags().toArray(new String[0])); metadataStore.setProperties(MetadataScope.USER, dataset1, run1Data1Meta.getProperties()); //noinspection ToArrayCallWithZeroLengthArrayArgument metadataStore.addTags(MetadataScope.USER, dataset1, run1Data1Meta.getTags().toArray(new String[0])); metadataStore.setProperties(MetadataScope.USER, dataset2, run1Data2Meta.getProperties()); //noinspection ToArrayCallWithZeroLengthArrayArgument metadataStore.addTags(MetadataScope.USER, dataset2, run1Data2Meta.getTags().toArray(new String[0])); TimeUnit.MILLISECONDS.sleep(1); // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 // We need to use current time here as metadata store stores access time using current time Id.Run run1 = new Id.Run(program1, RunIds.generate(System.currentTimeMillis()).getId()); Id.Run run2 = new Id.Run(program2, RunIds.generate(System.currentTimeMillis()).getId()); addRuns(store, run1, run2); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run2, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset3, AccessType.READ, System.currentTimeMillis(), flowlet2); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)) ) ); // Lineage for D1 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100)); // Lineage for D2 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100)); // Lineage for D1 for one level should be D2 -> P1 -> D1 Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1); Assert.assertEquals( ImmutableSet.of( new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)) ), oneLevelLineage.getRelations()); // Assert metadata Assert.assertEquals(toSet(run1AppMeta, run1ProgramMeta, run1Data1Meta, run1Data2Meta), lineageAdmin.getMetadataForRun(run1)); // Assert that in a different namespace both lineage and metadata should be empty Id.Namespace customNamespace = Id.Namespace.from("custom_namespace"); Id.DatasetInstance customDataset1 = Id.DatasetInstance.from(customNamespace, dataset1.getId()); Id.Run customRun1 = new Id.Run(Id.Program.from(customNamespace, program1.getApplicationId(), program1.getType(), program1.getId()), run1.getId()); Assert.assertEquals(new Lineage(ImmutableSet.<Relation>of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100)); Assert.assertEquals(ImmutableSet.<MetadataRecord>of(), lineageAdmin.getMetadataForRun(customRun1)); } @Test public void testSimpleLoopLineage() throws Exception { // Lineage for D1 -> P1 -> D2 -> P2 -> D3 -> P3 -> D4 // | | // | V // |<----------------- // LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testSimpleLoopLineage")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Add access addRuns(store, run1, run2, run3, run4, run5); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run3, dataset3, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run3, dataset4, AccessType.WRITE, System.currentTimeMillis()); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset4, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset3, program3, AccessType.READ, twillRunId(run3), emptySet()) ) ); // Lineage for D1 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100)); // Lineage for D2 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, 20000, 100)); // Lineage for D1 for one level D1 -> P1 -> D2 -> P2 -> D3 // | | // | V // |<----------------- // Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, 20000, 1); Assert.assertEquals( ImmutableSet.of( new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)) ), oneLevelLineage.getRelations()); } @Test public void testDirectCycle() throws Exception { // Lineage for: // // D1 <-> P1 // LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testDirectCycle")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Add accesses addRuns(store, run1, run2, run3, run4, run5); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)) ) ); Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100)); } @Test public void testDirectCycleTwoRuns() throws Exception { // Lineage for: // // D1 -> P1 (run1) // // D1 <- P1 (run2) // LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testDirectCycleTwoRuns")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Add accesses addRuns(store, run1, run2, run3, run4, run5); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1); // Write is in a different run lineageStore.addAccess(new Id.Run(run1.getProgram(), run2.getId()), dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run2), toSet(flowlet1)) ) ); Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100)); } @Test public void testBranchLineage() throws Exception { // Lineage for: // // ->D4 -> D5 -> P3 -> D6 // | | // | | // D1 -> P1 -> D2 -> P2 -> D3 // | | | // | | | // S1 -->| ---------------> P4 -> D7 LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testBranchLineage")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Add accesses addRuns(store, run1, run2, run3, run4, run5); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis()); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet()) ) ); // Lineage for D7 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset7, 500, 20000, 100)); // Lineage for D6 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset6, 500, 20000, 100)); // Lineage for D3 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset3, 500, 20000, 100)); } @Test public void testBranchLoopLineage() throws Exception { // Lineage for: // // |-------------------------------------| // | | // | | // | -> D4 -> D5 -> P3 -> D6 -> P5 // | | | ^ // V | | | // D1 -> P1 -> D2 -> P2 -> D3 ----------->| // | | | // | | | // S1 -->| ---------------> P4 -> D7 LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), Id.DatasetInstance.from("default", "testBranchLoopLineage")); Store store = getInjector().getInstance(Store.class); MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class); LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier()); // Add accesses addRuns(store, run1, run2, run3, run4, run5); // It is okay to use current time here since access time is ignore during assertions lineageStore.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1); lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2); lineageStore.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis()); lineageStore.addAccess(run5, dataset3, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run5, dataset6, AccessType.READ, System.currentTimeMillis()); lineageStore.addAccess(run5, dataset1, AccessType.WRITE, System.currentTimeMillis()); Lineage expectedLineage = new Lineage( ImmutableSet.of( new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet()), new Relation(dataset3, program5, AccessType.READ, twillRunId(run5), emptySet()), new Relation(dataset6, program5, AccessType.READ, twillRunId(run5), emptySet()), new Relation(dataset1, program5, AccessType.WRITE, twillRunId(run5), emptySet()) ) ); // Lineage for D1 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100)); // Lineage for D5 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset5, 500, 20000, 100)); // Lineage for D7 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset7, 500, 20000, 100)); // Lineage for S1 Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(stream1, 500, 20000, 100)); // Lineage for D5 for one level // -> D5 -> P3 -> D6 // | // | // D2 -> P2 -> D3 Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset5, 500, 20000, 1); Assert.assertEquals( ImmutableSet.of( new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet()) ), oneLevelLineage.getRelations() ); // Lineage for S1 for one level // // -> D4 // | // | // D1 -> P1 -> D2 // | // | // S1 -->| oneLevelLineage = lineageAdmin.computeLineage(stream1, 500, 20000, 1); Assert.assertEquals( ImmutableSet.of( new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)) ), oneLevelLineage.getRelations() ); } @Test public void testScanRange() { Set<RunId> runIds = ImmutableSet.of( RunIds.generate(500), RunIds.generate(400), RunIds.generate(600), RunIds.generate(200), RunIds.generate(700), RunIds.generate(100) ); LineageAdmin.ScanRangeWithFilter scanRange = LineageAdmin.getScanRange(runIds); Assert.assertEquals(100, scanRange.getStart()); Assert.assertEquals(701, scanRange.getEnd()); scanRange = LineageAdmin.getScanRange(ImmutableSet.<RunId>of()); Assert.assertEquals(0, scanRange.getStart()); Assert.assertEquals(0, scanRange.getEnd()); scanRange = LineageAdmin.getScanRange(ImmutableSet.of(RunIds.generate(100))); Assert.assertEquals(100, scanRange.getStart()); Assert.assertEquals(101, scanRange.getEnd()); } private void addRuns(Store store, Id.Run... runs) { for (Id.Run run : runs) { store.setStart(run.getProgram(), run.getId(), RunIds.getTime(RunIds.fromString(run.getId()), TimeUnit.SECONDS)); } } @SafeVarargs private static <T> Set<T> toSet(T... elements) { return ImmutableSet.copyOf(elements); } private Map<String, String> toMap(String key, String value) { return ImmutableMap.of(key, value); } private static Set<Id.NamespacedId> emptySet() { return Collections.emptySet(); } private RunId twillRunId(Id.Run run) { return RunIds.fromString(run.getId()); } private TransactionExecutorFactory getTxExecFactory() { return getInjector().getInstance(TransactionExecutorFactory.class); } private DatasetFramework getDatasetFramework() { return getInjector().getInstance(DatasetFramework.class); } private static final class NoOpEntityExistenceVerifier implements EntityExistenceVerifier { @Override public void ensureExists(EntityId entityId) throws NotFoundException { // no-op } } }