/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.runtime; import java.io.FileReader; import java.io.IOException; import java.util.List; import java.util.Properties; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.SourceState; import gobblin.configuration.WorkUnitState; import gobblin.metastore.FsStateStore; import gobblin.metastore.StateStore; import gobblin.runtime.local.LocalJobLauncher; import gobblin.source.Source; import gobblin.source.extractor.DataRecordException; import gobblin.source.extractor.Extractor; import gobblin.source.extractor.extract.AbstractSource; import gobblin.source.workunit.Extract; import gobblin.source.workunit.WorkUnit; import gobblin.writer.DataWriter; import gobblin.writer.DataWriterBuilder; /** * Unit tests around the state store. * * <p> * This test uses the {@link LocalJobLauncher} to launch and run a dummy job and checks the * state store between runs of the dummy job to make sure important things like watermarks * are carried over properly between runs. * </p> * * @author Yinan Li */ @Test(groups = { "gobblin.runtime" }) public class DatasetStateStoreTest { private static final String JOB_NAME = DatasetStateStoreTest.class.getSimpleName(); private static final String NAMESPACE = "TestNamespace"; private static final String TABLE = "TestTable"; private static final String FOO = "foo"; private static final String BAR = "bar"; private static final String WORK_UNIT_INDEX_KEY = "work.unit.index"; private static final String LAST_READ_RECORD_KEY = "last.read.record"; private StateStore<JobState.DatasetState> datasetStateStore; private Properties jobConfig = new Properties(); @BeforeClass public void setUp() throws Exception { Properties properties = new Properties(); try (FileReader fr = new FileReader("gobblin-test/resource/gobblin.test.properties")) { properties.load(fr); } this.datasetStateStore = new FsStateStore<>( properties.getProperty(ConfigurationKeys.STATE_STORE_FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI), properties.getProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY), JobState.DatasetState.class); // clear data that might be there from a prior run this.datasetStateStore.delete(JOB_NAME); this.jobConfig.putAll(properties); this.jobConfig.setProperty(ConfigurationKeys.JOB_NAME_KEY, JOB_NAME); this.jobConfig.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, DummySource.class.getName()); this.jobConfig.setProperty(ConfigurationKeys.WRITER_BUILDER_CLASS, DummyDataWriterBuilder.class.getName()); } @Test public void testLaunchFirstJob() throws Exception { try (JobLauncher launcher = new LocalJobLauncher(this.jobConfig)) { launcher.launchJob(null); } verifyJobState(1); } @Test(dependsOnMethods = "testLaunchFirstJob") public void testLaunchSecondJob() throws Exception { try (JobLauncher launcher = new LocalJobLauncher(this.jobConfig)) { launcher.launchJob(null); } verifyJobState(2); } @Test(dependsOnMethods = "testLaunchSecondJob") public void testLaunchThirdJob() throws Exception { try (JobLauncher launcher = new LocalJobLauncher(this.jobConfig)) { launcher.launchJob(null); } verifyJobState(3); } @AfterClass public void tearDown() throws IOException { this.datasetStateStore.delete(JOB_NAME); } private void verifyJobState(int run) throws IOException { List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(JOB_NAME, "current.jst"); Assert.assertEquals(datasetStateList.size(), 1); JobState jobState = datasetStateList.get(0); Assert.assertEquals(jobState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(jobState.getTaskStates().size(), DummySource.NUM_WORK_UNITS); for (TaskState taskState : jobState.getTaskStates()) { Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); Assert.assertEquals(taskState.getProp(FOO), BAR); // Check if the low watermark is properly kept track of int expectedLowWatermark = (run - 1) * DummySource.NUM_WORK_UNITS * DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR + taskState.getPropAsInt(WORK_UNIT_INDEX_KEY) * DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR + 1; Assert.assertEquals(taskState.getPropAsInt(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY), expectedLowWatermark); // Check if the high watermark is properly kept track of int expectedHighWatermark = expectedLowWatermark + DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR - 1; Assert.assertEquals(taskState.getPropAsInt(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY), expectedHighWatermark); Assert.assertEquals(taskState.getPropAsInt(LAST_READ_RECORD_KEY), expectedHighWatermark); } } /** * A dummy implementation of {@link Source}. */ public static class DummySource extends AbstractSource<String, Integer> { private static final int NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR = 1000; private static final int NUM_WORK_UNITS = 5; @Override public List<WorkUnit> getWorkunits(SourceState sourceState) { sourceState.setProp(FOO, BAR); if (Iterables.isEmpty(sourceState.getPreviousWorkUnitStates())) { return initializeWorkUnits(); } List<WorkUnit> workUnits = Lists.newArrayList(); for (WorkUnitState workUnitState : sourceState.getPreviousWorkUnitStates()) { WorkUnit workUnit = WorkUnit.create(createExtract(Extract.TableType.SNAPSHOT_ONLY, NAMESPACE, TABLE)); workUnit.setLowWaterMark(workUnitState.getPropAsInt(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY) + NUM_WORK_UNITS * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR); workUnit.setHighWaterMark(workUnitState.getPropAsInt(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY) + NUM_WORK_UNITS * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR); workUnit.setProp(WORK_UNIT_INDEX_KEY, workUnitState.getPropAsInt(WORK_UNIT_INDEX_KEY)); workUnits.add(workUnit); } return workUnits; } @Override public Extractor<String, Integer> getExtractor(WorkUnitState state) throws IOException { return new DummyExtractor(state); } @Override public void shutdown(SourceState state) { // Nothing to do } private List<WorkUnit> initializeWorkUnits() { List<WorkUnit> workUnits = Lists.newArrayList(); for (int i = 0; i < NUM_WORK_UNITS; i++) { WorkUnit workUnit = WorkUnit.create(createExtract(Extract.TableType.SNAPSHOT_ONLY, NAMESPACE, TABLE)); workUnit.setLowWaterMark(i * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR + 1); workUnit.setHighWaterMark((i + 1) * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR); workUnit.setProp(WORK_UNIT_INDEX_KEY, i); workUnits.add(workUnit); } return workUnits; } } /** * A dummy implementation of {@link Extractor}. */ private static class DummyExtractor implements Extractor<String, Integer> { private final WorkUnitState workUnitState; private int current; DummyExtractor(WorkUnitState workUnitState) { this.workUnitState = workUnitState; workUnitState.setProp(FOO, BAR); this.current = Integer.parseInt(this.workUnitState.getProp(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY)); } @Override public String getSchema() { return ""; } @Override public Integer readRecord(Integer reuse) throws DataRecordException, IOException { if (this.current > this.workUnitState.getPropAsInt(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY)) { return null; } this.workUnitState.setProp(LAST_READ_RECORD_KEY, this.current); return this.current++; } @Override public long getExpectedRecordCount() { return DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR; } @Override public long getHighWatermark() { return this.workUnitState.getHighWaterMark(); } @Override public void close() throws IOException { // Nothing to do } } /** * A dummy implementation of {@link DataWriterBuilder} to work with {@link DummySource}. */ public static class DummyDataWriterBuilder extends DataWriterBuilder<String, Integer> { @Override public DataWriter<Integer> build() throws IOException { return new DummyDataWriter(); } } /** * A dummy implementation of {@link DataWriter} to work with {@link DummySource}. */ private static class DummyDataWriter implements DataWriter<Integer> { @Override public void write(Integer record) throws IOException { // Nothing to do } @Override public void commit() throws IOException { // Nothing to do } @Override public void cleanup() throws IOException { // Nothing to do } @Override public long recordsWritten() { return DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR; } @Override public long bytesWritten() throws IOException { return DummySource.NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR * 4; } @Override public void close() throws IOException { // Nothing to do } } }