/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableSet;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.configuration.WorkUnitState.WorkingState;
import gobblin.source.extractor.extract.QueryBasedSource.SourceEntity;
import gobblin.source.workunit.Extract;
import gobblin.source.workunit.Extract.TableType;
import gobblin.source.workunit.WorkUnit;
import gobblin.util.dataset.DatasetUtils;
/**
* Unit tests for {@link QueryBasedSource}
*/
public class QueryBasedSourceTest {
@Test
public void testSourceEntity() {
SourceEntity se1 = SourceEntity.fromSourceEntityName("SourceEntity1");
Assert.assertEquals(se1.getSourceEntityName(), "SourceEntity1");
Assert.assertEquals(se1.getDestTableName(), "SourceEntity1");
Assert.assertEquals(se1.getDatasetName(), "SourceEntity1");
SourceEntity se2 = SourceEntity.fromSourceEntityName("SourceEntity$2");
Assert.assertEquals(se2.getSourceEntityName(), "SourceEntity$2");
Assert.assertEquals(se2.getDestTableName(), "SourceEntity_2");
Assert.assertEquals(se2.getDatasetName(), "SourceEntity$2");
State st1 = new State();
st1.setProp(ConfigurationKeys.SOURCE_ENTITY, "SourceEntity3");
st1.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "SourceEntity3_Table");
Optional<SourceEntity> se3 = SourceEntity.fromState(st1);
Assert.assertTrue(se3.isPresent());
Assert.assertEquals(se3.get().getSourceEntityName(), "SourceEntity3");
Assert.assertEquals(se3.get().getDestTableName(), "SourceEntity3_Table");
Assert.assertEquals(se3.get().getDatasetName(), "SourceEntity3");
Assert.assertEquals(se3.get(), new SourceEntity("SourceEntity3", "SourceEntity3_Table"));
State st2 = new State();
st2.setProp(ConfigurationKeys.SOURCE_ENTITY, "SourceEntity$4");
Optional<SourceEntity> se4 = SourceEntity.fromState(st2);
Assert.assertTrue(se4.isPresent());
Assert.assertEquals(se4.get(), SourceEntity.fromSourceEntityName("SourceEntity$4"));
State st3 = new State();
st3.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "Table5");
Optional<SourceEntity> se5 = SourceEntity.fromState(st3);
Assert.assertTrue(se5.isPresent());
Assert.assertEquals(se5.get(), SourceEntity.fromSourceEntityName("Table5"));
}
private Set<SourceEntity> getFilteredEntities(SourceState state) {
Set<SourceEntity> unfiltered = QueryBasedSource.getSourceEntitiesHelper(state);
return QueryBasedSource.getFilteredSourceEntitiesHelper(state, unfiltered);
}
@Test
public void testGetFilteredSourceEntities() {
{
SourceState state = new SourceState();
state.setProp(QueryBasedSource.ENTITY_BLACKLIST, "Table1,BadTable.*");
state.setProp(ConfigurationKeys.SOURCE_ENTITIES, "Table1,Table2,BadTable1,Table3");
state.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "PropShouldBeIgnored");
Set<SourceEntity> res = getFilteredEntities(state);
Assert.assertEquals(res.size(), 2);
Assert.assertTrue(res.contains(SourceEntity.fromSourceEntityName("Table2")),
"Missing Table2 in " + res);
Assert.assertTrue(res.contains(SourceEntity.fromSourceEntityName("Table3")),
"Missing Table3 in " + res);
}
{
SourceState state = new SourceState();
state.setProp(QueryBasedSource.ENTITY_BLACKLIST, "Table1,BadTable.*");
state.setProp(QueryBasedSource.ENTITY_WHITELIST, "Table3");
state.setProp(ConfigurationKeys.SOURCE_ENTITIES, "Table1,Table2,BadTable1,Table3");
state.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "PropShouldBeIgnored");
Set<SourceEntity> res = getFilteredEntities(state);
Assert.assertEquals(res.size(), 1);
Assert.assertTrue(res.contains(SourceEntity.fromSourceEntityName("Table3")),
"Missing Table3 in " + res);
}
{
SourceState state = new SourceState();
state.setProp(QueryBasedSource.ENTITY_BLACKLIST, "Table1,BadTable.*");
state.setProp(QueryBasedSource.ENTITY_WHITELIST, "Table3");
state.setProp(ConfigurationKeys.SOURCE_ENTITY, "Table3");
state.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "PropShouldNotBeIgnored");
Set<SourceEntity> res = getFilteredEntities(state);
SourceEntity expected = new SourceEntity("Table3", "PropShouldNotBeIgnored");
Assert.assertEquals(res.size(), 1);
Assert.assertTrue(res.contains(expected), "Missing Table3 in " + res);
}
{
SourceState state = new SourceState();
state.setProp(QueryBasedSource.ENTITY_BLACKLIST, "Table1,BadTable.*");
state.setProp(QueryBasedSource.ENTITY_WHITELIST, "Table5");
state.setProp(ConfigurationKeys.SOURCE_ENTITY, "Table3");
state.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "PropShouldNotBeIgnored");
Set<SourceEntity> res = getFilteredEntities(state);
Assert.assertEquals(res.size(), 0);
}
}
@Test
public void testGetTableSpecificPropsFromState() {
SourceState state = new SourceState();
state.setProp(DatasetUtils.DATASET_SPECIFIC_PROPS,
"[{\"dataset\":\"Entity1\", \"value\": 1}, {\"dataset\":\"Table2\", \"value\":2}]");
// We should look in the dataset specific properties using the entity name, not table name
SourceEntity se1 = new SourceEntity("Entity1", "Table2");
SourceEntity se3 = new SourceEntity("Entity3", "Table3");
Set<SourceEntity> entities = ImmutableSet.of(se1, se3);
Map<SourceEntity, State> datasetProps =
QueryBasedSource.getTableSpecificPropsFromState(entities, state);
// Value 1 should be returned for se1, no prpos should be returned for se3
Assert.assertEquals(datasetProps.size(), 1);
Assert.assertTrue(datasetProps.containsKey(se1));
State se1Props = datasetProps.get(se1);
Assert.assertEquals(se1Props.getProp("value"), "1");
}
@Test
public void testGetPreviousWatermarksForAllTables() {
{
State prevJobState = new SourceState();
prevJobState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");
Extract[] extracts = new Extract[3];
SourceEntity[] sourceEntities = new SourceEntity[extracts.length];
List<WorkUnitState> prevWuStates = new ArrayList<>();
// Simulate previous execution with 3 tables and 9 workunits
// All work units for the Table1 failed.
// Workunit 0 for Table0 returned no results
for (int i = 0; i < extracts.length; ++i) {
String sourceEntityName = "Table$" + i;
SourceEntity sourceEntity = SourceEntity.fromSourceEntityName(sourceEntityName);
sourceEntities[i] = sourceEntity;
extracts[i] = new Extract(TableType.APPEND_ONLY, "", sourceEntity.getDestTableName());
for (int j = 0; j < 3; ++j) {
WorkUnit wu = new WorkUnit(extracts[i]);
wu.setProp(ConfigurationKeys.SOURCE_ENTITY, sourceEntity.getSourceEntityName());
wu.setProp(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY, 10 * i);
wu.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, sourceEntity.getDestTableName());
WorkUnitState wuState = new WorkUnitState(wu, prevJobState);
wuState.setProp(ConfigurationKeys.WORK_UNIT_STATE_RUNTIME_HIGH_WATER_MARK, 20 * i);
wuState.setProp(ConfigurationKeys.WORK_UNIT_WORKING_STATE_KEY,
i == 1 ? WorkingState.FAILED.toString() : WorkingState.SUCCESSFUL.toString() );
wuState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, (i + j) * 5);
prevWuStates.add(wuState);
}
}
SourceState prevState = new SourceState(prevJobState, prevWuStates);
Map<SourceEntity, Long> previousWM =
QueryBasedSource.getPreviousWatermarksForAllTables(prevState);
Assert.assertEquals(previousWM.size(), 3);
// No records read for one WU for Table0: min of all LWM
Assert.assertEquals(previousWM.get(sourceEntities[0]), Long.valueOf(0L));
// Failure for Table 1: min of all LWM
Assert.assertEquals(previousWM.get(sourceEntities[1]), Long.valueOf(10L));
// Success for Table 2: max of all HWM
Assert.assertEquals(previousWM.get(sourceEntities[2]), Long.valueOf(40L));
}
}
}