// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.survivorship;
import static org.junit.Assert.*;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import org.kie.internal.KnowledgeBase;
import org.talend.survivorship.model.Attribute;
import org.talend.survivorship.model.Column;
import org.talend.survivorship.model.ConflictRuleDefinition;
import org.talend.survivorship.model.DataSet;
import org.talend.survivorship.model.Record;
import org.talend.survivorship.model.RuleDefinition;
import org.talend.survivorship.sample.SampleData;
import org.talend.survivorship.sample.SampleDataConflict;
import org.talend.survivorship.sample.SampleDataConflictCheckRule;
import org.talend.survivorship.sample.SampleDataConflictMostCommon2Longest;
import org.talend.survivorship.sample.SampleDataConflictMostCommon2Longest2MostRecent;
import org.talend.survivorship.sample.SampleDataConflictMostCommon2Longest2keepOneOfDuplicte;
import org.talend.survivorship.sample.SampleDataConflictMostCommon2MostRecent;
import org.talend.survivorship.sample.SampleDataConflictMostCommon2OtherSurvivedValue;
import org.talend.survivorship.sample.SampleDataConflictMostCommonAndNoIgnoreBlank;
import org.talend.survivorship.sample.SampleDataConflictOtherColumn2MostCommon2Constant;
import org.talend.survivorship.sample.SampleDataConflictOtherColumn2MostCommon2ConstantEmptyDuplicate;
import org.talend.survivorship.sample.SampleDataConflictShortest2OtherColumnDuplicateSurvivedValue;
import org.talend.survivorship.sample.SampleDataConflictTwoNoConflictColumnGetOneSameSurvivedValue;
import org.talend.survivorship.sample.SampleDataRegexFunction;
/**
* Created by sizhaoliu. Unit tests for SurvivorshipManager.
*/
public class SurvivorshipManagerTest {
private SurvivorshipManager manager;
/**
 * Creates a fresh {@link SurvivorshipManager} for the default sample package and registers every column and
 * every rule definition declared in {@link SampleData} before each test.
 *
 * @throws java.lang.Exception if preparing the manager fails
 */
@Before
public void setUp() throws Exception {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleData.PKG_NAME);

    // Register the sample columns (name -> declared type).
    for (String columnName : SampleData.COLUMNS.keySet()) {
        manager.addColumn(columnName, SampleData.COLUMNS.get(columnName));
    }

    // Register the sample survivorship rules.
    for (RuleDefinition rule : SampleData.RULES) {
        manager.addRuleDefinition(rule);
    }
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#initKnowledgeBase()}.
 *
 * Verifies that the initialized knowledge base exposes the "RecordIn" fact type, the first sample rule and the
 * "SurvivorFlow" process of the sample package.
 */
@Test
public void testInitKnowledgeBase() {
    manager.initKnowledgeBase();
    KnowledgeBase knowledgeBase = manager.getKnowledgeBase();

    // The generated input model must be present.
    Object recordInType = knowledgeBase.getFactType(SampleData.PKG_NAME, "RecordIn"); //$NON-NLS-1$
    assertNotNull("Model is null", recordInType); //$NON-NLS-1$

    // The first sample rule must have been compiled into the package.
    Object firstRule = knowledgeBase.getRule(SampleData.PKG_NAME, SampleData.RULES[0].getRuleName());
    assertNotNull(firstRule);

    // The rule flow process must be registered under "<package>.SurvivorFlow".
    Object survivorFlow = knowledgeBase.getProcess(SampleData.PKG_NAME + ".SurvivorFlow"); //$NON-NLS-1$
    assertNotNull(survivorFlow);
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * No-conflict case: runs the default sample input and compares the survivor of every sample column and the set
 * of conflicting columns with the expectations declared in {@link SampleData}. The session is executed twice on
 * the same manager to check that a second run yields the same result.
 */
@Test
public void testRunSessionNoConflictCase() {
    manager.initKnowledgeBase();

    // Two identical passes: validate the conflict rules, run the session, verify the outcome.
    for (int run = 0; run < 2; run++) {
        manager.checkConflictRuleValid();
        manager.runSession(SampleData.SAMPLE_INPUT);

        Map<String, Object> survivors = manager.getSurvivorMap();
        for (String col : SampleData.COLUMNS.keySet()) {
            assertEquals("Unexpected survivor for column " + col, //$NON-NLS-1$
                    SampleData.EXPECTED_SURVIVOR.get(col), survivors.get(col));
        }

        // assertEquals (rather than assertTrue on equals) reports both sets when they differ.
        assertEquals("conflicts are not the same as expected.", //$NON-NLS-1$
                SampleData.EXPECTED_CONFLICT_OF_SURVIVOR, manager.getConflictsOfSurvivor());
    }
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * Regex function case: rebuilds the manager with the columns and rules of {@link SampleDataRegexFunction}, runs
 * its sample input and compares the survivors and the conflicting columns with the expectations declared there.
 * The session is executed twice on the same manager to check that a second run yields the same result.
 */
@Test
public void testRunSessionRegex() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleDataRegexFunction.PKG_NAME);
    for (String columnName : SampleDataRegexFunction.COLUMNS.keySet()) {
        manager.addColumn(columnName, SampleDataRegexFunction.COLUMNS.get(columnName));
    }
    for (RuleDefinition rule : SampleDataRegexFunction.RULES) {
        manager.addRuleDefinition(rule);
    }
    manager.initKnowledgeBase();
    // The conflict rules are validated once, before the first run only.
    manager.checkConflictRuleValid();

    for (int run = 0; run < 2; run++) {
        manager.runSession(SampleDataRegexFunction.SAMPLE_INPUT);

        Map<String, Object> survivors = manager.getSurvivorMap();
        for (String col : SampleDataRegexFunction.COLUMNS.keySet()) {
            assertEquals("Unexpected survivor for column " + col, //$NON-NLS-1$
                    SampleDataRegexFunction.EXPECTED_SURVIVOR.get(col), survivors.get(col));
        }

        // assertEquals (rather than assertTrue on equals) reports both sets when they differ.
        assertEquals("conflicts are not the same as expected.", //$NON-NLS-1$
                SampleDataRegexFunction.EXPECTED_CONFLICT_OF_SURVIVOR, manager.getConflictsOfSurvivor());
    }
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case1 most frequent -> most recent, with null values
 *
 * The most common rule produces a conflict on the birthday column and the most recent rule resolves it, so the
 * surviving date is expected to be 08-08-2000 rather than 04-04-2000.
 */
@Test
public void testRunSessionMostCommon2MostRecent() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleDataConflictMostCommon2MostRecent.PKG_NAME_CONFLICT);

    // Only the birthday column carries conflict resolution rules.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("birthday")) { //$NON-NLS-1$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2MostRecent.RULES_CONFLICT_RESOLVE) {
                column.getConflictResolveList().add(resolveRule);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2MostRecent.RULES_CONFLICT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$
    assertTrue("The column of conflict should be birthday", conflictColumns.contains("birthday")); //$NON-NLS-1$ //$NON-NLS-2$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object birthday = survivors.get("birthday"); //$NON-NLS-1$
    assertNotNull("The birthdayObj should not be null", birthday); //$NON-NLS-1$
    Date survivedDate = (Date) birthday;

    // The most recent rule is used to resolve the conflict, hence 08-08-2000 is expected.
    assertEquals("The resultDate should be 08-08-2000", "08-08-2000", //$NON-NLS-1$ //$NON-NLS-2$
            SampleData.dateToString(survivedDate, "dd-MM-yyyy")); //$NON-NLS-1$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case2 most frequent -> longest, with null values
 *
 * For the city1 column the most common rule produces a conflict between beijing and shanghai, which the longest
 * rule then resolves. The expected result is shanghai.
 */
@Test
public void testRunSessionMostCommon2Longest() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleDataConflictMostCommon2Longest.PKG_NAME_CONFLICT_FRE_LONG);

    // Only the city1 column carries conflict resolution rules.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("city1")) { //$NON-NLS-1$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2Longest.RULES_CONFLICT_RESOLVE) {
                column.getConflictResolveList().add(resolveRule);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2Longest.RULES_CONFLICT_FRE_LONG) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$
    assertTrue("The column of conflict should be city1", conflictColumns.contains("city1")); //$NON-NLS-1$ //$NON-NLS-2$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object cityObj = survivors.get("city1"); //$NON-NLS-1$
    assertNotNull("The cityObj should not be null", cityObj); //$NON-NLS-1$
    String survivedCity = (String) cityObj;

    // beijing and shanghai both occur twice, but shanghai (8 characters) is longer than beijing (7 characters),
    // so the longest rule is expected to pick shanghai.
    assertEquals("The resultStr should be shanghai", "shanghai", survivedCity); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case3 most frequent -> longest -> most recent, with null values
 *
 * On the firstName column the most common rule produces a conflict between Tony and Lili. The longest rule
 * cannot separate them, so the most recent rule on the birthday column is used: between 04-04-2000 and
 * 06-06-2000 the latter wins, and because 06-06-2000 is Tony's birthday the expected firstName is Tony.
 * Note that "ignore blank" is checked in this case.
 */
@Test
public void testRunSessionMostCommon2Longest2MostRecent() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommon2Longest2MostRecent.PKG_NAME_CONFLICT_FRE_LONG_RECENT);

    // Only the firstName column carries conflict resolution rules.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("firstName")) { //$NON-NLS-1$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2Longest2MostRecent.RULES_CONFLICT_RESOLVE) {
                column.getConflictResolveList().add(resolveRule);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2Longest2MostRecent.RULES_CONFLICT_FRE_LONG_RECENT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$
    assertTrue("The column of conflict should be firstName", conflictColumns.contains("firstName")); //$NON-NLS-1$ //$NON-NLS-2$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object firstNameObj = survivors.get("firstName"); //$NON-NLS-1$
    assertNotNull("The firstNameObj should not be null", firstNameObj); //$NON-NLS-1$
    String survivedFirstName = (String) firstNameObj;

    // Tony and Lili are in conflict; the most recent rule on the birthday column favours Tony
    // (06-06-2000) over Lili (04-04-2000).
    assertEquals("The resultStr should be Tony", "Tony", survivedFirstName); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case10 most frequent -> other survived value
 *
 * The city1 column is the only column with conflict resolution rules. After the run, city1 must be the single
 * conflicting column and its survivor is expected to be beijing.
 */
@Test
public void testRunSessionMostCommon2OtherSurvived() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommon2OtherSurvivedValue.PKG_NAME_CONFLICT);

    // Only the city1 column carries conflict resolution rules.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("city1")) { //$NON-NLS-1$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2OtherSurvivedValue.RULES_CONFLICT_RESOLVE) {
                column.getConflictResolveList().add(resolveRule);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2OtherSurvivedValue.RULES_CONFLICT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$
    assertTrue("The column of conflict should be city1", conflictColumns.contains("city1")); //$NON-NLS-1$ //$NON-NLS-2$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object city1 = survivors.get("city1"); //$NON-NLS-1$
    assertNotNull("The city1 should not be null", city1); //$NON-NLS-1$
    String survivedCity = (String) city1;

    // The conflict on city1 is expected to be resolved to beijing.
    assertEquals("The resultStr should be beijing", "beijing", survivedCity); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case4 most frequent -> null -> constant
 *
 * The constant is Green and the fill column is firstName. Because lastName contains two empty values, those
 * values are taken from the firstName column. The most common rule then yields Green=2 and Tony=2 (null is
 * ignored automatically), which is a conflict. Green is the constant and is therefore excluded, so the
 * expected result is "Tony".
 */
@Test
public void testRunSessionOtherColumn2MostCommon2Constant() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictOtherColumn2MostCommon2Constant.PKG_NAME_CONFLICT_FRE_NULL_CONSTANT);

    // Only the lastName column carries conflict resolution rules.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("lastName")) { //$NON-NLS-1$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictOtherColumn2MostCommon2Constant.RULES_CONFLICT_RESOLVE) {
                column.getConflictResolveList().add(resolveRule);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictOtherColumn2MostCommon2Constant.RULES_CONFLICT_FRE_NULL_CONTSTANT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object lastName = survivors.get("lastName"); //$NON-NLS-1$
    assertNotNull("The lastNameObj should not be null", lastName); //$NON-NLS-1$
    String survivedLastName = (String) lastName;

    // Tony and Green conflict after the most common rule, but Green is the constant value
    // and must not be chosen.
    assertEquals("The resultStr should be Tony", "Tony", survivedLastName); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case4 most frequent -> null -> constant -> empty duplicate survivor
 *
 * 1. The longest rule on firstName yields a conflict between Tony and Lili. The shortest value on the city2
 * column is xian, whose row maps to Tony in the firstName column, so the firstName survivor is "Tony".
 *
 * 2. The constant is Green and the fill column is firstName. Because lastName contains two empty values, those
 * values are taken from the firstName column. The most common rule then yields Green=2 and Tony=2 (null is
 * ignored automatically), which is a conflict; Green is the constant and is therefore excluded.
 *
 * The lastName survivor is expected to be empty.
 */
@Test
public void testRunSessionOtherColumn2MostCommon2ConstantEmptyDuplicate() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictOtherColumn2MostCommon2ConstantEmptyDuplicate.PKG_NAME);

    // lastName and firstName each receive the resolution rules that target them.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("lastName") || column.getName().equals("firstName")) { //$NON-NLS-1$ //$NON-NLS-2$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictOtherColumn2MostCommon2ConstantEmptyDuplicate.RULES_CONFLICT_RESOLVE) {
                if (column.getName().equals(resolveRule.getTargetColumn())) {
                    column.getConflictResolveList().add(resolveRule);
                }
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictOtherColumn2MostCommon2ConstantEmptyDuplicate.RULES_CONFLICT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results (the set of conflicting columns is not asserted here).
    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$

    Object lastName = survivors.get("lastName"); //$NON-NLS-1$
    assertNotNull("The lastNameObj should not be null", lastName); //$NON-NLS-1$
    String survivedLastName = (String) lastName;

    Object firstName = survivors.get("firstName"); //$NON-NLS-1$
    assertNotNull("The firstNameObj should not be null", firstName); //$NON-NLS-1$
    String survivedFirstName = (String) firstName;

    // firstName resolves to Tony; lastName ends up empty because the constant Green is excluded.
    assertEquals("The resultStr should be Tony", "Tony", survivedFirstName); //$NON-NLS-1$ //$NON-NLS-2$
    assertEquals("The resultStr should be empty", "", survivedLastName); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case5 According to the original author, the city1 column uses the longest rule (survivor hebeihebei) and the
 * birthday column uses the most recent rule (survivor 08-08-2000). Two records carry 08-08-2000, but since the
 * values are identical no conflict is generated.
 *
 * The test asserts that there is no conflicting column, that the survivor map holds exactly one entry and that
 * the birthday survivor is 08-08-2000.
 */
@Test
public void testRunSessionTwoNoConflictColumnGetOneSameSurvivedValue() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictTwoNoConflictColumnGetOneSameSurvivedValue.PKG_NAME_CONFLICT_TWO_TARGET_ONE_COLUMN);
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        manager.addColumn(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
    }
    for (RuleDefinition rule : SampleDataConflictTwoNoConflictColumnGetOneSameSurvivedValue.RULES_CONFLICT_TWO_TARGET_ONE_COLUMN) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 0", 0, conflictColumns.size()); //$NON-NLS-1$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    // assertEquals reports the actual size when the expectation is not met.
    assertEquals("The size of SurvivorMap should be 1", 1, survivors.size()); //$NON-NLS-1$

    Object birthdayObj = survivors.get("birthday"); //$NON-NLS-1$
    assertNotNull("The birthdayNameObj should not be null", birthdayObj); //$NON-NLS-1$
    String survivedDate = SampleData.dateToString((Date) birthdayObj, "dd-MM-yyyy"); //$NON-NLS-1$
    assertEquals("The resultDate should be 08-08-2000", "08-08-2000", survivedDate); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case6 most frequent -> longest -> keep one of duplicates only
 *
 * The most common rule generates a conflict between beijing=2 and shanghai=2, and the longest rule resolves it
 * to shanghai. City1 and city2 would then both survive as "shanghai", which is a duplicate, so only one of them
 * keeps that value: the test expects city1 to be shanghai and city2 to be beijing.
 */
@Test
public void testRunSessionMostCommon2Longest2keepOneOfDuplicte() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.PKG_NAME_CONFLICT_TWO_TARGET_SAME_VALUE);

    // city1 and city2 each receive the resolution rules that target them.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("city1") || column.getName().equals("city2")) { //$NON-NLS-1$ //$NON-NLS-2$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.RULES_CONFLICT_RESOLVE) {
                if (column.getName().equals(resolveRule.getTargetColumn())) {
                    column.getConflictResolveList().add(resolveRule);
                }
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.RULES_CONFLICT_TWO_TARGET_SAME_RESULT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 2", 2, conflictColumns.size()); //$NON-NLS-1$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    // assertEquals reports the actual size when the expectation is not met.
    assertEquals("The size of SurvivorMap should be 2", 2, survivors.size()); //$NON-NLS-1$

    Object city1Obj = survivors.get("city1"); //$NON-NLS-1$
    assertNotNull("The city1Obj should not be null", city1Obj); //$NON-NLS-1$
    assertEquals("The resultDate should be shanghai", "shanghai", city1Obj.toString()); //$NON-NLS-1$ //$NON-NLS-2$

    Object city2Obj = survivors.get("city2"); //$NON-NLS-1$
    assertNotNull("The city2Obj should not be null", city2Obj); //$NON-NLS-1$
    assertEquals("The resultDate should be beijing", "beijing", city2Obj.toString()); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case6 most frequent -> shortest -> keep one of duplicates only
 *
 * The most common rule generates a conflict between beijing=2 and shanghai=2, and the shortest rule resolves it
 * to beijing. City1 and city2 would then both survive as "beijing", which is a duplicate, so only one of them
 * keeps that value and city2 takes the longest of its conflicting values: the test expects city1 to be beijing
 * and city2 to be shanghai.
 */
@Test
public void testRunSessionMostCommon2Shortest2keepOneOfDuplicte() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.PKG_NAME_CONFLICT_TWO_TARGET_SAME_VALUE);

    // city1 and city2 each receive the (second set of) resolution rules that target them.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("city1") || column.getName().equals("city2")) { //$NON-NLS-1$ //$NON-NLS-2$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.RULES_CONFLICT_RESOLVE2) {
                if (column.getName().equals(resolveRule.getTargetColumn())) {
                    column.getConflictResolveList().add(resolveRule);
                }
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2Longest2keepOneOfDuplicte.RULES_CONFLICT_TWO_TARGET_SAME_RESULT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 2", 2, conflictColumns.size()); //$NON-NLS-1$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    // assertEquals reports the actual size when the expectation is not met.
    assertEquals("The size of SurvivorMap should be 2", 2, survivors.size()); //$NON-NLS-1$

    Object city1Obj = survivors.get("city1"); //$NON-NLS-1$
    assertNotNull("The city1Obj should not be null", city1Obj); //$NON-NLS-1$
    assertEquals("The resultDate should be beijing", "beijing", city1Obj.toString()); //$NON-NLS-1$ //$NON-NLS-2$

    Object city2Obj = survivors.get("city2"); //$NON-NLS-1$
    assertNotNull("The city2Obj should not be null", city2Obj); //$NON-NLS-1$
    assertEquals("The resultDate should be shanghai", "shanghai", city2Obj.toString()); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case7 most frequent -> first one
 *
 * The most common rule generates a conflict and no column carries a resolution rule, so the conflict is
 * expected to be settled by taking the first of the conflicting values.
 */
@Test
public void testRunSessionMostCommonGetConflictThenDefauleRule() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommon2Longest2MostRecent.PKG_NAME_CONFLICT_FRE_LONG_RECENT);

    // Plain columns only: no conflict resolution rules are attached in this case.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        manager.addColumn(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
    }
    for (RuleDefinition rule : SampleDataConflictMostCommon2Longest2MostRecent.RULES_CONFLICT_FRE_LONG_RECENT) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$
    assertTrue("The column of conflict should be firstName", conflictColumns.contains("firstName")); //$NON-NLS-1$ //$NON-NLS-2$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object firstName = survivors.get("firstName"); //$NON-NLS-1$
    assertNotNull("The firstNameObj should not be null", firstName); //$NON-NLS-1$

    // The check of the surviving value itself is delegated to a helper of this test class.
    assertResultIsFirstConflictedValue();
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case8 most frequent without "ignore blank", no conflict
 *
 * The blank value occurs three times, so the surviving firstName is expected to be " " (a single space
 * character) and no conflict is reported.
 */
@Test
public void testRunSessionMostCommonAndNoIgnoreBlank() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictMostCommonAndNoIgnoreBlank.PKG_NAME_CONFLICT_FRE_LONG_RECENT_WITHOUT_IGNORE_BLANK);
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        manager.addColumn(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
    }
    for (RuleDefinition rule : SampleDataConflictMostCommonAndNoIgnoreBlank.RULES_CONFLICT_FRE_LONG_RECENT_NO_IGNORE_BLANK) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 0", 0, conflictColumns.size()); //$NON-NLS-1$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$
    Object firstName = survivors.get("firstName"); //$NON-NLS-1$
    assertNotNull("The firstNameObj should not be null", firstName); //$NON-NLS-1$
    assertEquals("The resultDate should be \" \"", " ", firstName.toString()); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#runSession(java.lang.String[][])}.
 *
 * @case9 shortest -> other (reference) column, duplicate survived value
 *
 * According to the original author, city2 is the reference column of city1: city1 uses the shortest rule and
 * gets a conflict between "xian" and "lasa", while city2 uses the shortest rule without conflict and survives
 * as "xian".
 *
 * The test asserts that exactly one column is in conflict, that city1 survives as "lasa" and that city2
 * survives as "xian". Compare with {@link SurvivorshipManagerTest#testRunSessionMostCommon2Longest2keepOneOfDuplicte()}.
 */
@Test
public void testRunSessionShortest2OtherColumnDuplicateSurvivedValue() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH,
            SampleDataConflictShortest2OtherColumnDuplicateSurvivedValue.PKG_NAME_CONFLICT_TWO_TARGET_SAME_RESULT_REFERENCE_COLUMN);

    // city1 and city2 each receive the resolution rules that target them.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        if (column.getName().equals("city1") || column.getName().equals("city2")) { //$NON-NLS-1$ //$NON-NLS-2$
            for (ConflictRuleDefinition resolveRule : SampleDataConflictShortest2OtherColumnDuplicateSurvivedValue.RULES_CONFLICT_RESOLVE) {
                if (column.getName().equals(resolveRule.getTargetColumn())) {
                    column.getConflictResolveList().add(resolveRule);
                }
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictShortest2OtherColumnDuplicateSurvivedValue.RULES_CONFLICT_TWO_TARGET_SAME_RESULT_REFERENCE_COLUMN) {
        manager.addRuleDefinition(rule);
    }

    manager.initKnowledgeBase();
    manager.checkConflictRuleValid();
    manager.runSession(getTableValue("/org.talend.survivorship.conflict/conflicts.csv")); //$NON-NLS-1$

    // Retrieve and verify the results.
    Set<String> conflictColumns = manager.getConflictsOfSurvivor();
    assertEquals("The size of conflictsOfSurvivor should be 1", 1, conflictColumns.size()); //$NON-NLS-1$

    Map<String, Object> survivors = manager.getSurvivorMap();
    assertNotNull("The SurvivorMap should not be null", survivors); //$NON-NLS-1$

    Object city1 = survivors.get("city1"); //$NON-NLS-1$
    assertNotNull("The city1NameObj should not be null", city1); //$NON-NLS-1$
    assertEquals("The resultDate should be lasa", "lasa", city1.toString()); //$NON-NLS-1$ //$NON-NLS-2$

    Object city2 = survivors.get("city2"); //$NON-NLS-1$
    assertNotNull("The city2NameObj should not be null", city2); //$NON-NLS-1$
    assertEquals("The resultDate should be xian", "xian", city2.toString()); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#checkConflictRuleValid()}.
 *
 * Case 1: the validation result must contain an entry for firstName and one for lastName, and the first
 * message of each entry must be "&lt;column&gt; is not exist survived value in the rule list".
 *
 * NOTE(review): the original note read "mappingTo need firstName longest need to lastName"; presumably the
 * sample conflict rules depend on survived values of firstName and lastName that the registered rules do not
 * produce - confirm against SampleDataConflictCheckRule.
 */
@Test
public void testCheckConflictRuleValid() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleData.PKG_NAME);

    // Declare every column and attach the conflict-resolution rules that target it.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        for (ConflictRuleDefinition conflictRule : SampleDataConflictCheckRule.RULES_CONFLICT_RESOLVE) {
            if (!column.getName().equals(conflictRule.getTargetColumn())) {
                continue;
            }
            column.getConflictResolveList().add(conflictRule);
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictCheckRule.RULES) {
        manager.addRuleDefinition(rule);
    }

    Map<String, List<String>> invalidRules = manager.checkConflictRuleValid();

    final String firstNameHint = "firstName is not exist survived value so that it must be invalid value"; //$NON-NLS-1$
    assertTrue(firstNameHint, invalidRules.containsKey("firstName")); //$NON-NLS-1$
    assertEquals(firstNameHint, "firstName is not exist survived value in the rule list", //$NON-NLS-1$
            invalidRules.get("firstName").get(0)); //$NON-NLS-1$

    final String lastNameHint = "lastName is not exist survived value so that it must be invalid value"; //$NON-NLS-1$
    assertTrue(lastNameHint, invalidRules.containsKey("lastName")); //$NON-NLS-1$
    assertEquals(lastNameHint, "lastName is not exist survived value in the rule list", //$NON-NLS-1$
            invalidRules.get("lastName").get(0)); //$NON-NLS-1$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#checkConflictRuleValid()}.
 *
 * Case 2: circular dependency between two rules. The validation result must contain an entry for city1 and
 * one for city2, and the first message of each entry must name the column it cannot be mapped to
 * (city1 to city2, city2 to city1).
 */
@Test
public void testCheckConflictRuleValidWith2RulesCycDependency() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleData.PKG_NAME);

    // Declare every column and attach the conflict-resolution rules that target it.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        for (ConflictRuleDefinition conflictRule : SampleDataConflictCheckRule.RULES_CONFLICT_RESOLVE_CASE2) {
            if (!column.getName().equals(conflictRule.getTargetColumn())) {
                continue;
            }
            column.getConflictResolveList().add(conflictRule);
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictCheckRule.RULES) {
        manager.addRuleDefinition(rule);
    }

    Map<String, List<String>> invalidRules = manager.checkConflictRuleValid();

    // The expected message doubles as the failure message of the equality checks.
    final String city1Cycle = "city1 can not mapping to city2 because of circular dependency"; //$NON-NLS-1$
    final String city2Cycle = "city2 can not mapping to city1 because of circular dependency"; //$NON-NLS-1$

    assertTrue("city1 and city2 should be exist circular dependency", //$NON-NLS-1$
            invalidRules.containsKey("city1")); //$NON-NLS-1$
    assertEquals(city1Cycle, city1Cycle, invalidRules.get("city1").get(0)); //$NON-NLS-1$

    assertTrue("city2 and city1 should be exist circular dependency", //$NON-NLS-1$
            invalidRules.containsKey("city2")); //$NON-NLS-1$
    assertEquals(city2Cycle, city2Cycle, invalidRules.get("city2").get(0)); //$NON-NLS-1$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#checkConflictRuleValid()}.
 *
 * Case 3: circular dependency across three rules. The validation result must contain entries for city1,
 * city2 and id, and the first message of each entry must name the column it cannot be mapped to
 * (city1 to city2, city2 to id, id to city1).
 */
@Test
public void testCheckConflictRuleValidWith3RulesCycDependency() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleData.PKG_NAME);

    // Declare every column and attach the conflict-resolution rules that target it.
    for (String columnName : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(columnName, SampleDataConflict.COLUMNS_CONFLICT.get(columnName));
        for (ConflictRuleDefinition conflictRule : SampleDataConflictCheckRule.RULES_CONFLICT_RESOLVE_CASE3) {
            if (!column.getName().equals(conflictRule.getTargetColumn())) {
                continue;
            }
            column.getConflictResolveList().add(conflictRule);
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition rule : SampleDataConflictCheckRule.RULES_CASE3) {
        manager.addRuleDefinition(rule);
    }

    Map<String, List<String>> invalidRules = manager.checkConflictRuleValid();

    // First make sure each column of the cycle is reported at all.
    assertTrue("city1 and city2 should be exist circular dependency", //$NON-NLS-1$
            invalidRules.containsKey("city1")); //$NON-NLS-1$
    assertTrue("city2 and id should be exist circular dependency", //$NON-NLS-1$
            invalidRules.containsKey("city2")); //$NON-NLS-1$
    assertTrue("id and city1 should be exist circular dependency", //$NON-NLS-1$
            invalidRules.containsKey("id")); //$NON-NLS-1$

    // Then check the first message of each entry; the expected text doubles as the failure message.
    final String city1Cycle = "city1 can not mapping to city2 because of circular dependency"; //$NON-NLS-1$
    final String city2Cycle = "city2 can not mapping to id because of circular dependency"; //$NON-NLS-1$
    final String idCycle = "id can not mapping to city1 because of circular dependency"; //$NON-NLS-1$
    assertEquals(city1Cycle, city1Cycle, invalidRules.get("city1").get(0)); //$NON-NLS-1$
    assertEquals(city2Cycle, city2Cycle, invalidRules.get("city2").get(0)); //$NON-NLS-1$
    assertEquals(idCycle, idCycle, invalidRules.get("id").get(0)); //$NON-NLS-1$
}
/**
 * Test method for {@link org.talend.survivorship.SurvivorshipManager#checkConflictRuleValid()}.
 *
 * Case 4: no circular dependency. The validation result must be empty, and the column order of the data set
 * must hold 9 columns starting with firstName, city1, city2.
 *
 * NOTE(review): the column order is read from the data set after the validation call, so it is presumably
 * computed by checkConflictRuleValid() - confirm in SurvivorshipManager.
 */
@Test
public void testCheckConflictRuleValidNormal() {
    manager = new SurvivorshipManager(SampleData.RULE_PATH, SampleData.PKG_NAME);

    // Declare every column and attach the conflict-resolution rules that target it.
    for (String str : SampleDataConflict.COLUMNS_CONFLICT.keySet()) {
        Column column = new Column(str, SampleDataConflict.COLUMNS_CONFLICT.get(str));
        for (ConflictRuleDefinition element : SampleDataConflictCheckRule.RULES_CONFLICT_RESOLVE_CASE4) {
            if (column.getName().equals(element.getTargetColumn())) {
                column.getConflictResolveList().add(element);
            }
        }
        manager.getColumnList().add(column);
    }
    for (RuleDefinition element : SampleDataConflictCheckRule.RULES_CASE3) {
        manager.addRuleDefinition(element);
    }

    // No session is run in this test, so the data set is created by hand before validating.
    manager.dataset = new DataSet(manager.getColumnList());
    Map<String, List<String>> checkConflictRuleValid = manager.checkConflictRuleValid();

    assertEquals("All of rule should be valid so that the size of map should be 0", 0, checkConflictRuleValid.size()); //$NON-NLS-1$
    assertEquals("column order size should be 9 because of there are 9 columns", 9, //$NON-NLS-1$
            manager.getDataSet().getColumnOrder().size());
    // Each failure message names the position it actually checks.
    assertEquals("The first one should be firstName", "firstName", manager.getDataSet().getColumnOrder().get(0).getName()); //$NON-NLS-1$ //$NON-NLS-2$
    assertEquals("The second one should be city1", "city1", manager.getDataSet().getColumnOrder().get(1).getName()); //$NON-NLS-1$ //$NON-NLS-2$
    assertEquals("The third one should be city2", "city2", manager.getDataSet().getColumnOrder().get(2).getName()); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Created by zshen. Compares one survived value with the values of that column in the input records.
 *
 * The column is the first entry yielded by iterating the survivor map. The records of the data set are then
 * walked until one of the two assertions below has been executed.
 *
 * NOTE(review): both assertions restate the condition of the branch they sit in, so as written they cannot
 * fail; the method can only fail through an exception thrown by one of the lookups. Confirm what was meant to
 * be verified before relying on this helper.
 */
private void assertResultIsFirstConflictedValue() {
Map<String, Object> survivorMap = manager.getSurvivorMap();
// NOTE(review): the result of this lookup (firstName of the second record) is discarded; the call can still
// throw when fewer than two records exist or the attribute is missing - presumably leftover code.
manager.getDataSet().getRecordList().get(1).getAttribute("firstName").getValue(); //$NON-NLS-1$
// NOTE(review): the first element of every non-empty conflict set is read and discarded as well.
for (Set<String> ciflictValue : manager.getConflictList()) {
if (ciflictValue.size() > 0) {
ciflictValue.toArray()[0].toString();
}
}
String survivedColumnValue = null;
String survivedColumnName = null;
// Take the first survivor entry whose name and value are both non-null. A null value in the map raises a
// NullPointerException on toString() before the check below is reached.
for (String columnName : survivorMap.keySet()) {
survivedColumnName = columnName;
survivedColumnValue = survivorMap.get(columnName).toString();
if (survivedColumnName != null && survivedColumnValue != null) {
break;
}
}
// Position used to read the conflict list; it only advances when a record reaches the end of the loop body.
int index = 0;
for (Record record : manager.getDataSet().getRecordList()) {
Attribute currentAttribute = record.getAttribute(survivedColumnName);
if (currentAttribute == null) {
// NOTE(review): index is not advanced for skipped records - confirm this is intended.
continue;
}
String currentValue = currentAttribute.getValue().toString();
// The record value equals the survived value: assert the equality and stop.
if (survivedColumnValue != null && survivedColumnValue.equals(currentValue)) {
assertTrue("first value should be " + currentValue, currentValue.equals(survivedColumnValue)); //$NON-NLS-1$
break;
// The values differ and a conflict entry exists at this position: assert the difference and stop.
} else if (manager.getConflictList().get(index) != null) {
assertFalse("first value should not be " + currentValue, currentValue.equals(survivedColumnValue)); //$NON-NLS-1$
break;
}
index++;
}
}
/**
 * Created by zshen. Reads test input rows from a comma-separated classpath resource.
 * <p>
 * The returned table always has 10 rows and 9 columns; rows and cells that the file does not supply stay
 * {@code null}, and the file must not contain more than 10 lines or more than 9 fields per line. A field whose
 * text is {@code null} is stored as a {@code null} cell. Fields 5 and 8 are parsed as {@link Integer}, field 6
 * is converted by {@code SampleData.stringToDate} with the pattern {@code dd-MM-yyyy}, and every other field
 * keeps its text.
 *
 * @param file path of the csv resource, resolved with {@link Class#getResource(String)}
 * @return the parsed table
 * @throws IllegalStateException if the resource cannot be found, located or read
 * @throws NumberFormatException if field 5 or 8 of a line is not a valid integer
 */
protected Object[][] getTableValue(String file) {
    // Fixed table shape; rows and cells the file does not supply stay null.
    final int rowCount = 10;
    final int columnCount = 9;
    final String cvsSplitBy = ","; //$NON-NLS-1$

    // Fail fast with a clear message instead of running a test against an empty table.
    java.net.URL resource = this.getClass().getResource(file);
    if (resource == null) {
        throw new IllegalStateException("Cannot find test resource " + file); //$NON-NLS-1$
    }
    String pathString;
    try {
        pathString = resource.toURI().getPath();
    } catch (URISyntaxException e) {
        throw new IllegalStateException("Cannot locate test resource " + file, e); //$NON-NLS-1$
    }

    Object[][] result = new Object[rowCount][columnCount];
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(pathString));
        String line;
        int index = 0;
        while ((line = br.readLine()) != null) {
            String[] items = line.split(cvsSplitBy);
            for (int y = 0; y < items.length; y++) {
                // The literal text "null" stands for a missing value.
                String cell = "null".equals(items[y]) ? null : items[y]; //$NON-NLS-1$
                if (cell == null) {
                    result[index][y] = null;
                } else if (y == 5) {
                    // Integer.getInteger(String) reads a system property and returned null here;
                    // the field has to be parsed, exactly like field 8.
                    result[index][y] = Integer.valueOf(cell);
                } else if (y == 6) {
                    result[index][y] = SampleData.stringToDate(cell, "dd-MM-yyyy"); //$NON-NLS-1$
                } else if (y == 8) {
                    result[index][y] = Integer.valueOf(cell);
                } else {
                    result[index][y] = cell;
                }
            }
            index++;
        }
    } catch (FileNotFoundException e) {
        throw new IllegalStateException("Cannot open test resource " + file, e); //$NON-NLS-1$
    } catch (IOException e) {
        throw new IllegalStateException("Cannot read test resource " + file, e); //$NON-NLS-1$
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException ignored) {
                // Nothing useful can be done when closing a reader fails.
            }
        }
    }
    return result;
}
}