// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.survivorship.sample; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import org.talend.survivorship.model.RuleDefinition; import org.talend.survivorship.model.RuleDefinition.Function; import org.talend.survivorship.model.RuleDefinition.Order; /** * Sample input data and result expectation for unit tests. * * */ public final class SampleData { public static final String RULE_PATH = "src/test/resources/generated/"; //$NON-NLS-1$ public static final String PKG_NAME = "org.talend.survivorship.sample"; //$NON-NLS-1$ public static final String PKG_NAME_CONFLICT_FRE_NULL_FRE = "org.talend.survivorship.conflict.fre_null_fre"; //$NON-NLS-1$ public static final Object[][] SAMPLE_INPUT = { { "GRIZZARD CO.", "Lili", "110 N MARYLAND AVE", "GLENDALE", "CA", "912066", "FR", "8185431314", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ stringToDate("20110101", "yyyyMMdd"), 1.0, 18, 1985, "Something" }, //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ { "GRIZZARD", "Tony", "110 NORTH MARYLAND AVENUE", "GLENDALE", "CA", "91205", "US", "9003254892", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ stringToDate("20110118", "yyyyMMdd"), 0.9879999999999999, 25, 0, "" }, //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ { "GRIZZARD INC", "Tom", "110 N. MARYLAND AVENUE", "GLENDALE", "CA", "91206", "US", "(818) 543-1315", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ stringToDate("20110103", "yyyyMMdd"), 0.8572727272727272, 31, 1970, null }, //$NON-NLS-1$ //$NON-NLS-2$ { "GRIZZARD CO", "li", "1480 S COLORADO BOULEVARD", "LOS ANGELES", "CA", "91206", "US", "(800) 325-4892", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ stringToDate("20110115", "yyyyMMdd"), 0.742319482, 35, 0, null } }; //$NON-NLS-1$ //$NON-NLS-2$ public static final LinkedHashMap<String, String> COLUMNS = new LinkedHashMap<String, String>() { private static final long serialVersionUID = 1L; { put("acctName", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("firstName", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("addr", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("city", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("state", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("zip", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("country", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("phone", "String"); //$NON-NLS-1$ //$NON-NLS-2$ put("date", "java.util.Date"); //$NON-NLS-1$ //$NON-NLS-2$ put("score", "double"); //$NON-NLS-1$ //$NON-NLS-2$ put("age", "int"); //$NON-NLS-1$ //$NON-NLS-2$ put("birthyear", "int"); //$NON-NLS-1$ //$NON-NLS-2$ put("completeness", "String"); //$NON-NLS-1$ //$NON-NLS-2$ } }; public static final RuleDefinition[] RULES = { new RuleDefinition(Order.SEQ, "CompletenessRule", null, Function.MostComplete, null, "completeness", true), //$NON-NLS-1$ //$NON-NLS-2$ new RuleDefinition(Order.SEQ, "LengthAcct", "acctName", Function.Expression, ".length > 11", "acctName", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ new RuleDefinition(Order.SEQ, "LongestAddr", "addr", Function.Longest, null, "addr", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ new RuleDefinition(Order.SEQ, "HighScore", "score", Function.Expression, " > 0.95", "score", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ new RuleDefinition(Order.SEQ, "MostCommonCity", "city", Function.MostCommon, null, "city", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ new RuleDefinition(Order.SEQ, "MostCommonZip", "zip", Function.MostCommon, null, "zip", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ // new RuleDefinition(Order.MC, "ZipRegex", "zip", Function.MatchRegex, "\\\\d{5}",null, true), new RuleDefinition(Order.MT, null, null, null, null, "state", true), //$NON-NLS-1$ new RuleDefinition(Order.MT, null, null, null, null, "country", true), //$NON-NLS-1$ new RuleDefinition(Order.SEQ, "LatestPhone", "date", Function.MostRecent, null, "date", true), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ new RuleDefinition(Order.MT, null, null, null, null, "phone", true), //$NON-NLS-1$ new RuleDefinition(Order.SEQ, "firstNameRule", "firstName", Function.MatchRegex, "^\\\\w{2}$", "firstName", true) }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ public static final RuleDefinition[] RULES_CONFLICT_FRE_NULL_FRE = { new RuleDefinition(Order.SEQ, "more_common_middleName", "middleName", //$NON-NLS-1$ //$NON-NLS-2$ Function.MostCommon, null, "middleName", false) }; //$NON-NLS-1$ public static final HashMap<String, Object> EXPECTED_SURVIVOR = new HashMap<String, Object>() { private static final long serialVersionUID = 1L; { put("acctName", "GRIZZARD CO."); //$NON-NLS-1$ //$NON-NLS-2$ put("firstName", "li"); //$NON-NLS-1$ //$NON-NLS-2$ put("addr", "110 NORTH MARYLAND AVENUE"); //$NON-NLS-1$ //$NON-NLS-2$ put("city", "GLENDALE"); //$NON-NLS-1$ //$NON-NLS-2$ put("state", "CA"); //$NON-NLS-1$ //$NON-NLS-2$ put("zip", "91206"); //$NON-NLS-1$ //$NON-NLS-2$ put("country", "US"); //$NON-NLS-1$ //$NON-NLS-2$ put("phone", "9003254892"); //$NON-NLS-1$ //$NON-NLS-2$ put("date", stringToDate("20110118", "yyyyMMdd")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ put("score", 1.0); //$NON-NLS-1$ put("age", null); //$NON-NLS-1$ put("birthyear", null); //$NON-NLS-1$ put("completeness", "Something"); //$NON-NLS-1$ //$NON-NLS-2$ } }; public static final HashSet<String> EXPECTED_CONFLICT_OF_SURVIVOR = new HashSet<String>() { private static final long serialVersionUID = 1L; { add("acctName"); //$NON-NLS-1$ add("score"); //$NON-NLS-1$ add("addr"); //$NON-NLS-1$ } }; public static Date stringToDate(String dateString, String dateFormat) { Date date = null; try { date = new SimpleDateFormat(dateFormat).parse(dateString); } catch (ParseException e) { e.printStackTrace(); } return date; } public static String dateToString(Date date, String dateFormat) { String str = null; str = new SimpleDateFormat(dateFormat).format(date); return str; } }