// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.semantic.classifier.custom; import static org.junit.Assert.*; import java.io.*; import java.net.URL; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.util.*; import org.junit.Before; import org.junit.Test; import org.talend.dataquality.semantic.classifier.ISubCategory; /** * DOC qiongli class global comment. Detailled comment */ public class UserDefinedClassifierTest { // private UserDefinedClassifier userDefinedClassifier = null; /** * created by talend on 2015-07-28 Detailled comment. * */ private static enum STATE { Alabama, Alaska, Arizona, Arkansas, California, Colorado, Connecticut, Delaware, Florida, Georgia, Hawaii, Idaho, Illinois, Indiana, Iowa, Kansas, Kentucky, Louisiana, Maine, Maryland, Massachusetts, Michigan, Minnesota, Mississippi, Missouri, Montana, Nebraska, Nevada, New_Hampshire, New_Jersey, New_Mexico, New_York, North_Carolina, North_Dakota, Ohio, Oklahoma, Oregon, Pennsylvania, Rhode_Island, South_Carolina, South_Dakota, Tennessee, Texas, Utah, Vermont, Virginia, Washington, West_Virginia, Wisconsin, Wyoming; /* * (non-Javadoc) * * @see java.lang.Enum#toString() */ @Override public String toString() { return super.toString().replace("_", " "); //$NON-NLS-1$ //$NON-NLS-2$ } } private final String TLD_NAME_URL = "http://data.iana.org/TLD/tlds-alpha-by-domain.txt"; private final int MAX_TLD_LENGTH = 24; /** * Map from technical ID to category name * 583edc44ec06957a34fa6456 -> EN_MONTH * 583edc44ec06957a34fa646a -> EN_MONTH_ABBREV * 583edc44ec06957a34fa644c -> UK_SSN * 583edc44ec06957a34fa6448 -> SE_SSN * 583edc44ec06957a34fa6444 -> FR_SSN * 583edc44ec06957a34fa642e -> US_SSN * 583edc44ec06957a34fa6430 -> EMAIL * 583edc44ec06957a34fa645e -> FR_CODE_COMMUNE_INSEE * 583edc44ec06957a34fa643c -> FR_POSTAL_CODE * 583edc44ec06957a34fa647c -> DE_POSTAL_CODE * 583edc44ec06957a34fa6488 -> US_POSTAL_CODE * 583edc44ec06957a34fa6474 -> US_STATE_CODE * 583edc44ec06957a34fa6470 -> US_STATE * 583edc44ec06957a34fa6434 -> URL * 583edc44ec06957a34fa642c -> WEB_DOMAIN * 583edc44ec06957a34fa6446 -> ISBN_10 * 583edc44ec06957a34fa644a -> ISBN_13 * 583edc44ec06957a34fa6438 -> MAC_ADDRESS * 583edc44ec06957a34fa644e -> EN_MONEY_AMOUNT * 583edc44ec06957a34fa6468 -> FR_MONEY_AMOUNT * 583edc44ec06957a34fa643e -> DE_PHONE * 583edc44ec06957a34fa646c -> FR_PHONE * 583edc44ec06957a34fa645c -> US_PHONE * 583edc44ec06957a34fa6476 -> COLOR_HEX_CODE * 583edc44ec06957a34fa6432 -> BG_VAT_NUMBER * 583edc44ec06957a34fa6482 -> AT_VAT_NUMBER * 583edc44ec06957a34fa646e -> GEO_COORDINATES * 583edc44ec06957a34fa6436 -> GEO_COORDINATE * 583edc44ec06957a34fa6462 -> GEO_COORDINATES_DEG * 583edc44ec06957a34fa643a -> EN_WEEKDAY * 583edc44ec06957a34fa6484 -> SEDOL * 583edc44ec06957a34fa647a -> HDFS_URL * 583edc44ec06957a34fa6472 -> FILE_URL * 583edc44ec06957a34fa645a -> MAILTO_URL * 583edc44ec06957a34fa6464 -> DATA_URL * 583edc44ec06957a34fa6460 -> IBAN * */ public static Map<String, String[]> TEST_DATA = new LinkedHashMap<String, String[]>() { private static final long serialVersionUID = -5067273062214728849L; { // put (value, expected categories) put("CDG", new String[] {}); //$NON-NLS-1$ put("suresnes", new String[] {});//$NON-NLS-1$ put("Paris", new String[] {});//$NON-NLS-1$ put("France", new String[] {});//$NON-NLS-1$ put("? - lfd", new String[] {});//$NON-NLS-1$ put("CHN", new String[] {});//$NON-NLS-1$ put("cat", new String[] {});//$NON-NLS-1$ put("2012-02-03 7:08PM", new String[] {});//$NON-NLS-1$ put("1/2/2012", new String[] {});//$NON-NLS-1$ put("january", new String[] { "583edc44ec06957a34fa6456" });//$NON-NLS-1$ //$NON-NLS-2$ put("jan", new String[] { "583edc44ec06957a34fa646a" });//$NON-NLS-1$ //$NON-NLS-2$ put("february", new String[] { "583edc44ec06957a34fa6456" });//$NON-NLS-1$ //$NON-NLS-2$ put("march", new String[] { "583edc44ec06957a34fa6456" });//$NON-NLS-1$ //$NON-NLS-2$ put("auG", new String[] { "583edc44ec06957a34fa646a" });//$NON-NLS-1$ //$NON-NLS-2$ put("MAY", new String[] { "583edc44ec06957a34fa6456", "583edc44ec06957a34fa646a" });//$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ put("januar", new String[] {});//$NON-NLS-1$ put("janvier", new String[] {});//$NON-NLS-1$ put("AB123456C", new String[] { "583edc44ec06957a34fa644c" });//$NON-NLS-1$ //$NON-NLS-2$ put("AB 12 34 56 C", new String[] { "583edc44ec06957a34fa644c" });//$NON-NLS-1$ //$NON-NLS-2$ put("TN 31 12 58 F", new String[] { "583edc44ec06957a34fa644c" });//$NON-NLS-1$ //$NON-NLS-2$ put("20120101-3842", new String[] { "583edc44ec06957a34fa6448" });//$NON-NLS-1$ //$NON-NLS-2$ put("120101-3842", new String[] {});//$NON-NLS-1$ put("christophe", new String[] {});//$NON-NLS-1$ put("sda@talend.com", new String[] { "583edc44ec06957a34fa6430" });//$NON-NLS-1$ //$NON-NLS-2$ put("abc@gmail.com", new String[] { "583edc44ec06957a34fa6430" }); //$NON-NLS-1$ //$NON-NLS-2$ put(" abc@gmail.com ", new String[] { "583edc44ec06957a34fa6430" }); //$NON-NLS-1$ //$NON-NLS-2$ put("abc@gmail.com ", new String[] { "583edc44ec06957a34fa6430" }); //$NON-NLS-1$ //$NON-NLS-2$ put(" abc@gmail.com", new String[] { "583edc44ec06957a34fa6430" }); //$NON-NLS-1$ //$NON-NLS-2$ put("abc@gmail", new String[] {}); //$NON-NLS-1$ put("12345", new String[] { "583edc44ec06957a34fa645e", "583edc44ec06957a34fa643c", "583edc44ec06957a34fa647c", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ "583edc44ec06957a34fa6488" }); //$NON-NLS-1$ put("2A345", new String[] { "583edc44ec06957a34fa645e" }); //$NON-NLS-1$ //$NON-NLS-2$ put("12345-6789", new String[] { "583edc44ec06957a34fa6488" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ put("Talend", new String[] {}); //$NON-NLS-1$ put("9 rue pages, 92150 suresnes", new String[] {}); //$NON-NLS-1$ put("avenue des champs elysees", new String[] {}); //$NON-NLS-1$ put("MA", new String[] { "583edc44ec06957a34fa6474" }); //$NON-NLS-1$ //$NON-NLS-2$ put("FL", new String[] { "583edc44ec06957a34fa6474" }); //$NON-NLS-1$ //$NON-NLS-2$ put("FLorIda", new String[] { "583edc44ec06957a34fa6470" }); //$NON-NLS-1$ //$NON-NLS-2$ put("FLORIDA", new String[] { "583edc44ec06957a34fa6470" }); //$NON-NLS-1$ //$NON-NLS-2$ put("New Hampshire", new String[] { "583edc44ec06957a34fa6470" });//$NON-NLS-1$ //$NON-NLS-2$ put("Arizona", new String[] { "583edc44ec06957a34fa6470" });//$NON-NLS-1$ //$NON-NLS-2$ put("Alabama", new String[] { "583edc44ec06957a34fa6470" });//$NON-NLS-1$ //$NON-NLS-2$ put("F", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("M", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Male", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("female", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("http://www.talend.com", new String[] { "583edc44ec06957a34fa6434" });//$NON-NLS-1$ //$NON-NLS-2$ put("www.talend.com", new String[] { "583edc44ec06957a34fa642c" });//$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ put("talend.com", new String[] { "583edc44ec06957a34fa642c" });//$NON-NLS-1$ //$NON-NLS-2$ put("talend.com", new String[] { "583edc44ec06957a34fa642c" });//$NON-NLS-1$ //$NON-NLS-2$ put("talend.veryLongTDL", new String[] { "583edc44ec06957a34fa642c" });//$NON-NLS-1$ //$NON-NLS-2$ put("talend.TDLlongerThan25Characters", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("talendSmallerThan63Charactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.com", //$NON-NLS-1$ new String[] { "583edc44ec06957a34fa642c" });//$NON-NLS-1$ put("talendLongerThan63Charactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.com", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("1 81 04 95 201 569 62", new String[] { "583edc44ec06957a34fa6444" });//$NON-NLS-1$ //$NON-NLS-2$ put("1810495201569", new String[] { "583edc44ec06957a34fa6444" });//$NON-NLS-1$ //$NON-NLS-2$ put("123-45-6789", new String[] { "583edc44ec06957a34fa642e" });//$NON-NLS-1$ //$NON-NLS-2$ put("azjfnskjqnfoajr", new String[] {});//$NON-NLS-1$ put("ISBN 9-787-11107-5", new String[] { "583edc44ec06957a34fa6446" });//$NON-NLS-1$ //$NON-NLS-2$ put("SINB 9-787-11107-5", new String[] {});//$NON-NLS-1$ put("ISBN 2-711-79141-6", new String[] { "583edc44ec06957a34fa6446" });//$NON-NLS-1$ //$NON-NLS-2$ put("ISBN-13: 978-2711791415", new String[] { "583edc44ec06957a34fa644a" });//$NON-NLS-1$ //$NON-NLS-2$ put("ISBN: 978-2711791415", new String[] { "583edc44ec06957a34fa644a" });//$NON-NLS-1$ //$NON-NLS-2$ put("ISBN 978-2711791415", new String[] { "583edc44ec06957a34fa644a" });//$NON-NLS-1$ //$NON-NLS-2$ put("A4:4E:31:B9:C5:B4", new String[] { "583edc44ec06957a34fa6438" });//$NON-NLS-1$ //$NON-NLS-2$ put("A4:4E:31:B9:C5:B4:B4", new String[] {});//$NON-NLS-1$ put("A4-4E-31-B9-C5-B4", new String[] {});//$NON-NLS-1$ put("$3,000", new String[] { "583edc44ec06957a34fa644e" });//$NON-NLS-1$ //$NON-NLS-2$ put("$3000", new String[] { "583edc44ec06957a34fa644e" });//$NON-NLS-1$ //$NON-NLS-2$ put("$ 3000", new String[] {});//$NON-NLS-1$ put("CA$3000", new String[] { "583edc44ec06957a34fa644e" });//$NON-NLS-1$ //$NON-NLS-2$ put("€3000", new String[] { "583edc44ec06957a34fa644e" });//$NON-NLS-1$ //$NON-NLS-2$ put("3000 €", new String[] { "583edc44ec06957a34fa6468" });//$NON-NLS-1$ //$NON-NLS-2$ put("345,56 €", new String[] { "583edc44ec06957a34fa6468" });//$NON-NLS-1$ //$NON-NLS-2$ put("35 k€", new String[] { "583edc44ec06957a34fa6468" });//$NON-NLS-1$ //$NON-NLS-2$ put("35 054 T€", new String[] { "583edc44ec06957a34fa6468" });//$NON-NLS-1$ //$NON-NLS-2$ put("35 456 544 k£", new String[] { "583edc44ec06957a34fa6468" });//$NON-NLS-1$ //$NON-NLS-2$ put("00496-8738059275", new String[] { "583edc44ec06957a34fa643e" });//$NON-NLS-1$ //$NON-NLS-2$ put("00338.01345678", new String[] { "583edc44ec06957a34fa646c" });//$NON-NLS-1$ //$NON-NLS-2$ put("John Doe", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush Jr.", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush, Jr.", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush II", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush III", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges W. Bush IV", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Georges Bush IV", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Jean-Michel Louis", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("David F Walker", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("J. S. Smith, Jr.", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("Catherine Zeta-Jones", new String[] {});//$NON-NLS-1$ //$NON-NLS-2$ put("#990000", new String[] { "583edc44ec06957a34fa6476" });//$NON-NLS-1$ //$NON-NLS-2$ put("#AAAAAA", new String[] { "583edc44ec06957a34fa6476" });//$NON-NLS-1$ //$NON-NLS-2$ put("#cc3366", new String[] { "583edc44ec06957a34fa6476" });//$NON-NLS-1$ //$NON-NLS-2$ put("#c1d906", new String[] { "583edc44ec06957a34fa6476" });//$NON-NLS-1$ //$NON-NLS-2$ put("BG123456789", new String[] { "583edc44ec06957a34fa6432" });//$NON-NLS-1$ //$NON-NLS-2$ put("BG123456789", new String[] { "583edc44ec06957a34fa6432" });//$NON-NLS-1$ //$NON-NLS-2$ put("AT12345678", new String[] { "583edc44ec06957a34fa6482" });//$NON-NLS-1$ //$NON-NLS-2$ // put("132.2356", new String[] { "LONGITUDE_LATITUDE_COORDINATE" }); put("40.7127837,-74.00594130000002", new String[] { "583edc44ec06957a34fa646e" }); //$NON-NLS-1$ //$NON-NLS-2$ put("30.082993", new String[] { "583edc44ec06957a34fa6436" }); //$NON-NLS-1$ //$NON-NLS-2$ put("N 0:59:59.99,E 0:59:59.99", new String[] { "583edc44ec06957a34fa6462" }); //$NON-NLS-1$ //$NON-NLS-2$ put("00:00", new String[] {}); //$NON-NLS-1$ //$NON-NLS-2$ put("12:00", new String[] {}); //$NON-NLS-1$ //$NON-NLS-2$ put("11:23", new String[] {}); //$NON-NLS-1$ //$NON-NLS-2$ put("15:53", new String[] {}); //$NON-NLS-1$ //$NON-NLS-2$ put("23:59", new String[] {}); //$NON-NLS-1$ //$NON-NLS-2$ put("Monday", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //<$NON-NLS-2$ put("MonDay", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("MOnDay", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("MOn", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("Tue", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("Wed", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("Wednesday", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("Thurs", new String[] { "583edc44ec06957a34fa643a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("25:59", new String[] {}); // does not match TIME (as expected) //$NON-NLS-1$ put("0067340", new String[] { "583edc44ec06957a34fa6484" }); //$NON-NLS-1$ //$NON-NLS-2$ put("4155586", new String[] { "583edc44ec06957a34fa6484", "583edc44ec06957a34fa645c" }); //$NON-NLS-1$ //$NON-NLS-2$ put("(541) 754-3010", new String[] { "583edc44ec06957a34fa645c" }); //$NON-NLS-1$ //$NON-NLS-2$ put("B01HL06", new String[] { "583edc44ec06957a34fa6484" }); //$NON-NLS-1$ //$NON-NLS-2$ put("132.2356", new String[] {}); //$NON-NLS-1$ put("R&D", new String[] {}); //$NON-NLS-1$ put("hdfs://127.0.0.1/user/luis/sample.txt", new String[] { "583edc44ec06957a34fa647a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("hdfs://toto.com/user/luis/sample.txt", new String[] { "583edc44ec06957a34fa647a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("file://localhost/c/WINDOWS/clock.avi", new String[] { "583edc44ec06957a34fa6472" }); //$NON-NLS-1$ //$NON-NLS-2$ put("file://localhost/c|/WINDOWS/clock.avi", new String[] { "583edc44ec06957a34fa6472" }); //$NON-NLS-1$ //$NON-NLS-2$ // " put("file://localhost/c:/WINDOWS/clock.avi", new String[] { "583edc44ec06957a34fa6472" }); //$NON-NLS-1$ //$NON-NLS-2$ put("file:///C:/WORKSPACE/reports.html", new String[] { "583edc44ec06957a34fa6472" }); //$NON-NLS-1$ //$NON-NLS-2$ put("mailto:?to=&subject=mailto%20with%20examples&body=http://en.wikipedia.org/wiki/Mailto", //$NON-NLS-1$ new String[] { "583edc44ec06957a34fa645a" }); //$NON-NLS-1$ put("mailto:someone@example.com?subject=This%20is%20the%20subject", new String[] { "583edc44ec06957a34fa645a" }); //$NON-NLS-1$ //$NON-NLS-2$ put("mailto:p.dupond@example.com?subject=Sujet%20du%20courrier&cc=pierre@example.org&cc=jacques@example.net&body=Bonjour", //$NON-NLS-1$ new String[] { "583edc44ec06957a34fa645a" }); //$NON-NLS-1$ put("data:text/html;charset=US-ASCII,%3Ch1%3EHello!%3C%2Fh1%3E", new String[] { "583edc44ec06957a34fa6464" }); //$NON-NLS-1$ //$NON-NLS-2$ put("data:TEXT/html;charset=US-ASCII,%3Ch1%3EHello!%3C%2Fh1%3E", new String[] { "583edc44ec06957a34fa6464" }); put("data:text/html;charset=,%3Ch1%3EHello!%3C%2Fh1%3E", new String[] { "583edc44ec06957a34fa6464" }); //$NON-NLS-1$ //$NON-NLS-2$ put("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQAQMAAAAlPW0iAAAABlBMVEUAAAD///+l2Z/dAAAA", //$NON-NLS-1$ new String[] { "583edc44ec06957a34fa6464" }); //$NON-NLS-1$ put("data:,Hello World!", new String[] { "583edc44ec06957a34fa6464" }); //$NON-NLS-1$ //$NON-NLS-2$ put("FR7630001007941234567890185", new String[] { "583edc44ec06957a34fa6460" }); } }; @Before public void prepare() { for (STATE state : STATE.values()) { TEST_DATA.put(state.toString(), new String[] { "583edc44ec06957a34fa6470" }); //$NON-NLS-1$ } } @Test public void testClassify() throws IOException { UserDefinedClassifier userDefinedClassifier = new UDCategorySerDeser().readJsonFile(); for (String str : TEST_DATA.keySet()) { Set<String> cats = userDefinedClassifier.classify(str); String[] expect_values = TEST_DATA.get(str); assertEquals("unexpected size for " + str, expect_values.length, cats.size()); //$NON-NLS-1$ Object[] catsArray = new String[cats.size()]; int i = 0; for (String cat : cats) { catsArray[i++] = cat; } Arrays.sort(catsArray); Arrays.sort(expect_values); assertArrayEquals("wrong category found for input string: " + str, expect_values, catsArray); //$NON-NLS-1$ } } @Test public void testUniqueNames() throws IOException { UserDefinedClassifier userDefinedClassifier = new UDCategorySerDeser().readJsonFile(); Set<ISubCategory> classifiers = userDefinedClassifier.getClassifiers(); Set<String> names = new HashSet<>(); for (ISubCategory iSubCategory : classifiers) { String name = iSubCategory.getLabel(); assertTrue("Category Name: " + name + " is duplicated!", names.add(name)); //$NON-NLS-1$//$NON-NLS-2$ } } @Test public void testUniqueIds() throws IOException { UserDefinedClassifier userDefinedClassifier = new UDCategorySerDeser().readJsonFile(); Set<ISubCategory> classifiers = userDefinedClassifier.getClassifiers(); Set<String> ids = new HashSet<>(); for (ISubCategory iSubCategory : classifiers) { String id = iSubCategory.getName(); assertTrue("Category Id: " + id + " is duplicated!", ids.add(id)); //$NON-NLS-1$//$NON-NLS-2$ } } @Test public void testAddAndRemoveSubCategory() throws IOException { UserDefinedClassifier userDefinedClassifier = new UserDefinedClassifier(); addAndRemoveCategories(userDefinedClassifier); userDefinedClassifier = UDCategorySerDeser.readJsonFile(); addAndRemoveCategories(userDefinedClassifier); } @Test public void testInsertOrUpdate() { UserDefinedClassifier userDefinedClassifier = new UserDefinedClassifier(); String id = "this is the Id"; //$NON-NLS-1$ UserDefinedCategory cat = new UserDefinedCategory(id); assertTrue(userDefinedClassifier.insertOrUpdateSubCategory(cat)); assertEquals("by default, the name should be same as the id!", id, cat.getLabel()); //$NON-NLS-1$ cat.setLabel("my name"); //$NON-NLS-1$ assertTrue(userDefinedClassifier.insertOrUpdateSubCategory(cat)); Iterator<ISubCategory> it = userDefinedClassifier.getClassifiers().iterator(); while (it.hasNext()) { ISubCategory c = it.next(); assertEquals(cat.getLabel(), c.getLabel()); } } @Test public void checkLongestTLD() { String outputFilename = "TLD.txt"; downloadFile(TLD_NAME_URL, outputFilename); File file = new File(outputFilename); try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) { String line = null; while ((line = reader.readLine()) != null) { int length = line.trim().length(); if (!line.startsWith("#") && length > 0) { // the comments don't count assertTrue("Expected MAX_LENGTH of web domain is " + String.valueOf(MAX_TLD_LENGTH), length <= MAX_TLD_LENGTH); } } } catch (IOException e) { e.printStackTrace(); } file.delete(); } /** * download a specified file * * @param urlFilename the url to access the file * @param outputFilename the complete path+filename for output */ private void downloadFile(String urlFilename, String outputFilename) { FileOutputStream outputStream = null; try { URL url = new URL(urlFilename); ReadableByteChannel bytes = Channels.newChannel(url.openStream()); outputStream = new FileOutputStream(outputFilename); outputStream.getChannel().transferFrom(bytes, 0, Long.MAX_VALUE); } catch (IOException e) { e.printStackTrace(); } finally { if (outputStream != null) { try { outputStream.close(); } catch (IOException e) { e.printStackTrace(); } } } } private void addAndRemoveCategories(UserDefinedClassifier userDefinedClassifier) { int sizeBefore = userDefinedClassifier.getClassifiers().size(); String id = "this is the Id"; //$NON-NLS-1$ UserDefinedCategory cat = new UserDefinedCategory(id); userDefinedClassifier.removeSubCategory(cat); int sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals("Expect to have the same size because the removed category does not exist in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore, sizeAfter); userDefinedClassifier.addSubCategory(cat); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals( "Expect to have a different size because we add a category that does not exist in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore + 1, sizeAfter); userDefinedClassifier.addSubCategory(cat); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals( "Expect to have only one more element than the original size because the category now exists in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore + 1, sizeAfter); userDefinedClassifier.removeSubCategory(cat); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals("Expect to have the same size because we removed the added category. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore, sizeAfter); userDefinedClassifier.removeSubCategory(cat); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals("Expect to have the same size because we removed twice the same category. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore, sizeAfter); // now add again twice assertTrue(userDefinedClassifier.addSubCategory(cat)); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals( "Expect to have a different size because we add a category that does not exist in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore + 1, sizeAfter); assertFalse(userDefinedClassifier.addSubCategory(cat)); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals( "Expect to have a still have the same size because we add a category that already exists in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore + 1, sizeAfter); UserDefinedCategory cat2 = new UserDefinedCategory(id); cat2.setLabel("my name"); //$NON-NLS-1$ assertFalse(userDefinedClassifier.addSubCategory(cat2)); sizeAfter = userDefinedClassifier.getClassifiers().size(); assertEquals( "Expect to have a still have the same size because we add a category that already exists in the list of categories. Size=" //$NON-NLS-1$ + userDefinedClassifier.getClassifiers().size(), sizeBefore + 1, sizeAfter); } }