/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans.transform;
import static org.junit.Assert.assertEquals;
import java.util.List;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.beans.transform.RemoveDictionaryMatchesTransformer.RemovedMatchesType;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.data.MockInputColumn;
import org.datacleaner.reference.Dictionary;
import org.datacleaner.reference.SimpleDictionary;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Unit tests for {@link RemoveDictionaryMatchesTransformer}.
 *
 * <p>
 * Every test runs against a transformer that is created and initialized in {@link #setUp()} and closed in
 * {@link #tearDown()}. The fixture uses a mock "Job title" input column and a {@link SimpleDictionary} named
 * "Title adjectives" that holds both single-word and multi-word entries.
 * </p>
 */
public class RemoveDictionaryMatchesTransformerTest {

    /** Mock input column handed to every transformer built by this test class. */
    private final InputColumn<String> col = new MockInputColumn<>("Job title");

    /** Default dictionary used by the fixture; contains single-word and multi-word entries. */
    private final Dictionary dictionary =
            new SimpleDictionary("Title adjectives", "Junior", "Senior", "Lead", "Principal", "Assistant to",
                    "Assistant to the");

    /** Transformer under test; (re)created for each test and closed by {@link #tearDown()}. */
    private RemoveDictionaryMatchesTransformer transformer;

    /**
     * Builds and initializes a fresh transformer on top of the default dictionary.
     */
    @Before
    public void setUp() {
        transformer = new RemoveDictionaryMatchesTransformer(col, dictionary, new DataCleanerConfigurationImpl());
        transformer.init();
    }

    /**
     * Closes whichever transformer the {@code transformer} field currently refers to.
     */
    @After
    public void tearDown() {
        transformer.close();
    }

    /**
     * Replaces the fixture transformer with one backed by a dictionary constructed with the boolean flag set to
     * {@code false} (presumably "case sensitive = false", as the test name suggests - confirm against
     * {@link SimpleDictionary}) and checks that upper-case input is still matched against the mixed-case entries.
     */
    @Test
    public void testCaseInsensitiveRemoval() throws Exception {
        final Dictionary caseInsensitiveDictionary =
                new SimpleDictionary("Title adjectives", false, "Junior", "Senior", "Lead", "Principal",
                        "Assistant to", "Assistant to the");

        // setUp() already created and initialized a transformer. Close it before swapping in the replacement,
        // otherwise that first instance would never be closed.
        transformer.close();
        transformer =
                new RemoveDictionaryMatchesTransformer(col, caseInsensitiveDictionary,
                        new DataCleanerConfigurationImpl());
        transformer.init();

        assertEquals(" GURU OF EMPLOYEES", transformer.transform("ASSISTANT TO THE LEAD GURU OF JUNIOR EMPLOYEES")[0]);

        // Because of the two approaches to matching (multi-word and single-word) the result will vary a bit
        // here. Multi-word matches will represent the sentence as it is in the dictionary. Single-word matches
        // will represent the match found in the string.
        assertEquals("assistant to the LEAD JUNIOR",
                transformer.transform("ASSISTANT TO THE LEAD GURU OF JUNIOR EMPLOYEES")[1]);

        // No explicit close() here: tearDown() closes the replacement transformer, and it does so even when one
        // of the assertions above fails.
    }

    /**
     * Checks the name and type of the output columns for both {@link RemovedMatchesType} settings: the column
     * names stay the same, while the type of the second column switches between {@link String} and {@link List}.
     */
    @Test
    public void testGetOutputColumns() throws Exception {
        transformer._removedMatchesType = RemovedMatchesType.STRING;
        final OutputColumns outputColumns = transformer.getOutputColumns();
        assertEquals("OutputColumns[Job title (Title adjectives removed), Removed matches]", outputColumns.toString());
        assertEquals(String.class, outputColumns.getColumnType(1));

        transformer._removedMatchesType = RemovedMatchesType.LIST;
        assertEquals("OutputColumns[Job title (Title adjectives removed), Removed matches]",
                transformer.getOutputColumns().toString());
        assertEquals(List.class, transformer.getOutputColumns().getColumnType(1));
    }

    /**
     * Checks that a dictionary entry is still removed when it is delimited by punctuation rather than whitespace.
     */
    @Test
    public void testWordBoundarySplitting() throws Throwable {
        transformer._removedMatchesType = RemovedMatchesType.STRING;

        assertEquals("", transformer.transform("")[0]);
        assertEquals("", transformer.transform("")[1]);

        assertEquals("., Software Engineer", transformer.transform(".Senior, Software Engineer")[0]);
        // NOTE(review): this assertion uses a different input than the line above, so the removed-matches column
        // for the punctuation-delimited input is not checked - confirm whether ".Senior, Software Engineer" was
        // intended here.
        assertEquals("Senior", transformer.transform("Senior Software Engineer")[1]);
    }

    /**
     * Runs the job-title scenario with {@link RemovedMatchesType#STRING}: index 0 of the result holds the text
     * with matches removed and index 1 holds the removed matches as a single string.
     */
    @Test
    public void testJobTitleScenarioRemovedMatchesAsString() throws Throwable {
        transformer._removedMatchesType = RemovedMatchesType.STRING;

        assertEquals("", transformer.transform("")[0]);
        assertEquals("", transformer.transform("")[1]);

        assertEquals(" Software Engineer", transformer.transform("Senior Software Engineer")[0]);
        assertEquals("Senior", transformer.transform("Senior Software Engineer")[1]);

        assertEquals(" Designer ", transformer.transform(" Lead Designer ")[0]);
        assertEquals("Lead", transformer.transform(" Lead Designer ")[1]);

        // Input without any dictionary entry comes back unchanged, with an empty removed-matches string.
        assertEquals("Software Engineer", transformer.transform("Software Engineer")[0]);
        assertEquals("", transformer.transform("Software Engineer")[1]);

        assertEquals(" Guru of employees",
                transformer.transform("Principal Senior Lead Guru of Junior employees")[0]);
        assertEquals("Principal Senior Lead Junior",
                transformer.transform("Principal Senior Lead Guru of Junior employees")[1]);
    }

    /**
     * Runs the job-title scenario with {@link RemovedMatchesType#LIST}: index 1 of the result is compared through
     * its {@code toString()} form, e.g. {@code [Senior]} or {@code []}.
     */
    @Test
    public void testJobTitleScenarioRemovedMatchesAsList() throws Throwable {
        transformer._removedMatchesType = RemovedMatchesType.LIST;

        assertEquals(" Software Engineer", transformer.transform("Senior Software Engineer")[0]);
        assertEquals("[Senior]", transformer.transform("Senior Software Engineer")[1].toString());

        assertEquals(" Designer ", transformer.transform(" Lead Designer ")[0]);
        assertEquals("[Lead]", transformer.transform(" Lead Designer ")[1].toString());

        assertEquals("Software Engineer", transformer.transform("Software Engineer")[0]);
        assertEquals("[]", transformer.transform("Software Engineer")[1].toString());

        assertEquals(" Guru of employees",
                transformer.transform("Principal Senior Lead Guru of Junior employees")[0]);
        assertEquals("[Principal, Senior, Lead, Junior]",
                transformer.transform("Principal Senior Lead Guru of Junior employees")[1].toString());

        assertEquals("", transformer.transform("")[0]);
        assertEquals("[]", transformer.transform("")[1].toString());
    }
}