/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans.transform;
import javax.inject.Named;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.TextCategory;
import org.datacleaner.util.CharIterator;
/**
 * Transformer that strips unwanted characters from a single string column.
 *
 * <p>
 * Each character of the input value is classified, in this order, as
 * whitespace, letter, digit or "sign" (anything that is none of the former),
 * and is removed when the configuration flag for its class is enabled. The
 * transformer produces exactly one output column of type {@link String}.
 */
@Named("Remove unwanted characters")
@Description("Removes characters from strings that are not wanted. Use it to cleanse codes and identifiers "
        + "that may have additional dashes, punctuations, unwanted letters etc.")
@Categorized(TextCategory.class)
public class RemoveUnwantedCharsTransformer implements Transformer {

    /** The string column whose values are cleansed. */
    @Configured
    InputColumn<String> column;

    /** When {@code true}, characters classified as whitespace are removed. */
    @Configured(order = 1)
    boolean removeWhitespaces = true;

    /** When {@code true}, characters classified as letters are removed. */
    @Configured(order = 2)
    boolean removeLetters = true;

    /** When {@code true}, characters classified as digits are removed. */
    @Configured(order = 3)
    boolean removeDigits = false;

    /**
     * When {@code true}, every character that is neither whitespace, letter
     * nor digit is removed.
     */
    @Configured(order = 4)
    @Description("Remove additional signs, such as dashes, punctuations, slashes and more?")
    boolean removeSigns = true;

    /**
     * Creates an unconfigured transformer; {@link #column} is left unset.
     */
    public RemoveUnwantedCharsTransformer() {
    }

    /**
     * Creates a transformer that operates on the given column, using the
     * default removal flags.
     *
     * @param inputColumn
     *            the column to cleanse
     */
    public RemoveUnwantedCharsTransformer(final InputColumn<String> inputColumn) {
        column = inputColumn;
    }

    /**
     * Declares the single {@link String} output column, named after the input
     * column with a {@code " (cleansed)"} suffix.
     *
     * @return the output column definition
     */
    @Override
    public OutputColumns getOutputColumns() {
        // The suffix is plain ASCII; a stray non-ASCII character here would
        // leak into the user-visible column name.
        return new OutputColumns(String.class, column.getName() + " (cleansed)");
    }

    /**
     * Reads the configured column's value from the row and cleanses it.
     *
     * @param row
     *            the row to read from
     * @return a one-element array as produced by {@link #transform(String)}
     */
    @Override
    public Object[] transform(final InputRow row) {
        final String value = row.getValue(column);
        return transform(value);
    }

    /**
     * Cleanses a single string according to the configured removal flags.
     *
     * @param value
     *            the string to cleanse, may be {@code null}
     * @return a one-element array holding the iterator's resulting string, or
     *         a one-element array holding {@code null} when {@code value} is
     *         {@code null}
     */
    public Object[] transform(final String value) {
        if (value == null) {
            // Single output column with no value.
            return new Object[1];
        }

        final CharIterator it = new CharIterator(value);
        while (it.hasNext()) {
            it.next();
            // Classification order matters: the first matching class decides,
            // and anything matching none of them is treated as a sign.
            if (it.isWhitespace()) {
                if (removeWhitespaces) {
                    it.remove();
                }
            } else if (it.isLetter()) {
                if (removeLetters) {
                    it.remove();
                }
            } else if (it.isDigit()) {
                if (removeDigits) {
                    it.remove();
                }
            } else if (removeSigns) {
                it.remove();
            }
        }
        return new Object[] { it.toString() };
    }
}