/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.standardize;
import javax.inject.Inject;
import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.Transformer;
import org.eobjects.analyzer.beans.api.TransformerBean;
import org.eobjects.analyzer.beans.categories.MatchingAndStandardizationCategory;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.util.HasGroupLiteral;
import org.eobjects.analyzer.util.NamedPattern;
import org.eobjects.analyzer.util.NamedPatternMatch;
/**
 * Tokenizes/standardizes an email address into its two components: the
 * username and the domain.
 */
@TransformerBean("Email standardizer")
@Description("Retrieve the username or domain from an email address.")
@Categorized({ MatchingAndStandardizationCategory.class })
public class EmailStandardizerTransformer implements Transformer<String> {

    /**
     * Named pattern built from the template {@code USERNAME@DOMAIN} and the
     * {@link EmailPart} enum, whose constants supply the regular expression
     * group for each token of the template.
     */
    public static final NamedPattern<EmailPart> EMAIL_PATTERN = new NamedPattern<EmailPart>("USERNAME@DOMAIN",
            EmailPart.class);

    /**
     * The parts of an email address that this transformer extracts. Each
     * constant carries the regular expression group literal used for that
     * part.
     */
    public static enum EmailPart implements HasGroupLiteral {

        /** The part in front of the {@code @} sign. */
        USERNAME("([a-zA-Z0-9\\._%+-]+)"),

        /**
         * The part after the {@code @} sign: one or more allowed characters,
         * a dot, and a final label of at least two characters. The final
         * label deliberately has no upper length bound, because top-level
         * domains longer than four characters (for example {@code museum}
         * or {@code photography}) are valid.
         */
        DOMAIN("([a-zA-Z0-9\\._%+-]+\\.[a-zA-Z0-9\\._%+-]{2,})");

        private final String groupLiteral;

        private EmailPart(String groupLiteral) {
            this.groupLiteral = groupLiteral;
        }

        /**
         * @return the regular expression group literal of this email part
         */
        @Override
        public String getGroupLiteral() {
            return groupLiteral;
        }
    }

    /** The column holding the email addresses to standardize. */
    @Inject
    @Configured
    InputColumn<String> inputColumn;

    /**
     * @return the two output columns, named "Username" and "Domain", in the
     *         same order as the values returned by the transform methods
     */
    @Override
    public OutputColumns getOutputColumns() {
        return new OutputColumns("Username", "Domain");
    }

    /**
     * Reads the value of the configured input column from the row and
     * splits it into username and domain.
     *
     * @param inputRow
     *            the row to read the email address from
     * @return a two-element array: username at index 0, domain at index 1
     * @see #transform(String)
     */
    @Override
    public String[] transform(InputRow inputRow) {
        String value = inputRow.getValue(inputColumn);
        return transform(value);
    }

    /**
     * Splits an email address into username and domain using
     * {@link #EMAIL_PATTERN}.
     *
     * @param value
     *            the email address, may be <code>null</code>
     * @return a two-element array: username at index 0, domain at index 1.
     *         Both elements are <code>null</code> when the value is
     *         <code>null</code> or the pattern yields no match.
     */
    public String[] transform(String value) {
        String username = null;
        String domain = null;

        if (value != null) {
            final NamedPatternMatch<EmailPart> match = EMAIL_PATTERN.match(value);
            // no match leaves both parts null
            if (match != null) {
                username = match.get(EmailPart.USERNAME);
                domain = match.get(EmailPart.DOMAIN);
            }
        }

        return new String[] { username, domain };
    }

    /**
     * Sets the input column programmatically instead of through injection.
     *
     * @param inputColumn
     *            the column holding the email addresses to standardize
     */
    public void setInputColumn(InputColumn<String> inputColumn) {
        this.inputColumn = inputColumn;
    }
}