/**
 * AnalyzerBeans
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.eobjects.analyzer.beans.transform;

import java.util.StringTokenizer;

import org.eobjects.analyzer.beans.api.Alias;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.Transformer;
import org.eobjects.analyzer.beans.api.TransformerBean;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.reference.SynonymCatalog;
import org.eobjects.analyzer.util.HasLabelAdvice;

/**
 * A simple transformer that uses a synonym catalog to replace a synonym with
 * its master term.
 *
 * The transformer either looks up the complete input value, or (when
 * {@link #lookUpEveryToken} is enabled) splits the value on a fixed set of
 * delimiter characters and looks up every token individually, keeping the
 * delimiters in place.
 */
@TransformerBean("Synonym lookup")
@Alias("Synonym replacement")
@Description("Replaces strings with their synonyms")
public class SynonymLookupTransformer implements Transformer<String>, HasLabelAdvice {

    /**
     * Characters that separate tokens when {@link #lookUpEveryToken} is
     * enabled. This is a plain character set (as expected by
     * {@link StringTokenizer}), not a regular expression.
     */
    private static final String TOKEN_DELIMITERS = " \t\n\r\f.,!?\"'+-_:;/\\()%@";

    @Configured
    InputColumn<String> column;

    @Configured
    SynonymCatalog synonymCatalog;

    @Configured
    @Description("Retain original value in case no synonym is found (otherwise null)")
    boolean retainOriginalValue = true;

    @Configured
    @Description("Tokenize and look up every token of the input, rather than looking up the complete input string?")
    boolean lookUpEveryToken = false;

    /**
     * Creates an unconfigured transformer; the {@code @Configured} fields are
     * expected to be assigned afterwards.
     */
    public SynonymLookupTransformer() {
    }

    /**
     * Creates a transformer that looks up complete input values.
     *
     * @param column
     *            the column whose values are looked up
     * @param synonymCatalog
     *            the catalog used to resolve master terms
     * @param retainOriginalValue
     *            whether to output the original value (rather than null) when
     *            the catalog has no master term for it
     */
    public SynonymLookupTransformer(InputColumn<String> column, SynonymCatalog synonymCatalog,
            boolean retainOriginalValue) {
        this();
        this.column = column;
        this.synonymCatalog = synonymCatalog;
        this.retainOriginalValue = retainOriginalValue;
    }

    /**
     * @return a single output column named after the input column with the
     *         suffix " (synonyms replaced)"
     */
    @Override
    public OutputColumns getOutputColumns() {
        return new OutputColumns(new String[] { column.getName() + " (synonyms replaced)" });
    }

    /**
     * @return "Lookup: " followed by the catalog name, or null when no synonym
     *         catalog has been configured
     */
    @Override
    public String getSuggestedLabel() {
        if (synonymCatalog == null) {
            return null;
        }
        return "Lookup: " + synonymCatalog.getName();
    }

    /**
     * Replaces the value of the configured column with its master term(s).
     *
     * @param inputRow
     *            the row to read the column value from
     * @return a one-element array holding the transformed value; the element
     *         is null when the input value is null
     */
    @Override
    public String[] transform(InputRow inputRow) {
        final String originalValue = inputRow.getValue(column);
        if (originalValue == null) {
            return new String[1];
        }

        final String result;
        if (lookUpEveryToken) {
            result = lookupTokens(originalValue);
        } else {
            result = lookup(originalValue);
        }
        return new String[] { result };
    }

    /**
     * Splits the value on {@link #TOKEN_DELIMITERS}, looks up every
     * non-delimiter token and re-assembles the string. Delimiters are copied
     * unchanged, and so are tokens for which the lookup yields null.
     */
    private String lookupTokens(String value) {
        final StringBuilder sb = new StringBuilder();
        // returnDelims=true so that the delimiters can be re-inserted verbatim
        final StringTokenizer tokenizer = new StringTokenizer(value, TOKEN_DELIMITERS, true);
        while (tokenizer.hasMoreTokens()) {
            final String token = tokenizer.nextToken();
            if (isDelimiter(token)) {
                // add the delim as-is
                sb.append(token);
            } else {
                final String replacedToken = lookup(token);
                // a token is never dropped from the output, even when
                // retainOriginalValue is false
                sb.append(replacedToken == null ? token : replacedToken);
            }
        }
        return sb.toString();
    }

    /**
     * Determines whether a token produced by the tokenizer is one of the
     * delimiter characters. Delimiters are returned by the tokenizer as
     * single-character tokens, so a membership test on that character is
     * sufficient. (Using {@code String.matches} here would be wrong, because
     * it would interpret the delimiter set as a regular expression.)
     */
    private static boolean isDelimiter(String token) {
        return token.length() == 1 && TOKEN_DELIMITERS.indexOf(token.charAt(0)) >= 0;
    }

    /**
     * Resolves the master term of a value through the synonym catalog.
     *
     * @return the master term; or, when the catalog yields null, the original
     *         value if {@link #retainOriginalValue} is set and null otherwise
     */
    private String lookup(String originalValue) {
        final String replacedValue = synonymCatalog.getMasterTerm(originalValue);
        if (retainOriginalValue && replacedValue == null) {
            return originalValue;
        }
        return replacedValue;
    }
}