/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.reference; import java.io.IOException; import java.io.ObjectInputStream; import java.util.List; import java.util.Objects; import org.datacleaner.beans.stringpattern.DefaultTokenizer; import org.datacleaner.beans.stringpattern.Token; import org.datacleaner.beans.stringpattern.TokenPattern; import org.datacleaner.beans.stringpattern.TokenPatternImpl; import org.datacleaner.beans.stringpattern.Tokenizer; import org.datacleaner.beans.stringpattern.TokenizerConfiguration; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.util.LabelUtils; import org.datacleaner.util.ReadObjectBuilder; /** * Represents a string pattern that is based on a sequence of token types. The * pattern format is similar to the one used by the Pattern finder analyzer, * which makes it ideal for reusing discovered patterns. * * @see TokenPattern * * */ public final class SimpleStringPattern extends AbstractReferenceData implements StringPattern { private static final long serialVersionUID = 1L; private final String _expression; private transient TokenPattern _tokenPattern; private transient DefaultTokenizer _tokenizer; private transient TokenizerConfiguration _configuration; public SimpleStringPattern(final String name, final String expression) { this(name, expression, new TokenizerConfiguration()); } public SimpleStringPattern(final String name, final String expression, final TokenizerConfiguration configuration) { super(name); _expression = expression; _configuration = configuration; } private void readObject(final ObjectInputStream stream) throws IOException, ClassNotFoundException { ReadObjectBuilder.create(this, SimpleStringPattern.class).readObject(stream); } @Override public boolean equals(final Object obj) { if (super.equals(obj)) { final SimpleStringPattern other = (SimpleStringPattern) obj; return Objects.equals(_expression, other._expression); } return false; } public boolean matches(final String string) { final List<Token> tokens = getTokenizer().tokenize(string); return getTokenPattern().match(tokens); } @Override public StringPatternConnection openConnection(final DataCleanerConfiguration configuration) { return new StringPatternConnection() { @Override public boolean matches(final String string) { return SimpleStringPattern.this.matches(string); } @Override public void close() { } }; } private Tokenizer getTokenizer() { if (_tokenizer == null) { _tokenizer = new DefaultTokenizer(getConfiguration()); } return _tokenizer; } private TokenizerConfiguration getConfiguration() { if (_configuration == null) { // TODO: Ideally we should provide all the configuration options in // the constructor _configuration = new TokenizerConfiguration(); } return _configuration; } private TokenPattern getTokenPattern() { if (_tokenPattern == null) { final String expression; if (LabelUtils.NULL_LABEL.equals(_expression)) { expression = null; } else if (LabelUtils.BLANK_LABEL.equals(_expression)) { expression = ""; } else { expression = _expression; } final List<Token> tokens = getTokenizer().tokenize(expression); _tokenPattern = new TokenPatternImpl(expression, tokens, getConfiguration()); } return _tokenPattern; } public String getExpression() { return _expression; } @Override public String toString() { return "SimpleStringPattern[name=" + getName() + ", expression=" + _expression + "]"; } }