/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.reference; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import org.apache.metamodel.DataContext; import org.apache.metamodel.data.DataSet; import org.apache.metamodel.data.Row; import org.apache.metamodel.schema.Column; import org.datacleaner.components.convert.ConvertToStringTransformer; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.connection.Datastore; import org.datacleaner.connection.DatastoreConnection; import org.datacleaner.job.NoSuchColumnException; import org.datacleaner.job.NoSuchDatastoreException; public final class DatastoreSynonymCatalog extends AbstractReferenceData implements SynonymCatalog { private static final long serialVersionUID = 1L; private final String _datastoreName; private final String _masterTermColumnPath; private final String[] _synonymColumnPaths; private final boolean _loadIntoMemory; public DatastoreSynonymCatalog(final String name, final String datastoreName, final String masterTermColumnPath, final String[] synonymColumnPaths) { this(name, datastoreName, masterTermColumnPath, synonymColumnPaths, true); } public DatastoreSynonymCatalog(final String name, final String datastoreName, final String masterTermColumnPath, final String[] synonymColumnPaths, final boolean loadIntoMemory) { super(name); _datastoreName = datastoreName; _masterTermColumnPath = masterTermColumnPath; _synonymColumnPaths = synonymColumnPaths; _loadIntoMemory = loadIntoMemory; } protected static String getMasterTerm(final Row row, final Column column) { final Object value = row.getValue(column); return ConvertToStringTransformer.transformValue(value); } protected static String[] getSynonyms(final Row row, final Column[] columns) { final List<String> synonyms = new ArrayList<>(); for (final Column synonymColumn : columns) { final Object value = row.getValue(synonymColumn); if (value != null) { final String stringValue = value.toString(); synonyms.add(stringValue); } } return synonyms.toArray(new String[synonyms.size()]); } @Override public boolean equals(final Object obj) { if (super.equals(obj)) { final DatastoreSynonymCatalog other = (DatastoreSynonymCatalog) obj; return Objects.equals(_datastoreName, other._datastoreName) && Objects .equals(_masterTermColumnPath, other._masterTermColumnPath) && Arrays .equals(_synonymColumnPaths, other._synonymColumnPaths) && Objects .equals(_loadIntoMemory, other._loadIntoMemory); } return false; } public String getDatastoreName() { return _datastoreName; } public String getMasterTermColumnPath() { return _masterTermColumnPath; } public String[] getSynonymColumnPaths() { return Arrays.copyOf(_synonymColumnPaths, _synonymColumnPaths.length); } @Override public SynonymCatalogConnection openConnection(final DataCleanerConfiguration configuration) { final Datastore datastore = configuration.getDatastoreCatalog().getDatastore(_datastoreName); if (datastore == null) { throw new NoSuchDatastoreException(_datastoreName); } final DatastoreConnection datastoreConnection = datastore.openConnection(); if (_loadIntoMemory) { final SimpleSynonymCatalog simpleSynonymCatalog = loadIntoMemory(datastoreConnection); // no need for the connection anymore datastoreConnection.close(); return simpleSynonymCatalog.openConnection(configuration); } return new DatastoreSynonymCatalogConnection(this, datastoreConnection); } public Column[] getSynonymColumns(final DatastoreConnection datastoreConnection) { final Column[] columns = new Column[_synonymColumnPaths.length]; for (int i = 0; i < columns.length; i++) { final String columnPath = _synonymColumnPaths[i]; columns[i] = datastoreConnection.getDataContext().getColumnByQualifiedLabel(columnPath); if (columns[i] == null) { throw new NoSuchColumnException(columnPath); } } return columns; } public Column getMasterTermColumn(final DatastoreConnection datastoreConnection) { final DataContext dataContext = datastoreConnection.getDataContext(); final Column masterTermColumn = dataContext.getColumnByQualifiedLabel(_masterTermColumnPath); if (masterTermColumn == null) { throw new NoSuchColumnException(_masterTermColumnPath); } return masterTermColumn; } public SimpleSynonymCatalog loadIntoMemory(final DatastoreConnection datastoreConnection) { final Map<String, String> synonymMap = new HashMap<>(); final Column masterTermColumn = getMasterTermColumn(datastoreConnection); final Column[] columns = getSynonymColumns(datastoreConnection); try (DataSet dataSet = datastoreConnection.getDataContext().query().from(masterTermColumn.getTable().getName()) .select(masterTermColumn).select(columns).execute()) { while (dataSet.next()) { final Row row = dataSet.getRow(); final String masterTerm = getMasterTerm(row, masterTermColumn); final String[] synonyms = getSynonyms(row, columns); for (final String synonym : synonyms) { synonymMap.put(synonym, masterTerm); } } } return new SimpleSynonymCatalog(getName(), synonymMap); } public boolean isLoadIntoMemory() { return _loadIntoMemory; } }