/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.reference;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Column;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.job.NoSuchColumnException;
import org.datacleaner.job.NoSuchDatastoreException;
import org.datacleaner.util.ReadObjectBuilder;
/**
* A dictionary backed by a column in a datastore.
*
* Note that even though this datastore <i>is</i> serializable it is not
* entirely able to gracefully deserialize. The user of the dictionary will have
* to inject the DatastoreCatalog using the setter method for this.
*
*
*/
public final class DatastoreDictionary extends AbstractReferenceData implements Dictionary {
private static final long serialVersionUID = 1L;
private final String _datastoreName;
private final String _qualifiedColumnName;
private final boolean _loadIntoMemory;
public DatastoreDictionary(final String name, final String datastoreName, final String qualifiedColumnName) {
this(name, datastoreName, qualifiedColumnName, true);
}
public DatastoreDictionary(final String name, final String datastoreName, final String qualifiedColumnName,
final boolean loadIntoMemory) {
super(name);
_datastoreName = datastoreName;
_qualifiedColumnName = qualifiedColumnName;
_loadIntoMemory = loadIntoMemory;
}
private void readObject(final ObjectInputStream stream) throws IOException, ClassNotFoundException {
ReadObjectBuilder.create(this, DatastoreDictionary.class).readObject(stream);
}
@Override
public boolean equals(final Object obj) {
if (super.equals(obj)) {
final DatastoreDictionary other = (DatastoreDictionary) obj;
return Objects.equals(_datastoreName, other._datastoreName) && Objects
.equals(_qualifiedColumnName, other._qualifiedColumnName) && Objects
.equals(_loadIntoMemory, other._loadIntoMemory);
}
return false;
}
public SimpleDictionary loadIntoMemory(final DatastoreConnection datastoreConnection) {
final DataContext dataContext = datastoreConnection.getDataContext();
final Column column = getColumn(datastoreConnection);
final Query query = dataContext.query().from(column.getTable()).select(column).toQuery();
if (datastoreConnection.getDatastore().getPerformanceCharacteristics().isQueryOptimizationPreferred()) {
query.getSelectClause().setDistinct(true);
}
final Set<String> values = new HashSet<>();
try (DataSet dataSet = dataContext.executeQuery(query)) {
while (dataSet.next()) {
final Object value = dataSet.getRow().getValue(0);
if (value != null) {
values.add(value.toString());
}
}
}
return new SimpleDictionary(getName(), values);
}
@Override
public DictionaryConnection openConnection(final DataCleanerConfiguration configuration) {
final Datastore datastore = configuration.getDatastoreCatalog().getDatastore(_datastoreName);
if (datastore == null) {
throw new NoSuchDatastoreException(_datastoreName);
}
final DatastoreConnection datastoreConnection = datastore.openConnection();
if (_loadIntoMemory) {
final SimpleDictionary simpleDictionary = loadIntoMemory(datastoreConnection);
// no need for the connection anymore
datastoreConnection.close();
return simpleDictionary.openConnection(configuration);
}
return new DatastoreDictionaryConnection(this, datastoreConnection);
}
public Column getColumn(final DatastoreConnection datastoreConnection) {
try {
final Column column = datastoreConnection.getDataContext().getColumnByQualifiedLabel(_qualifiedColumnName);
if (column == null) {
throw new NoSuchColumnException(_qualifiedColumnName);
}
return column;
} catch (final RuntimeException e) {
datastoreConnection.close();
throw e;
}
}
public String getDatastoreName() {
return _datastoreName;
}
public String getQualifiedColumnName() {
return _qualifiedColumnName;
}
public boolean isLoadIntoMemory() {
return _loadIntoMemory;
}
}