/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.metamodel.datahub;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;

import org.apache.metamodel.AbstractUpdateCallback;
import org.apache.metamodel.UpdateCallback;
import org.apache.metamodel.create.TableCreationBuilder;
import org.apache.metamodel.delete.RowDeletionBuilder;
import org.apache.metamodel.drop.TableDropBuilder;
import org.apache.metamodel.insert.RowInsertionBuilder;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.update.RowUpdationBuilder;
import org.datacleaner.metamodel.datahub.update.SourceRecordIdentifier;
import org.datacleaner.metamodel.datahub.update.UpdateData;

/**
 * Update callback for a {@link DataHubDataContext}.
 *
 * Row updates and row deletes are accepted; creating tables, dropping tables
 * and inserting rows throw {@link UnsupportedOperationException}. Updates,
 * source record deletes and golden record deletes are each collected in their
 * own buffer and handed to the data context once the buffer reaches its batch
 * size. {@link #close()} hands over whatever is still buffered.
 *
 * NOTE(review): the buffers are plain lists and no synchronization is visible
 * in this class - presumably a callback is used by one thread only; confirm.
 */
public class DataHubUpdateCallback extends AbstractUpdateCallback implements UpdateCallback, Closeable {

    /**
     * Number of buffered updates that triggers a flush. Despite its name this
     * threshold is applied to updates; inserts are not supported by this
     * callback.
     */
    public static final int INSERT_BATCH_SIZE = 100;

    /**
     * Number of buffered deletes (per kind of delete) that triggers a flush.
     */
    public static final int DELETE_BATCH_SIZE = 100;

    private final DataHubDataContext _dataContext;

    // Each buffer is null until its first element arrives and is reset to null
    // after a successful flush; the flushed list itself is never reused.
    private List<UpdateData> _pendingUpdates;
    private List<SourceRecordIdentifier> _pendingSourceDeletes;
    private List<String> _pendingGoldenRecordDeletes;

    /**
     * Constructor. The buffers for pending updates and deletes start out
     * unallocated ({@code null}) and are created on first use.
     *
     * @param dataContext
     *            The data context.
     */
    public DataHubUpdateCallback(final DataHubDataContext dataContext) {
        super(dataContext);
        _dataContext = dataContext;
        _pendingUpdates = null;
        _pendingSourceDeletes = null;
        _pendingGoldenRecordDeletes = null;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public TableCreationBuilder createTable(final Schema arg0, final String arg1)
            throws IllegalArgumentException, IllegalStateException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public TableDropBuilder dropTable(final Table arg0)
            throws IllegalArgumentException, IllegalStateException, UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean isDropTableSupported() {
        return false;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public RowInsertionBuilder insertInto(final Table arg0)
            throws IllegalArgumentException, IllegalStateException, UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    /**
     * Creates a delete builder for the given table.
     *
     * @param table
     *            The table to delete rows from.
     * @return a new {@link DataHubDeleteBuilder} bound to this callback
     */
    @Override
    public RowDeletionBuilder deleteFrom(final Table table)
            throws IllegalArgumentException, IllegalStateException, UnsupportedOperationException {
        return new DataHubDeleteBuilder(this, table);
    }

    // NOTE(review): this reports false although deleteFrom(Table) returns a
    // working builder. The value is left unchanged because callers may rely on
    // it - confirm whether the mismatch is intentional.
    @Override
    public boolean isDeleteSupported() {
        return false;
    }

    @Override
    public boolean isUpdateSupported() {
        return true;
    }

    /**
     * Creates an update builder for the given table.
     *
     * @param table
     *            The table to update rows in.
     * @return a new {@link DataHubUpdateBuilder} bound to this callback
     */
    @Override
    public RowUpdationBuilder update(final Table table)
            throws IllegalArgumentException, IllegalStateException, UnsupportedOperationException {
        return new DataHubUpdateBuilder(this, table);
    }

    /**
     * Buffers an update and passes the buffered updates to the data context
     * once <code>INSERT_BATCH_SIZE</code> of them have been collected.
     *
     * @param updateData
     *            Contains the records and fields to be updated.
     */
    public void executeUpdate(final UpdateData updateData) {
        if (_pendingUpdates == null) {
            _pendingUpdates = new ArrayList<>();
        }
        _pendingUpdates.add(updateData);

        if (_pendingUpdates.size() >= INSERT_BATCH_SIZE) {
            flushUpdates();
        }
    }

    /**
     * Deletes a golden record by its golden record id. The deletes are buffered
     * and executed in batches of <code>DELETE_BATCH_SIZE</code>.
     *
     * @param grId
     *            The golden record id to delete.
     */
    public void executeDeleteGoldenRecord(final String grId) {
        if (_pendingGoldenRecordDeletes == null) {
            _pendingGoldenRecordDeletes = new ArrayList<>();
        }
        _pendingGoldenRecordDeletes.add(grId);

        if (_pendingGoldenRecordDeletes.size() >= DELETE_BATCH_SIZE) {
            flushGoldenRecordDeletes();
        }
    }

    /**
     * Delete a DataHub source record. The deletes are buffered and sent to
     * DataHub in batches of <code>DELETE_BATCH_SIZE</code>.
     *
     * @param source
     *            The name of the source system
     * @param id
     *            The source record identifier.
     * @param recordType
     *            The record type.
     */
    public void executeDeleteSourceRecord(final String source, final String id, final String recordType) {
        if (_pendingSourceDeletes == null) {
            _pendingSourceDeletes = new ArrayList<>();
        }
        // The third constructor argument is deliberately passed as null here;
        // its meaning is defined by SourceRecordIdentifier.
        _pendingSourceDeletes.add(new SourceRecordIdentifier(source, id, null, recordType));

        if (_pendingSourceDeletes.size() >= DELETE_BATCH_SIZE) {
            flushSourceDeletes();
        }
    }

    /**
     * Closes the callback. All remaining updates and deletes are flushed.
     *
     * Every buffer is attempted even if flushing an earlier one fails, so a
     * failing update batch does not cause buffered deletes to be silently
     * dropped. The first failure is rethrown after all flushes were attempted;
     * any later failures are attached to it as suppressed exceptions.
     */
    @Override
    public void close() {
        RuntimeException failure = null;

        try {
            flushUpdates();
        } catch (final RuntimeException e) {
            failure = e;
        }

        try {
            flushSourceDeletes();
        } catch (final RuntimeException e) {
            failure = mergeFailure(failure, e);
        }

        try {
            flushGoldenRecordDeletes();
        } catch (final RuntimeException e) {
            failure = mergeFailure(failure, e);
        }

        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Combines two flush failures into the one exception that close() rethrows.
     *
     * @param first
     *            The failure recorded so far, or {@code null} if there is none.
     * @param next
     *            The failure that just occurred.
     * @return {@code first} with {@code next} suppressed, or {@code next} when
     *         there was no earlier failure
     */
    private static RuntimeException mergeFailure(final RuntimeException first, final RuntimeException next) {
        if (first == null) {
            return next;
        }
        // Throwable.addSuppressed rejects self-suppression, so skip it if the
        // very same exception instance is seen twice.
        if (first != next) {
            first.addSuppressed(next);
        }
        return first;
    }

    /**
     * Hands the buffered updates to the data context; does nothing when the
     * buffer is unallocated or empty. The buffer is only dropped after the
     * data context call has returned, so a batch whose call throws stays
     * buffered.
     */
    private void flushUpdates() {
        if (_pendingUpdates == null || _pendingUpdates.isEmpty()) {
            return;
        }
        _dataContext.executeUpdates(_pendingUpdates);
        _pendingUpdates = null;
    }

    /**
     * Hands the buffered source record deletes to the data context; does
     * nothing when the buffer is unallocated or empty. The buffer is only
     * dropped after the data context call has returned.
     */
    private void flushSourceDeletes() {
        if (_pendingSourceDeletes == null || _pendingSourceDeletes.isEmpty()) {
            return;
        }
        _dataContext.executeSourceDelete(_pendingSourceDeletes);
        _pendingSourceDeletes = null;
    }

    /**
     * Hands the buffered golden record deletes to the data context; does
     * nothing when the buffer is unallocated or empty. The buffer is only
     * dropped after the data context call has returned.
     */
    private void flushGoldenRecordDeletes() {
        if (_pendingGoldenRecordDeletes == null || _pendingGoldenRecordDeletes.isEmpty()) {
            return;
        }
        _dataContext.executeGoldenRecordDelete(_pendingGoldenRecordDeletes);
        _pendingGoldenRecordDeletes = null;
    }
}