/** * AnalyzerBeans * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.eobjects.analyzer.beans.filter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Date; import java.util.Properties; import org.eobjects.analyzer.beans.api.Categorized; import org.eobjects.analyzer.beans.api.Close; import org.eobjects.analyzer.beans.api.Configured; import org.eobjects.analyzer.beans.api.Description; import org.eobjects.analyzer.beans.api.Distributed; import org.eobjects.analyzer.beans.api.FileProperty; import org.eobjects.analyzer.beans.api.FileProperty.FileAccessMode; import org.eobjects.analyzer.beans.api.FilterBean; import org.eobjects.analyzer.beans.api.Initialize; import org.eobjects.analyzer.beans.api.Optimizeable; import org.eobjects.analyzer.beans.api.QueryOptimizedFilter; import org.eobjects.analyzer.beans.categories.DateAndTimeCategory; import org.eobjects.analyzer.beans.categories.FilterCategory; import org.eobjects.analyzer.beans.convert.ConvertToDateTransformer; import org.eobjects.analyzer.beans.convert.ConvertToNumberTransformer; import org.eobjects.analyzer.data.InputColumn; import org.eobjects.analyzer.data.InputRow; import org.eobjects.analyzer.util.StringUtils; import org.apache.metamodel.query.OperatorType; import org.apache.metamodel.query.Query; import org.apache.metamodel.schema.Column; import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.Action; import org.apache.metamodel.util.Resource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Filter for archieving a "change data capture" mechanism based on a * "last modified" field. After each execution, the greatest timestamp is * recorded and picked up successively by the next run. */ @FilterBean("Capture changed records") @Description("Include only records that have changed since the last time you ran the job. This filter assumes a field containing the timestamp of the latest change for each record, and stores the greatest encountered value in order to update the filter's future state.") @Distributed(false) @Categorized({ FilterCategory.class, DateAndTimeCategory.class }) @Optimizeable(removeableUponOptimization = false) public class CaptureChangedRecordsFilter implements QueryOptimizedFilter<ValidationCategory> { private static final Logger logger = LoggerFactory.getLogger(CaptureChangedRecordsFilter.class); @Configured @Description("Column containing the last modification timestamp or date.") InputColumn<Object> lastModifiedColumn; @Configured @Description("A file used to persist and load the latest state of this data capture component.") @FileProperty(extension = "properties", accessMode = FileAccessMode.SAVE) Resource captureStateFile; @Configured(required = false) @Description("A custom identifier for this captured state. If omitted, the name of the 'Last modified column' will be used.") String captureStateIdentifier; private long _lastModifiedThreshold = -1l; private long _greatestEncounteredDate = -1l; @Initialize public void initialize() throws IOException { final Properties properties = loadProperties(); final String key = getPropertyKey(); final Object lastModified = properties.get(key); if (lastModified != null) { final Number lastModifiedAsNumber = convertToNumber(lastModified); if (lastModifiedAsNumber != null) { _lastModifiedThreshold = lastModifiedAsNumber.longValue(); } } } @Override public boolean isOptimizable(final ValidationCategory category) { // only the valid category is optimizeable currently return category == ValidationCategory.VALID; } @Override public Query optimizeQuery(final Query q, final ValidationCategory category) { assert category == ValidationCategory.VALID; if (_lastModifiedThreshold != -1l) { final Column column = lastModifiedColumn.getPhysicalColumn(); if (column.getType().isTimeBased()) { q.where(column, OperatorType.GREATER_THAN, new Date(_lastModifiedThreshold)); } else { q.where(column, OperatorType.GREATER_THAN, _lastModifiedThreshold); } } return q; } @Close(onFailure = false) public void close() throws IOException { if (_greatestEncounteredDate != -1) { final Properties properties = loadProperties(); final String key = getPropertyKey(); properties.setProperty(key, "" + _greatestEncounteredDate); captureStateFile.write(new Action<OutputStream>() { @Override public void run(OutputStream out) throws Exception { properties.store(out, null); } }); } } /** * Gets the key to use in the capture state file. If there is not a * captureStateIdentifier available, we want to avoid using a hardcoded key, * since the same file may be used for multiple purposes, even multiple * filters of the same type. Of course this is not desired configuration, * but may be more convenient for lazy users! * * @return */ private String getPropertyKey() { if (StringUtils.isNullOrEmpty(captureStateIdentifier)) { if (lastModifiedColumn.isPhysicalColumn()) { Table table = lastModifiedColumn.getPhysicalColumn().getTable(); if (table != null && !StringUtils.isNullOrEmpty(table.getName())) { return table.getName() + "." + lastModifiedColumn.getName() + ".GreatestLastModifiedTimestamp"; } } return lastModifiedColumn.getName() + ".GreatestLastModifiedTimestamp"; } return captureStateIdentifier.trim() + ".GreatestLastModifiedTimestamp"; } private Properties loadProperties() throws IOException { final Properties properties = new Properties(); if (!captureStateFile.isExists()) { logger.info("Capture state file does not exist: {}", captureStateFile); return properties; } captureStateFile.read(new Action<InputStream>() { @Override public void run(InputStream in) throws Exception { properties.load(in); } }); return properties; } @Override public ValidationCategory categorize(InputRow inputRow) { final Object lastModified = inputRow.getValue(lastModifiedColumn); long rowColumnValue = -1l; if (lastModified != null) { if (lastModified instanceof String) { final Date date = ConvertToDateTransformer.getInternalInstance().transformValue(lastModified); if (date != null) { rowColumnValue = date.getTime(); } } else { final Number lastModifiedAsNumber = convertToNumber(lastModified); if (lastModifiedAsNumber != null) { rowColumnValue = lastModifiedAsNumber.longValue(); } } } if (rowColumnValue != -1l) { synchronized (this) { if (_greatestEncounteredDate == -1l || _greatestEncounteredDate < rowColumnValue) { _greatestEncounteredDate = rowColumnValue; } } } if (_lastModifiedThreshold == -1l) { return ValidationCategory.VALID; } if (rowColumnValue == -1l) { logger.info("Value of {} was not comparable, returning INVALID category: {}", lastModifiedColumn.getName(), inputRow); return ValidationCategory.INVALID; } if (_lastModifiedThreshold < rowColumnValue) { return ValidationCategory.VALID; } return ValidationCategory.INVALID; } private Number convertToNumber(final Object lastModified) { return ConvertToNumberTransformer.transformValue(lastModified); } }