/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.maxrows;
import java.util.concurrent.atomic.AtomicInteger;
import javax.inject.Named;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Column;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Distributed;
import org.datacleaner.api.HasLabelAdvice;
import org.datacleaner.api.HiddenProperty;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.QueryOptimizedFilter;
import org.datacleaner.api.Validate;
import org.datacleaner.components.categories.FilterCategory;
/**
 * Filter that categorizes a window of rows as {@link Category#VALID} and all
 * other rows as {@link Category#INVALID}. The window starts at the 1-based
 * {@code firstRow} and spans at most {@code maxRows} rows.
 *
 * <p>
 * The window can be applied in two ways: by counting rows as they pass through
 * {@link #categorize(InputRow)}, or by pushing the window down into the
 * {@link Query} via {@link #optimizeQuery(Query, Category)}.
 */
@Named("Max rows")
@Description("Sets a maximum number of rows to process.")
@Categorized(value = FilterCategory.class)
@Distributed(false)
public class MaxRowsFilter implements QueryOptimizedFilter<MaxRowsFilter.Category>, HasLabelAdvice {

    /**
     * Outcome categories of this filter: {@code VALID} for rows inside the
     * configured window, {@code INVALID} for rows outside of it.
     */
    public enum Category {
        VALID, INVALID
    }

    /** Name of the (hidden) configured property backing {@link #applyOrdering}. */
    public static final String PROPERTY_APPLY_ORDERING = "Apply ordering";

    /** Number of rows seen so far by {@link #categorize(InputRow)}. */
    private final AtomicInteger counter = new AtomicInteger();

    @Configured
    @NumberProperty(negative = false, zero = false)
    @Description("The maximum number of rows to process.")
    int maxRows = 1000;

    @Configured
    @NumberProperty(negative = false, zero = false)
    @Description("The first row (aka 'offset') to process.")
    int firstRow = 1;

    // this property is hidden because normally it is driven by the selection of
    // "orderColumn" below
    @Configured(value = PROPERTY_APPLY_ORDERING, order = 1000, required = false)
    @HiddenProperty
    boolean applyOrdering = true;

    @Configured(order = 1001, required = false)
    @Description("Optional column to use for specifying dataset ordering. Use if consistent pagination is needed.")
    InputColumn<?> orderColumn;

    /**
     * Creates a filter with the default configuration (first row 1, max rows
     * 1000, ordering applied if an order column is set).
     */
    public MaxRowsFilter() {
    }

    /**
     * Creates a filter with an explicit window.
     *
     * @param firstRow
     *            the 1-based number of the first row to accept
     * @param maxRows
     *            the maximum number of rows to accept
     */
    public MaxRowsFilter(final int firstRow, final int maxRows) {
        this();
        this.firstRow = firstRow;
        this.maxRows = maxRows;
    }

    /**
     * @return a label of the form {@code "Max <maxRows> rows"}
     */
    @Override
    public String getSuggestedLabel() {
        return "Max " + getMaxRows() + " rows";
    }

    /**
     * @return the maximum number of rows to accept
     */
    public int getMaxRows() {
        return maxRows;
    }

    /**
     * @param maxRows
     *            the maximum number of rows to accept
     */
    public void setMaxRows(final int maxRows) {
        this.maxRows = maxRows;
    }

    /**
     * @return the 1-based number of the first row to accept
     */
    public int getFirstRow() {
        return firstRow;
    }

    /**
     * @param firstRow
     *            the 1-based number of the first row to accept
     */
    public void setFirstRow(final int firstRow) {
        this.firstRow = firstRow;
    }

    /**
     * @return the column used for ordering an optimized query, or
     *         {@code null} if none is set
     */
    public InputColumn<?> getOrderColumn() {
        return orderColumn;
    }

    /**
     * @param orderColumn
     *            the column used for ordering an optimized query; may be
     *            {@code null}
     */
    public void setOrderColumn(final InputColumn<?> orderColumn) {
        this.orderColumn = orderColumn;
    }

    /**
     * @return whether an optimized query is ordered by the order column (when
     *         one is set)
     */
    public boolean isApplyOrdering() {
        return applyOrdering;
    }

    /**
     * @param applyOrdering
     *            whether an optimized query should be ordered by the order
     *            column (when one is set)
     */
    public void setApplyOrdering(final boolean applyOrdering) {
        this.applyOrdering = applyOrdering;
    }

    /**
     * Checks that both {@code maxRows} and {@code firstRow} are positive.
     *
     * @throws IllegalStateException
     *             if either value is zero or negative
     */
    @Validate
    public void validate() {
        if (maxRows <= 0) {
            throw new IllegalStateException("Max rows value must be a positive integer");
        }
        if (firstRow <= 0) {
            throw new IllegalStateException("First row value must be a positive integer");
        }
    }

    /**
     * Counts the row and categorizes it by its position: rows numbered
     * {@code firstRow} (inclusive) up to {@code firstRow + maxRows}
     * (exclusive) are {@code VALID}, all others {@code INVALID}.
     *
     * @param inputRow
     *            the row being categorized; its content is not inspected
     * @return the category for the row at the current position
     */
    @Override
    public Category categorize(final InputRow inputRow) {
        final int count = counter.incrementAndGet();

        // The window end is computed as a long: with a very large maxRows
        // (e.g. Integer.MAX_VALUE) an int sum would wrap around to a negative
        // number and wrongly reject every row.
        final long windowEndExclusive = (long) firstRow + maxRows;

        if (count < firstRow || count >= windowEndExclusive) {
            return Category.INVALID;
        }
        return Category.VALID;
    }

    /**
     * @return {@code true} only for {@link Category#VALID}
     */
    @Override
    public boolean isOptimizable(final Category category) {
        // can only optimize the valid records
        return category == Category.VALID;
    }

    /**
     * Pushes the row window (and optionally the ordering) down into the query.
     * Any first-row/max-rows restriction already present on the query is
     * respected: the resulting window is the intersection of the existing
     * window and this filter's window applied within it.
     *
     * @param q
     *            the query to modify
     * @param category
     *            the category to optimize for; must be {@link Category#VALID}
     * @return the same query instance {@code q}, after modification
     * @throws IllegalStateException
     *             if {@code category} is not {@link Category#VALID}
     */
    @Override
    public Query optimizeQuery(final Query q, final Category category) {
        if (category != Category.VALID) {
            throw new IllegalStateException("Can only optimize the VALID max rows category");
        }

        final Integer previousMaxRows = q.getMaxRows();
        final Integer previousFirstRow = q.getFirstRow();

        // The first row that the query will effectively start at (null if the
        // query has no offset at all).
        Integer effectiveFirstRow = previousFirstRow;
        if (firstRow > 1) {
            if (previousFirstRow == null) {
                effectiveFirstRow = firstRow;
            } else {
                // Both values are 1-based: the n'th row of a result that
                // already starts at row p is row (p + n - 1).
                effectiveFirstRow = previousFirstRow.intValue() + firstRow - 1;
            }
            q.setFirstRow(effectiveFirstRow);
        }

        if (previousMaxRows == null) {
            q.setMaxRows(maxRows);
        } else {
            int newMaxRows = Math.min(previousMaxRows.intValue(), maxRows);
            if (previousFirstRow != null) {
                // Rows remaining in the previous window once the new first
                // row has been applied.
                final int maxWindowSizeFromBefore =
                        previousFirstRow.intValue() + previousMaxRows.intValue() - effectiveFirstRow.intValue();
                newMaxRows = Math.min(newMaxRows, maxWindowSizeFromBefore);
            }
            // avoid negative max rows
            newMaxRows = Math.max(0, newMaxRows);
            q.setMaxRows(newMaxRows);
        }

        if (applyOrdering && orderColumn != null) {
            final Column physicalColumn = orderColumn.getPhysicalColumn();
            q.orderBy(physicalColumn);
        }

        return q;
    }
}