/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.storage;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.util.CollectionUtils2;
import com.google.common.cache.Cache;
/**
 * An abstract {@link RowAnnotationFactory} that supports an (optional)
 * threshold on the number of rows that are stored per annotation.
 *
 * This class keeps track of how many rows have been annotated with each
 * annotation and decides whether a row should be stored. The actual storage
 * of row values and row/annotation relations is delegated to subclasses.
 */
public abstract class AbstractRowAnnotationFactory implements RowAnnotationFactory, Serializable {

    private static final long serialVersionUID = 1L;

    // Number of annotate(...) calls seen per annotation, used to enforce the
    // threshold. Declared as a plain Map because (deserialized) instances
    // may hold a non-concurrent map here, see getCounter(...).
    private final Map<RowAnnotationImpl, AtomicInteger> _rowCounts = new ConcurrentHashMap<RowAnnotationImpl, AtomicInteger>();

    // Max number of rows to store per annotation. May be null on instances
    // that were not created through the constructor (e.g. deserialized ones),
    // in which case no threshold is applied.
    private final Integer _storedRowsThreshold;

    // Row IDs for which storeRowValues(...) has already been invoked. The
    // field is transient, so it is null on a deserialized instance.
    private final transient Cache<Integer, Boolean> _cachedRows = CollectionUtils2.createCache(10000, 10 * 60);

    /**
     * Constructs the factory.
     *
     * @param storedRowsThreshold
     *            the max number of rows to store per annotation, or null for
     *            no limit (represented as {@link Integer#MAX_VALUE})
     */
    public AbstractRowAnnotationFactory(Integer storedRowsThreshold) {
        if (storedRowsThreshold == null) {
            _storedRowsThreshold = Integer.MAX_VALUE;
        } else {
            _storedRowsThreshold = storedRowsThreshold;
        }
    }

    /**
     * Annotates each of the given rows with a distinct count of 1.
     *
     * @param rows
     *            the rows to annotate
     * @param annotation
     *            the annotation to apply
     */
    @Override
    public void annotate(InputRow[] rows, RowAnnotation annotation) {
        for (InputRow row : rows) {
            annotate(row, 1, annotation);
        }
    }

    /**
     * Annotates a single row. The row (values and annotation relation) is
     * only stored while the number of rows annotated with this annotation is
     * below the threshold; the row count of the annotation is incremented in
     * either case.
     *
     * @param row
     *            the row to annotate
     * @param distinctCount
     *            the number of occurrences that this row represents
     * @param annotation
     *            the annotation to apply, must be a {@link RowAnnotationImpl}
     */
    @Override
    public final void annotate(InputRow row, int distinctCount, RowAnnotation annotation) {
        final RowAnnotationImpl ann = (RowAnnotationImpl) annotation;
        final AtomicInteger count = getCounter(ann);

        boolean storeRow = true;
        if (_storedRowsThreshold != null) {
            if (count.getAndIncrement() >= _storedRowsThreshold.intValue()) {
                storeRow = false;
            }
        }

        if (storeRow) {
            // TODO: In clustered scenarios, there's a chance of row ID
            // collision
            final int rowId = row.getId();

            if (markRowValuesStored(rowId)) {
                // only store row values when they were not present
                // previously
                storeRowValues(rowId, row, distinctCount);
            }

            storeRowAnnotation(rowId, annotation);
        }

        ann.incrementRowCount(distinctCount);
    }

    /**
     * Registers the row ID as having its values stored and tells whether the
     * caller needs to store the values.
     *
     * When the cache is not available (deserialized instance) there is no
     * record of which rows were stored, so the values are always stored.
     * Previously this case skipped storing the values altogether, leaving
     * annotations that pointed to rows without any values.
     *
     * @param rowId
     *            the ID of the row
     * @return true if the row values should be stored, false if the row ID
     *         is already registered
     */
    private boolean markRowValuesStored(int rowId) {
        if (_cachedRows == null) {
            return true;
        }
        final Boolean previously = _cachedRows.asMap().putIfAbsent(rowId, Boolean.TRUE);
        return previously == null;
    }

    /**
     * Gets the counter for an annotation, creating and registering it if it
     * does not exist yet.
     *
     * @param ann
     *            the annotation
     * @return the (shared) counter of the annotation, never null
     */
    private AtomicInteger getCounter(RowAnnotationImpl ann) {
        AtomicInteger count = _rowCounts.get(ann);
        if (count == null) {
            if (_rowCounts instanceof ConcurrentMap) {
                final AtomicInteger newCounter = new AtomicInteger();
                final ConcurrentMap<RowAnnotationImpl, AtomicInteger> concurrentMap = (ConcurrentMap<RowAnnotationImpl, AtomicInteger>) _rowCounts;
                // another thread may have registered a counter in the
                // meantime, in which case that counter is used
                count = concurrentMap.putIfAbsent(ann, newCounter);
                if (count == null) {
                    count = newCounter;
                }
            } else {
                // for backwards compatibility we also need to support
                // (deserialized) hash maps
                synchronized (_rowCounts) {
                    count = _rowCounts.get(ann);
                    if (count == null) {
                        count = new AtomicInteger();
                        _rowCounts.put(ann, count);
                    }
                }
            }
        }
        return count;
    }

    /**
     * Resets the annotation: its row count is reset, its threshold counter
     * is discarded and the annotation is removed from the stored rows.
     *
     * @param annotation
     *            the annotation to reset, must be a {@link RowAnnotationImpl}
     */
    @Override
    public final void reset(RowAnnotation annotation) {
        final RowAnnotationImpl ann = (RowAnnotationImpl) annotation;
        ann.resetRowCount();
        _rowCounts.remove(annotation);
        resetRows(annotation);
    }

    /**
     * Creates a new annotation.
     *
     * @return a new {@link RowAnnotationImpl}
     */
    @Override
    public final RowAnnotation createAnnotation() {
        return new RowAnnotationImpl();
    }

    /**
     * Counts the values of a column within the rows stored for an
     * annotation. Each row contributes its distinct count to the count of the
     * value it holds for the column.
     *
     * @param annotation
     *            the annotation to get the rows of
     * @param inputColumn
     *            the column to read values from
     * @return a new map of values to counts, empty if no rows are stored
     */
    @Override
    public final Map<Object, Integer> getValueCounts(RowAnnotation annotation, InputColumn<?> inputColumn) {
        final HashMap<Object, Integer> map = new HashMap<Object, Integer>();

        final InputRow[] rows = getRows(annotation);
        if (rows == null || rows.length == 0) {
            return map;
        }

        for (InputRow row : rows) {
            final Object value = row.getValue(inputColumn);
            Integer count = map.get(value);
            if (count == null) {
                count = 0;
            }
            count = count.intValue() + getDistinctCount(row);
            map.put(value, count);
        }
        return map;
    }

    /**
     * Removes the annotation from any rows that have been annotated with it.
     *
     * @param annotation
     *            the annotation to remove
     */
    protected abstract void resetRows(RowAnnotation annotation);

    /**
     * Gets the distinct count from a row that has been stored and retrieved
     * using the getRows(...) method.
     *
     * @param row
     *            the row to get the distinct count of
     * @return the distinct count of the row
     */
    protected abstract int getDistinctCount(InputRow row);

    /**
     * Stores the relation between a row and an annotation.
     *
     * @param rowId
     *            the ID of the row
     * @param annotation
     *            the annotation that the row has been annotated with
     */
    protected abstract void storeRowAnnotation(int rowId, RowAnnotation annotation);

    /**
     * Stores the values of a row. This method is invoked before
     * {@link #storeRowAnnotation(int, RowAnnotation)} for the same row.
     *
     * Implementations should tolerate being invoked more than once with the
     * same row ID: this happens on every annotation of the row when the
     * factory has been deserialized, and presumably also when the row ID is
     * no longer held by the internal cache (TODO confirm the eviction
     * behaviour of CollectionUtils2.createCache).
     *
     * @param rowId
     *            the ID of the row
     * @param row
     *            the row to store the values of
     * @param distinctCount
     *            the number of occurrences that the row represents
     */
    protected abstract void storeRowValues(int rowId, InputRow row, int distinctCount);

    /**
     * Gets the max number of rows that are stored per annotation.
     *
     * @return the threshold, {@link Integer#MAX_VALUE} if the factory was
     *         constructed without a threshold
     */
    public final Integer getStoredRowsThreshold() {
        return _storedRowsThreshold;
    }
}