/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.storage; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.datacleaner.api.InputRow; import org.datacleaner.configuration.DataCleanerEnvironment; import org.datacleaner.util.ImmutableEntry; /** * Default {@link RowAnnotationFactory} instance. Stores up to 1000 rows in an * in memory annotation. * * @deprecated get your {@link RowAnnotationFactory} from the * {@link DataCleanerEnvironment#getStorageProvider()} method or the * {@link RowAnnotations} class instead. */ @Deprecated public class InMemoryRowAnnotationFactory extends AbstractRowAnnotationFactory { private static final long serialVersionUID = 1L; // contains annotations, mapped to row-ids private final Map<RowAnnotation, Set<Integer>> _annotatedRows = new ConcurrentHashMap<>(); // contains row id's mapped to rows mapped to distinct counts private final Map<Integer, Map.Entry<InputRow, Integer>> _distinctCounts = new ConcurrentHashMap<>(); public InMemoryRowAnnotationFactory() { this(1000); } public InMemoryRowAnnotationFactory(final int storedRowsThreshold) { super(storedRowsThreshold); } protected int getInMemoryRowCount(final RowAnnotation annotation) { final Set<Integer> rows = _annotatedRows.get(annotation); if (rows == null) { return 0; } return rows.size(); } @Override protected void resetRows(final RowAnnotation annotation) { _annotatedRows.remove(annotation); } @Override protected int getDistinctCount(final InputRow row) { return _distinctCounts.get(row.getId()).getValue(); } @Override public void annotate(final InputRow row, final int distinctCount, final RowAnnotation annotation) { for (int i = 0; i < distinctCount; i++) { annotate(row, annotation); } } @Override protected void storeRowAnnotation(final int rowId, final RowAnnotation annotation) { final Set<Integer> rowIds = getRowIds(annotation); rowIds.add(rowId); } private Set<Integer> getRowIds(final RowAnnotation annotation) { Set<Integer> rowIds = _annotatedRows.get(annotation); if (rowIds == null) { rowIds = Collections.synchronizedSet(new LinkedHashSet<Integer>()); _annotatedRows.put(annotation, rowIds); } return rowIds; } @Override protected void storeRowValues(final int rowId, final InputRow row) { _distinctCounts.put(rowId, new ImmutableEntry<>(row, 1)); } @Override public boolean hasSampleRows(final RowAnnotation annotation) { if (_annotatedRows.containsKey(annotation)) { return true; } return false; } @Override public List<InputRow> getSampleRows(final RowAnnotation annotation) { final Set<Integer> rowIds = _annotatedRows.get(annotation); if (rowIds == null) { return Collections.emptyList(); } final List<InputRow> rows = new ArrayList<>(rowIds.size()); for (final Integer rowId : rowIds) { final InputRow row = _distinctCounts.get(rowId).getKey(); rows.add(row); } return rows; } @Override public void transferAnnotations(final RowAnnotation from, final RowAnnotation to) { final int rowCountToAdd = from.getRowCount(); ((RowAnnotationImpl) to).incrementRowCount(rowCountToAdd); } }