// AnnotatedRowsResult.java example
//
// Explorer
// DataCleaner-master
/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.result;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;

import javax.swing.table.DefaultTableModel;
import javax.swing.table.TableModel;

import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.Ref;
import org.apache.metamodel.util.SerializableRef;
import org.datacleaner.api.AnalyzerResult;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;
import org.datacleaner.storage.RowAnnotations;

/**
 * Represents a typical "drill to detail" result consisting of a set of
 * annotated rows.
 *
 * Furthermore, if classes inherit from {@link AnnotatedRowsResult}, they can be
 * annotated with the {@link Description} annotation to provide a labeling
 * description, used often in rendering.
 *
 * The sample rows, the visible input columns and the default table model are
 * computed lazily and cached in transient fields, so they are recomputed on
 * first access after deserialization.
 */
public class AnnotatedRowsResult implements AnalyzerResult, TableModelResult {

    private static final long serialVersionUID = 1L;

    private final Ref<RowAnnotationFactory> _annotationFactoryRef;
    private final InputColumn<?>[] _highlightedColumns;
    private final RowAnnotation _annotation;

    // lazily populated caches; transient, so they are null after deserialization
    private transient List<InputRow> _rows;
    private transient TableModel _tableModel;
    private transient List<InputColumn<?>> _inputColumns;

    /**
     * Constructs a result for the rows registered under the given annotation.
     *
     * @param annotation
     *            the annotation that identifies the rows of this result
     * @param annotationFactory
     *            the factory from which the sample rows of the annotation are
     *            retrieved; it is wrapped in a {@link SerializableRef}
     * @param highlightedColumns
     *            the columns to report from {@link #getHighlightedColumns()}
     */
    public AnnotatedRowsResult(final RowAnnotation annotation, final RowAnnotationFactory annotationFactory,
            final InputColumn<?>... highlightedColumns) {
        _annotationFactoryRef = new SerializableRef<>(annotationFactory);
        _annotation = annotation;
        _highlightedColumns = highlightedColumns;
    }

    /**
     * Factory method for {@link AnnotatedRowsResult} that will return non-null
     * ONLY if the {@link RowAnnotation} passed in has any sample rows according
     * to the {@link RowAnnotationFactory}.
     *
     * Otherwise returning null has the benefit that usually it makes it easy to
     * filter out unnecessary drill-to-detail result objects.
     *
     * @param annotation
     *            the annotation to create a result for
     * @param annotationFactory
     *            the factory that is asked whether sample rows exist
     * @param columns
     *            the columns to highlight in the result
     * @return a new result, or null if the factory reports no sample rows for
     *         the annotation
     */
    public static AnnotatedRowsResult createIfSampleRowsAvailable(final RowAnnotation annotation,
            final RowAnnotationFactory annotationFactory, final InputColumn<?>... columns) {
        if (annotationFactory.hasSampleRows(annotation)) {
            return new AnnotatedRowsResult(annotation, annotationFactory, columns);
        }
        return null;
    }

    /**
     * Gets the columns of the sample rows, excluding any
     * {@link MutableInputColumn} that is marked as hidden. The columns are
     * taken from the first sample row.
     *
     * @return the visible columns; an empty list if there are no sample rows
     */
    public List<InputColumn<?>> getInputColumns() {
        if (_inputColumns == null) {
            final List<InputRow> rows = getSampleRows();
            if (rows.isEmpty()) {
                _inputColumns = new ArrayList<>(0);
            } else {
                final List<InputColumn<?>> inputColumns = rows.get(0).getInputColumns();
                // avoid hidden columns in the result
                _inputColumns = CollectionUtils.filter(inputColumns,
                        col -> !(col instanceof MutableInputColumn && ((MutableInputColumn<?>) col).isHidden()));
            }
        }
        return _inputColumns;
    }

    /**
     * Gets the sample rows as an array.
     *
     * @return a new array holding the rows of {@link #getSampleRows()}
     * @deprecated use {@link #getSampleRows()} instead
     **/
    @Deprecated
    public InputRow[] getRows() {
        return getSampleRows().toArray(new InputRow[0]);
    }

    /**
     * Gets the sample rows that the annotation factory holds for the
     * annotation of this result.
     *
     * @return the sample rows, never null; an empty list if the annotation
     *         factory is not available or returns null
     */
    public List<InputRow> getSampleRows() {
        if (_rows == null) {
            final RowAnnotationFactory annotationFactory = _annotationFactoryRef.get();
            if (annotationFactory != null) {
                _rows = annotationFactory.getSampleRows(getAnnotation());
            }
            if (_rows == null) {
                _rows = Collections.emptyList();
            }
        }
        return _rows;
    }

    /**
     * Creates a table model containing only distinct values from a particular
     * input column, and the counts of those distinct values. Note that the
     * counts may only be the count from the data that is available in the
     * annotation row storage, which may just be a preview/subset of the actual
     * data.
     *
     * @param inputColumnOfInterest
     *            the column whose distinct values are counted
     * @return a two-column table model (value, count) with one row per
     *         distinct value, ordered by descending count
     */
    public TableModel toDistinctValuesTableModel(final InputColumn<?> inputColumnOfInterest) {
        final Map<Object, Integer> valueCounts;
        final RowAnnotationFactory annotationFactory = _annotationFactoryRef.get();
        if (annotationFactory == null) {
            valueCounts = Collections.emptyMap();
        } else {
            valueCounts = getValueCounts(annotationFactory, getAnnotation(), inputColumnOfInterest);
        }
        final DefaultTableModel tableModel =
                new DefaultTableModel(new String[] { inputColumnOfInterest.getName(), "Count in dataset" },
                        valueCounts.size());

        // Sort a copy of the entries by descending count. A list is used rather
        // than a sorted set because a set would require the comparator to never
        // report equality (or else drop entries with equal counts), which
        // violates the Comparator contract. The sort is stable, so entries with
        // equal counts keep the iteration order of the map.
        final List<Entry<Object, Integer>> entries = new ArrayList<>(valueCounts.entrySet());
        entries.sort((o1, o2) -> Integer.compare(o2.getValue(), o1.getValue()));

        int i = 0;
        for (final Entry<Object, Integer> entry : entries) {
            tableModel.setValueAt(entry.getKey(), i, 0);
            tableModel.setValueAt(entry.getValue(), i, 1);
            i++;
        }

        return tableModel;
    }

    /**
     * Counts the occurrences of each value of a column within the sample rows
     * of an annotation.
     *
     * @param annotationFactory
     *            the factory to retrieve the sample rows from
     * @param annotation
     *            the annotation whose sample rows are inspected
     * @param inputColumn
     *            the column to read values from
     * @return a map from value (possibly null) to its number of occurrences;
     *         empty if there are no sample rows
     */
    private Map<Object, Integer> getValueCounts(final RowAnnotationFactory annotationFactory,
            final RowAnnotation annotation, final InputColumn<?> inputColumn) {
        final List<InputRow> rows = annotationFactory.getSampleRows(annotation);

        if (rows == null || rows.isEmpty()) {
            return Collections.emptyMap();
        }

        final Map<Object, Integer> map = new HashMap<>();
        for (final InputRow row : rows) {
            map.merge(row.getValue(inputColumn), 1, Integer::sum);
        }
        return map;
    }

    /**
     * Creates a table model with one column per visible input column (see
     * {@link #getInputColumns()}) and one row per sample row.
     *
     * @param maxRows
     *            the maximum number of rows to include; a negative value means
     *            no limit
     * @return a new table model holding the values of at most maxRows sample
     *         rows
     */
    public TableModel toTableModel(final int maxRows) {
        final int rowLimit = maxRows < 0 ? Integer.MAX_VALUE : maxRows;

        final List<InputRow> rows = getSampleRows();
        final List<InputColumn<?>> inputColumns = getInputColumns();
        final String[] headers = new String[inputColumns.size()];
        for (int i = 0; i < headers.length; i++) {
            headers[i] = inputColumns.get(i).getName();
        }

        final int actualRows = Math.min(rowLimit, rows.size());
        final TableModel tableModel = new DefaultTableModel(headers, actualRows);
        int row = 0;
        for (final InputRow inputRow : rows) {
            if (actualRows == row) {
                // the row limit has been reached
                break;
            }
            for (int i = 0; i < inputColumns.size(); i++) {
                final InputColumn<?> inputColumn = inputColumns.get(i);
                final Object value = inputRow.getValue(inputColumn);
                tableModel.setValueAt(value, row, i);
            }
            row++;
        }
        return tableModel;
    }

    /**
     * Gets a table model of all sample rows. The model is created on first
     * access and then cached.
     *
     * @return the cached table model containing all sample rows
     */
    @Override
    public TableModel toTableModel() {
        if (_tableModel == null) {
            _tableModel = toTableModel(-1);
        }
        return _tableModel;
    }

    /**
     * Gets the highlighted columns that were passed to the constructor.
     *
     * @return the array of highlighted columns (not a copy)
     */
    public InputColumn<?>[] getHighlightedColumns() {
        return _highlightedColumns;
    }

    /**
     * Finds the position of a column within {@link #getInputColumns()}.
     *
     * @param col
     *            the column to look for
     * @return the zero-based index of the column, or -1 if it is not among the
     *         visible input columns
     */
    public int getColumnIndex(final InputColumn<?> col) {
        final List<InputColumn<?>> inputColumns = getInputColumns();
        int i = 0;
        for (final InputColumn<?> inputColumn : inputColumns) {
            if (col.equals(inputColumn)) {
                return i;
            }
            i++;
        }
        return -1;
    }

    /**
     * Gets the annotation that identifies the rows of this result.
     *
     * @return the annotation; if none is held, a newly created annotation from
     *         the default {@link RowAnnotations} factory is returned on each
     *         call
     */
    public RowAnnotation getAnnotation() {
        if (_annotation == null) {
            // only occurs for deserialized instances
            return RowAnnotations.getDefaultFactory().createAnnotation();
        }
        return _annotation;
    }

    /**
     * Gets the row count reported by the annotation.
     *
     * @return the value of {@link RowAnnotation#getRowCount()} for the
     *         annotation of this result
     */
    public int getAnnotatedRowCount() {
        return getAnnotation().getRowCount();
    }
}