/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.result;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import javax.swing.table.DefaultTableModel;
import javax.swing.table.TableModel;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.Ref;
import org.apache.metamodel.util.SerializableRef;
import org.datacleaner.api.AnalyzerResult;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;
import org.datacleaner.storage.RowAnnotations;
/**
* Represents a typical "drill to detail" result consisting of a set of
* annotated rows.
*
* Furthermore, if classes inherit from {@link AnnotatedRowsResult}, they can be
* annotated with the {@link Description} annotation to provide a labeling
* description, used often in rendering.
*
*
*/
public class AnnotatedRowsResult implements AnalyzerResult, TableModelResult {

    private static final long serialVersionUID = 1L;

    private final Ref<RowAnnotationFactory> _annotationFactoryRef;
    private final InputColumn<?>[] _highlightedColumns;
    private final RowAnnotation _annotation;

    // Lazily computed caches; transient, so they are rebuilt on demand after
    // deserialization.
    private transient List<InputRow> _rows;
    private transient TableModel _tableModel;
    private transient List<InputColumn<?>> _inputColumns;

    /**
     * Creates a result for the rows registered under a particular annotation.
     *
     * @param annotation
     *            the annotation identifying the rows of this result
     * @param annotationFactory
     *            the factory from which sample rows of the annotation are
     *            retrieved; it is wrapped in a {@link SerializableRef}
     * @param highlightedColumns
     *            the columns that are exposed through
     *            {@link #getHighlightedColumns()}
     */
    public AnnotatedRowsResult(final RowAnnotation annotation, final RowAnnotationFactory annotationFactory,
            final InputColumn<?>... highlightedColumns) {
        _annotationFactoryRef = new SerializableRef<>(annotationFactory);
        _annotation = annotation;
        _highlightedColumns = highlightedColumns;
    }

    /**
     * Factory method for {@link AnnotatedRowsResult} that will return non-null
     * ONLY if the {@link RowAnnotation} passed in has any sample rows according
     * to the {@link RowAnnotationFactory}.
     *
     * Returning null otherwise makes it easy for callers to filter out
     * unnecessary drill-to-detail result objects.
     *
     * @param annotation
     *            the annotation to create a result for
     * @param annotationFactory
     *            the factory that is asked whether sample rows exist
     * @param columns
     *            the columns to highlight in the result
     * @return a new {@link AnnotatedRowsResult}, or null if the factory
     *         reports no sample rows for the annotation
     */
    public static AnnotatedRowsResult createIfSampleRowsAvailable(final RowAnnotation annotation,
            final RowAnnotationFactory annotationFactory, final InputColumn<?>... columns) {
        if (annotationFactory.hasSampleRows(annotation)) {
            return new AnnotatedRowsResult(annotation, annotationFactory, columns);
        }
        return null;
    }

    /**
     * Gets the columns of the sample rows, as reported by the first sample
     * row, excluding any {@link MutableInputColumn} that is hidden. The list
     * is computed once and then cached.
     *
     * @return the visible input columns, or an empty list if there are no
     *         sample rows
     */
    public List<InputColumn<?>> getInputColumns() {
        if (_inputColumns == null) {
            final List<InputRow> rows = getSampleRows();
            if (rows.isEmpty()) {
                _inputColumns = new ArrayList<>(0);
            } else {
                final InputRow firstRow = rows.iterator().next();
                final List<InputColumn<?>> inputColumns = firstRow.getInputColumns();
                _inputColumns = CollectionUtils.filter(inputColumns, col -> {
                    if (col instanceof MutableInputColumn) {
                        if (((MutableInputColumn<?>) col).isHidden()) {
                            // avoid hidden columns in the result
                            return false;
                        }
                    }
                    return true;
                });
            }
        }
        return _inputColumns;
    }

    /**
     * Gets the sample rows as an array.
     *
     * @return a new array holding the rows of {@link #getSampleRows()}
     * @deprecated use {@link #getSampleRows()} instead
     **/
    @Deprecated
    public InputRow[] getRows() {
        return getSampleRows().toArray(new InputRow[0]);
    }

    /**
     * Gets the sample rows of this result's annotation, as provided by the
     * annotation factory. The list is fetched once and then cached.
     *
     * @return the sample rows; never null. An empty list is returned when the
     *         factory reference yields null or the factory returns null.
     */
    public List<InputRow> getSampleRows() {
        if (_rows == null) {
            final RowAnnotationFactory annotationFactory = _annotationFactoryRef.get();
            if (annotationFactory != null) {
                _rows = annotationFactory.getSampleRows(getAnnotation());
            }
            if (_rows == null) {
                _rows = Collections.emptyList();
            }
        }
        return _rows;
    }

    /**
     * Creates a table model containing only distinct values from a particular
     * input column, and the counts of those distinct values. Note that the
     * counts may only be the count from the data that is available in the
     * annotation row storage, which may just be a preview/subset of the actual
     * data.
     *
     * Rows are ordered by descending count; the relative order of values with
     * equal counts is unspecified.
     *
     * @param inputColumnOfInterest
     *            the column whose distinct values are counted
     * @return a two-column table model: the distinct value and its count
     */
    public TableModel toDistinctValuesTableModel(final InputColumn<?> inputColumnOfInterest) {
        final Map<Object, Integer> valueCounts;
        final RowAnnotationFactory annotationFactory = _annotationFactoryRef.get();
        if (annotationFactory == null) {
            valueCounts = Collections.emptyMap();
        } else {
            valueCounts = getValueCounts(annotationFactory, getAnnotation(), inputColumnOfInterest);
        }

        final DefaultTableModel tableModel =
                new DefaultTableModel(new String[] { inputColumnOfInterest.getName(), "Count in dataset" },
                        valueCounts.size());

        // Sort a copy of the entries by descending count. A list with a stable
        // sort keeps every entry (including ties) without needing a comparator
        // that breaks the Comparator contract, and Integer.compare avoids the
        // overflow risk of comparing by subtraction.
        final List<Entry<Object, Integer>> entries = new ArrayList<>(valueCounts.entrySet());
        Collections.sort(entries, (o1, o2) -> Integer.compare(o2.getValue().intValue(), o1.getValue().intValue()));

        int i = 0;
        for (final Entry<Object, Integer> entry : entries) {
            tableModel.setValueAt(entry.getKey(), i, 0);
            tableModel.setValueAt(entry.getValue(), i, 1);
            i++;
        }
        return tableModel;
    }

    /**
     * Counts the occurrences of each distinct value of a column among the
     * sample rows of an annotation.
     *
     * @param annotationFactory
     *            the factory to fetch the sample rows from
     * @param annotation
     *            the annotation whose sample rows are inspected
     * @param inputColumn
     *            the column to read values from
     * @return a map from value (possibly null) to its number of occurrences;
     *         empty if there are no sample rows
     */
    private Map<Object, Integer> getValueCounts(final RowAnnotationFactory annotationFactory,
            final RowAnnotation annotation, final InputColumn<?> inputColumn) {
        final List<InputRow> rows = annotationFactory.getSampleRows(annotation);
        if (rows == null || rows.isEmpty()) {
            return Collections.emptyMap();
        }
        final Map<Object, Integer> map = new HashMap<>();
        for (final InputRow row : rows) {
            // HashMap permits a null key, so null values are counted as well
            map.merge(row.getValue(inputColumn), 1, Integer::sum);
        }
        return map;
    }

    /**
     * Creates a table model with one column per visible input column (see
     * {@link #getInputColumns()}) and one row per sample row.
     *
     * @param maxRows
     *            the maximum number of rows to include; a negative value means
     *            no limit
     * @return a new table model holding the values of the sample rows
     */
    public TableModel toTableModel(int maxRows) {
        if (maxRows < 0) {
            maxRows = Integer.MAX_VALUE;
        }
        final List<InputRow> rows = getSampleRows();
        final List<InputColumn<?>> inputColumns = getInputColumns();

        final String[] headers = new String[inputColumns.size()];
        for (int i = 0; i < headers.length; i++) {
            headers[i] = inputColumns.get(i).getName();
        }

        final int actualRows = Math.min(maxRows, rows.size());
        final TableModel tableModel = new DefaultTableModel(headers, actualRows);

        int row = 0;
        for (final InputRow inputRow : rows) {
            if (actualRows == row) {
                // the row limit has been reached
                break;
            }
            for (int i = 0; i < inputColumns.size(); i++) {
                final InputColumn<?> inputColumn = inputColumns.get(i);
                final Object value = inputRow.getValue(inputColumn);
                tableModel.setValueAt(value, row, i);
            }
            row++;
        }
        return tableModel;
    }

    /**
     * Gets a table model of all sample rows. The model is created on the first
     * call and then cached.
     *
     * @return the cached table model of all sample rows
     */
    @Override
    public TableModel toTableModel() {
        if (_tableModel == null) {
            _tableModel = toTableModel(-1);
        }
        return _tableModel;
    }

    /**
     * Gets the highlighted columns passed at construction time.
     *
     * @return the highlighted columns array (not a copy)
     */
    public InputColumn<?>[] getHighlightedColumns() {
        return _highlightedColumns;
    }

    /**
     * Finds the position of a column within {@link #getInputColumns()}.
     *
     * @param col
     *            the column to look for
     * @return the zero-based index of the first equal column, or -1 if the
     *         column is not present (including when {@code col} is null)
     */
    public int getColumnIndex(final InputColumn<?> col) {
        return getInputColumns().indexOf(col);
    }

    /**
     * Gets the annotation of this result.
     *
     * @return the annotation passed at construction time, or a newly created
     *         annotation from the default factory if that annotation is null
     */
    public RowAnnotation getAnnotation() {
        if (_annotation == null) {
            // only occurs for deserialized instances
            return RowAnnotations.getDefaultFactory().createAnnotation();
        }
        return _annotation;
    }

    /**
     * Gets the row count reported by the annotation.
     *
     * @return the result of {@link RowAnnotation#getRowCount()} for
     *         {@link #getAnnotation()}
     */
    public int getAnnotatedRowCount() {
        return getAnnotation().getRowCount();
    }
}