/* * Joinery -- Data frames for Java * Copyright (c) 2014, 2015 IBM Corp. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package joinery.impl; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import joinery.DataFrame; import joinery.DataFrame.JoinType; import joinery.DataFrame.KeyFunction; public class Combining { public static <V> DataFrame<V> join(final DataFrame<V> left, final DataFrame<V> right, final JoinType how, final KeyFunction<V> on) { final Iterator<Object> leftIt = left.index().iterator(); final Iterator<Object> rightIt = right.index().iterator(); final Map<Object, List<V>> leftMap = new LinkedHashMap<>(); final Map<Object, List<V>> rightMap = new LinkedHashMap<>(); for (final List<V> row : left) { final Object name = leftIt.next(); final Object key = on == null ? name : on.apply(row); if (leftMap.put(key, row) != null) { throw new IllegalArgumentException("generated key is not unique: " + key); } } for (final List<V> row : right) { final Object name = rightIt.next(); final Object key = on == null ? name : on.apply(row); if (rightMap.put(key, row) != null) { throw new IllegalArgumentException("generated key is not unique: " + key); } } final List<Object> columns = new ArrayList<>(how != JoinType.RIGHT ? left.columns() : right.columns()); for (Object column : how != JoinType.RIGHT ? right.columns() : left.columns()) { final int index = columns.indexOf(column); if (index >= 0) { if (column instanceof List) { @SuppressWarnings("unchecked") final List<Object> l1 = List.class.cast(columns.get(index)); l1.add(how != JoinType.RIGHT ? "left" : "right"); @SuppressWarnings("unchecked") final List<Object> l2= List.class.cast(column); l2.add(how != JoinType.RIGHT ? "right" : "left"); } else { columns.set(index, String.format("%s_%s", columns.get(index), how != JoinType.RIGHT ? "left" : "right")); column = String.format("%s_%s", column, how != JoinType.RIGHT ? "right" : "left"); } } columns.add(column); } final DataFrame<V> df = new DataFrame<>(columns); for (final Map.Entry<Object, List<V>> entry : how != JoinType.RIGHT ? leftMap.entrySet() : rightMap.entrySet()) { final List<V> tmp = new ArrayList<>(entry.getValue()); final List<V> row = how != JoinType.RIGHT ? rightMap.get(entry.getKey()) : leftMap.get(entry.getKey()); if (row != null || how != JoinType.INNER) { tmp.addAll(row != null ? row : Collections.<V>nCopies(right.columns().size(), null)); df.append(entry.getKey(), tmp); } } if (how == JoinType.OUTER) { for (final Map.Entry<Object, List<V>> entry : how != JoinType.RIGHT ? rightMap.entrySet() : leftMap.entrySet()) { final List<V> row = how != JoinType.RIGHT ? leftMap.get(entry.getKey()) : rightMap.get(entry.getKey()); if (row == null) { final List<V> tmp = new ArrayList<>(Collections.<V>nCopies( how != JoinType.RIGHT ? left.columns().size() : right.columns().size(), null)); tmp.addAll(entry.getValue()); df.append(entry.getKey(), tmp); } } } return df; } public static <V> DataFrame<V> joinOn(final DataFrame<V> left, final DataFrame<V> right, final JoinType how, final Integer ... cols) { return join(left, right, how, new KeyFunction<V>() { @Override public Object apply(final List<V> value) { final List<V> key = new ArrayList<>(cols.length); for (final int col : cols) { key.add(value.get(col)); } return Collections.unmodifiableList(key); } }); } public static <V> DataFrame<V> merge(final DataFrame<V> left, final DataFrame<V> right, final JoinType how) { final Set<Object> intersection = new LinkedHashSet<>(left.nonnumeric().columns()); intersection.retainAll(right.nonnumeric().columns()); final Object[] columns = intersection.toArray(new Object[intersection.size()]); return join(left.reindex(columns), right.reindex(columns), how, null); } @SafeVarargs public static <V> void update(final DataFrame<V> dest, final boolean overwrite, final DataFrame<? extends V> ... others) { for (int col = 0; col < dest.size(); col++) { for (int row = 0; row < dest.length(); row++) { if (overwrite || dest.get(row, col) == null) { for (final DataFrame<? extends V> other : others) { if (col < other.size() && row < other.length()) { final V value = other.get(row, col); if (value != null) { dest.set(row, col, value); break; } } } } } } } }