/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.data;
import java.util.*;
/**
 * Frame assembled from the variables and/or rows of other frames.
 * <p>
 * Instances are created only through the static factories {@code byVars} and {@code byRows}.
 * When binding by variables the {@link Var} references of the sources are stored as given,
 * no copy of them is made by this class.
 * <p>
 * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
 */
public class BoundFrame extends AbstractFrame {

    private static final long serialVersionUID = -445349340356580788L;

    /** Message used whenever two bound variables share the same name. */
    private static final String DUPLICATE_NAME_MESSAGE =
            "bound frame does not allow variables with the same name";

    private final int rowCount;
    private final List<Var> vars;
    private final String[] names;
    private final Map<String, Integer> indexes;

    private BoundFrame(int rowCount, List<Var> vars, String[] names, Map<String, Integer> indexes) {
        this.rowCount = rowCount;
        this.vars = vars;
        this.names = names;
        this.indexes = indexes;
    }

    /**
     * Builds a bound frame with no rows and no variables.
     */
    private static BoundFrame empty() {
        return new BoundFrame(0, new ArrayList<>(), new String[]{}, new HashMap<>());
    }

    /**
     * Appends a variable to the collections under construction, rejecting duplicate names.
     *
     * @param var     variable to append
     * @param vars    variables collected so far, in binding order
     * @param names   names collected so far, parallel to {@code vars}
     * @param indexes name to position lookup, parallel to {@code vars}
     * @throws IllegalArgumentException if a variable with the same name was already appended
     */
    private static void register(Var var, List<Var> vars, List<String> names, Map<String, Integer> indexes) {
        String name = var.name();
        if (indexes.containsKey(name)) {
            throw new IllegalArgumentException(DUPLICATE_NAME_MESSAGE);
        }
        // the position of the new variable is the number of variables appended before it
        indexes.put(name, vars.size());
        vars.add(var);
        names.add(name);
    }

    /**
     * Builds a new bound frame by binding variables of multiple given frames.
     * All variable names must be unique among all the given frames.
     * The row count is the minimum of the row counts of the given frames which
     * have at least one variable. Frames without variables contribute nothing and
     * are ignored, regardless of their position in the argument list. If no frame
     * has variables the result has zero rows.
     *
     * @param dfs given data frames
     * @return new frame built by binding variables
     * @throws IllegalArgumentException if two variables have the same name
     */
    public static BoundFrame byVars(Frame... dfs) {
        if (dfs.length == 0) {
            return empty();
        }
        boolean rowCountKnown = false;
        int _rowCount = 0;
        List<Var> _vars = new ArrayList<>();
        List<String> _names = new ArrayList<>();
        Map<String, Integer> _indexes = new HashMap<>();
        for (Frame df : dfs) {
            int varCount = df.varCount();
            if (varCount == 0) {
                // A frame without variables must not constrain the row count. Previously this
                // held only for leading frames, so the result depended on argument order and
                // binding an empty frame last truncated the result to its row count.
                continue;
            }
            _rowCount = rowCountKnown ? Math.min(_rowCount, df.rowCount()) : df.rowCount();
            rowCountKnown = true;
            for (int j = 0; j < varCount; j++) {
                register(df.var(j), _vars, _names, _indexes);
            }
        }
        return new BoundFrame(_rowCount, _vars, _names.toArray(new String[0]), _indexes);
    }

    /**
     * Builds a new bound frame by binding the variables from the given collection,
     * in the iteration order of the collection.
     *
     * @param varList given data variables
     * @return new frame built by binding variables
     * @throws IllegalArgumentException if two variables have the same name
     * @see #byVars(Var...)
     */
    public static BoundFrame byVars(Collection<Var> varList) {
        return byVars(varList.toArray(new Var[0]));
    }

    /**
     * Builds a new bound frame by binding given variables.
     * All variable names must be unique.
     * The row count is the minimum of the row counts from all the given variables,
     * or zero when no variable is given.
     *
     * @param varList given data variables
     * @return new frame built by binding variables
     * @throws IllegalArgumentException if two variables have the same name
     */
    public static BoundFrame byVars(Var... varList) {
        if (varList.length == 0) {
            return empty();
        }
        // there is at least one variable, so the sentinel is always replaced
        int _rowCount = Integer.MAX_VALUE;
        List<Var> _vars = new ArrayList<>();
        List<String> _names = new ArrayList<>();
        Map<String, Integer> _indexes = new HashMap<>();
        for (Var var : varList) {
            _rowCount = Math.min(_rowCount, var.rowCount());
            register(var, _vars, _names, _indexes);
        }
        return new BoundFrame(_rowCount, _vars, _names.toArray(new String[0]), _indexes);
    }

    /**
     * Builds a new bound frame by stacking the rows of the given frames, in the given order.
     * All frames must have the same variable names, in the same order, and
     * variables with the same name must have the same type.
     * The row count is the sum of the row counts of the given frames.
     *
     * @param dfs given data frames
     * @return new frame built by binding rows
     * @throws IllegalArgumentException if the frames differ in variable count,
     *                                  variable names, variable order or variable types
     */
    public static BoundFrame byRows(Frame... dfs) {
        if (dfs.length == 0) {
            return empty();
        }
        // copy the names so the new frame does not share the array with the first frame
        String[] _names = dfs[0].varNames().clone();

        // every other frame must match the first one in names, order and types
        for (int i = 1; i < dfs.length; i++) {
            String[] compNames = dfs[i].varNames();
            if (compNames.length != _names.length) {
                throw new IllegalArgumentException("can't bind by rows frames with different variable count");
            }
            for (int j = 0; j < _names.length; j++) {
                if (!_names[j].equals(compNames[j])) {
                    throw new IllegalArgumentException("can't bind by rows frames with different variable " +
                            "names or with different order of the variables");
                }
            }
            for (String _name : _names) {
                if (!dfs[i].var(_name).type().equals(dfs[0].var(_name).type())) {
                    // variable exists in both frames but does not have the same type
                    throw new IllegalArgumentException("can't bind by rows variable of different types");
                }
            }
        }

        List<Var> _vars = new ArrayList<>(_names.length);
        Map<String, Integer> _indexes = new HashMap<>();
        // for each variable name build a bound variable from the rows of all the frames
        for (int i = 0; i < _names.length; i++) {
            List<Integer> counts = new ArrayList<>(dfs.length);
            List<Var> boundVars = new ArrayList<>(dfs.length);
            for (Frame df : dfs) {
                // the row count is taken from the frame, not from the variable
                counts.add(df.rowCount());
                boundVars.add(df.var(_names[i]));
            }
            _vars.add(BoundVar.from(counts, boundVars).withName(_names[i]));
            _indexes.put(_names[i], i);
        }
        int _rowCount = Arrays.stream(dfs).mapToInt(Frame::rowCount).sum();
        return new BoundFrame(_rowCount, _vars, _names, _indexes);
    }

    @Override
    public int rowCount() {
        return rowCount;
    }

    @Override
    public int varCount() {
        return vars.size();
    }

    /**
     * @return the array of variable names held by this frame; the array is not copied
     */
    @Override
    public String[] varNames() {
        return names;
    }

    /**
     * @param name variable name
     * @return position of the variable with the given name
     * @throws NullPointerException if this frame has no variable with the given name,
     *                              since the missing index cannot be unboxed
     */
    @Override
    public int varIndex(String name) {
        return indexes.get(name);
    }

    @Override
    public Var var(int pos) {
        return vars.get(pos);
    }

    /**
     * @param name variable name
     * @return variable with the given name
     * @throws NullPointerException if this frame has no variable with the given name,
     *                              since the missing index cannot be unboxed
     */
    @Override
    public Var var(String name) {
        return vars.get(indexes.get(name));
    }

    /**
     * Binds the variables of this frame followed by the given variables.
     * Row count and name uniqueness follow the rules of {@link #byVars(Frame...)}.
     */
    @Override
    public Frame bindVars(Var... vars) {
        return BoundFrame.byVars(this, BoundFrame.byVars(vars));
    }

    /**
     * Binds the variables of this frame followed by the variables of the given frame.
     * Row count and name uniqueness follow the rules of {@link #byVars(Frame...)}.
     */
    @Override
    public Frame bindVars(Frame df) {
        return BoundFrame.byVars(this, df);
    }

    /**
     * Builds a bound frame with the same row count which contains only the variables
     * selected by the given range, in the order produced by {@code range.parseVarNames(this)}.
     */
    @Override
    public Frame mapVars(VRange range) {
        List<String> selected = range.parseVarNames(this);
        String[] _names = new String[selected.size()];
        List<Var> _vars = new ArrayList<>(selected.size());
        Map<String, Integer> _indexes = new HashMap<>();
        for (int i = 0; i < selected.size(); i++) {
            String name = selected.get(i);
            _names[i] = name;
            _vars.add(var(name));
            _indexes.put(name, i);
        }
        return new BoundFrame(rowCount, _vars, _names, _indexes);
    }

    /**
     * Binds by rows this frame with the frame produced by
     * {@code SolidFrame.emptyFrom(this, rowCount)}.
     */
    @Override
    public Frame addRows(int rowCount) {
        return BoundFrame.byRows(this, SolidFrame.emptyFrom(this, rowCount));
    }

    /**
     * Binds by rows this frame followed by the given frame, see {@link #byRows(Frame...)}.
     */
    @Override
    public Frame bindRows(Frame df) {
        return BoundFrame.byRows(this, df);
    }

    /**
     * Delegates to {@code MappedFrame.byRow(this, mapping)}.
     */
    @Override
    public Frame mapRows(Mapping mapping) {
        return MappedFrame.byRow(this, mapping);
    }
}