/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.data;
import rapaio.data.filter.FFilter;
import rapaio.data.stream.FSpot;
import rapaio.data.stream.FSpots;
import rapaio.printer.Printable;
import rapaio.sys.WS;
import rapaio.printer.Summary;
import java.io.Serializable;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
/**
* Random access list of observed values for multiple variables.
* <p>
* The observed values are represented in a tabular format.
* Rows corresponds to observations and columns corresponds to observed variables.
*
* @author <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
*/
public interface Frame extends Serializable, Printable {
/**
* Number of observations contained in frame. Observations are accessed by position.
*
* @return number of observations
*/
int rowCount();
/**
* Number of variables contained in frame. Variable references could be obtained by name or by position.
* <p>
* Each variable corresponds to a column in tabular format, thus in the frame terminology
* this is denoted as var (short form of column).
*
* @return number of variables
*/
int varCount();
/**
* Returns an array of variable names. The names are ordered by the position of the variables.
* <p>
* Each variable has it's own name. Inside a frame a specific variable could be named differently.
* However, the default name for a variable inside a frame is own variable name.
*
* @return array of var names
*/
String[] varNames();
/**
* Returns the index (position) of the var inside the frame given the var name as parameter.
*
* @param name var name
* @return column position inside the frame corresponding to the var with the specified name
*/
int varIndex(String name);
/**
* Returns a var object from the given position
*
* @param pos position of the column inside the frame
* @return a var type reference
*/
Var var(int pos);
/**
* Returns a var object with given name
*
* @param name name of the column inside the frame
* @return a var type reference
*/
Var var(String name);
/**
* Adds the given variables to the variables of the current frame to build a new frame.
* New variables must have the same number of rows.
*
* @param vars variables added to the current frame variables
* @return new frame with current frame variables and given variables added
*/
Frame bindVars(Var... vars);
/**
* Adds the variables from the given frame to the variables of the current frame to build a new frame.
* New variables from the given frame must have the same row count.
*
* @param df given frame with variables which will be added
* @return new frame with the current frame variables and given frame variables
*/
Frame bindVars(Frame df);
/**
* Builds a new frame which has only the variables specified in variable range
*
* @param range given variable range
* @return new frame with only given variables
*/
Frame mapVars(VRange range);
/**
* Builds a new frame which has only the variables specified in the variable range string
*
* @param varRange variable range as string
* @return new frame with only the given variables
*/
default Frame mapVars(String... varRange) {
return mapVars(VRange.of(varRange));
}
default Frame mapVars(List<String> varRange) {
return mapVars(VRange.of(varRange.toArray(new String[varRange.size()])));
}
/**
* Builds a new frame with all columns except the ones specified in variable range
*
* @param range given variable range which will be deleted
* @return new frame with the non-deleted variables
*/
default Frame removeVars(VRange range) {
Set<String> remove = new HashSet<>(range.parseVarNames(this));
if(remove.isEmpty())
return this;
if(remove.size() == this.varCount())
return SolidFrame.byVars();
int[] retain = new int[varNames().length - remove.size()];
int pos = 0;
for (String varName : varNames()) {
if (remove.contains(varName)) continue;
retain[pos++] = varIndex(varName);
}
return mapVars(VRange.of(retain));
}
/**
* Builds a new frame with all variables except ones specified in variable range string
*
* @param varRange variable range as string
* @return new frame with the non-deleted variables
*/
default Frame removeVars(String... varRange) {
return removeVars(VRange.of(varRange));
}
/**
* Builds a new frame with all the variables except ones in the given var indexes
*/
default Frame removeVars(int... varIndexes) {
return removeVars(VRange.of(varIndexes));
}
/**
* Adds the following rowCount at the end of the frame.
* The effect depends on the implementation, for solid frames
* it increases rowCount, for other types it creates
* a bounded frame.
*
* @param rowCount number of rowCount to be added
* @return new frame with rowCount appended
*/
Frame addRows(int rowCount);
/**
* Builds a new frame having rows of the current frame, followed by the rows of the bounded frame.
* The new frame must has the same variable definitions as the current frame.
*
* @param df given frame with additional rows
* @return new frame with additional rows
*/
Frame bindRows(Frame df);
/**
* Builds a new mapped frame having the given rows.
*
* @param rows given rows to be selected
* @return new mapped frame with given rows
*/
default Frame mapRows(int... rows) {
return mapRows(Mapping.copy(rows));
}
/**
* Builds a new frame only with rows specified in mapping.
*
* @param mapping a list of rows from a frame
* @return new frame with selected rows
*/
Frame mapRows(Mapping mapping);
/**
* Builds a new frame only with rows not specified in mapping.
*/
default Frame removeRows(int... rows) {
return removeRows(Mapping.copy(rows));
}
/**
* Builds a new frame only with rows not specified in mapping.
*/
default Frame removeRows(Mapping mapping) {
Set<Integer> remove = mapping.rowStream().mapToObj(i -> i).collect(Collectors.toSet());
List<Integer> map = IntStream.range(0, rowCount()).filter(row -> !remove.contains(row)).mapToObj(i -> i).collect(toList());
return mapRows(Mapping.wrap(map));
}
/**
* Returns double value corresponding to given row and var index
*
* @param row row number
* @param varIndex variable index
* @return numeric value
*/
default double value(int row, int varIndex) {
return var(varIndex).value(row);
}
/**
* Returns double value from given row and varName
*
* @param row row number
* @param varName variable name
* @return numeric value
*/
default double value(int row, String varName) {
return var(varName).value(row);
}
/**
* Set double value for given row and var index
*
* @param row row number
* @param varIndex var index
* @param value numeric value
*/
default void setValue(int row, int varIndex, double value) {
var(varIndex).setValue(row, value);
}
/**
* Convenient shortcut method to call {@link Var#setValue(int, double)} for a given variable.
*
* @param row row number
* @param varName var name
* @param value numeric value
*/
default void setValue(int row, String varName, double value) {
var(varName).setValue(row, value);
}
/**
* Convenient shortcut method for calling {@link Var#index(int)} for a given variable.
*
* @param row row number
* @param varIndex column number
* @return index value
*/
default int index(int row, int varIndex) {
if (varIndex >= varCount())
throw new IllegalArgumentException("frame has " + varCount() + " variables, there is no var at index: " + varIndex);
return var(varIndex).index(row);
}
/**
* Convenient shortcut method for calling {@link Var#index(int)} for a given variable.
*
* @param row row number
* @param varName var name
* @return index value
*/
default int index(int row, String varName) {
return var(varName).index(row);
}
/**
* Convenient shortcut method for calling {@link Var#setIndex(int, int)} for given variable.
*
* @param row row number
* @param varIndex var index
* @param value setIndex value
*/
default void setIndex(int row, int varIndex, int value) {
var(varIndex).setIndex(row, value);
}
/**
* Convenient shortcut method for calling {@link Var#setIndex(int, int)} for given variable.
*
* @param row row number
* @param varName var name
* @param value index value
*/
default void setIndex(int row, String varName, int value) {
var(varName).setIndex(row, value);
}
/**
* Convenient shortcut method for calling {@link Var#label(int)} for given variable.
*
* @param row row number
* @param varIndex var index
* @return nominal label value
*/
default String label(int row, int varIndex) {
return var(varIndex).label(row);
}
/**
* Convenient shortcut method for calling {@link Var#label(int)} for given variable.
*
* @param row row number
* @param varName var name
* @return nominal label value
*/
default String label(int row, String varName) {
return var(varName).label(row);
}
/**
* Convenient shortcut method for calling {@link Var#setLabel(int, String)} for given variable.
*
* @param row row number
* @param varIndex var index
* @param value nominal label value
*/
default void setLabel(int row, int varIndex, String value) {
var(varIndex).setLabel(row, value);
}
/**
* Convenient shortcut method for calling {@link Var#setLabel(int, String)} for given variable.
*
* @param row row number
* @param varName column name
* @param value nominal label value
*/
default void setLabel(int row, String varName, String value) {
var(varName).setLabel(row, value);
}
/**
* Returns binary value from the given cell
*
* @param row row number
* @param varIndex variable index
* @return binary value found
*/
default boolean binary(int row, int varIndex) {
return var(varIndex).binary(row);
}
/**
* Returns binary value from given cell
*
* @param row row number
* @param varName var name
* @return binary value found
*/
default boolean binary(int row, String varName) {
return var(varName).binary(row);
}
/**
* Binary value setter for given cell
*
* @param row row number
* @param varIndex var index
* @param value value to be set
*/
default void setBinary(int row, int varIndex, boolean value) {
var(varIndex).setBinary(row, value);
}
/**
* Binary value setter for given cell
*
* @param row row number
* @param varName var name
* @param value value to be set
*/
default void setBinary(int row, String varName, boolean value) {
var(varName).setBinary(row, value);
}
/**
* Convenient shortcut method for calling {@link Var#missing(int)} for given column
*
* @param row row number
* @param col column number
* @return true if missing, false otherwise
*/
default boolean missing(int row, int col) {
return var(col).missing(row);
}
/**
* Convenient shortcut method for calling {@link Var#missing(int)} for given column
*
* @param row row number
* @param varName var name
* @return true if missing, false otherwise
*/
default boolean missing(int row, String varName) {
return var(varName).missing(row);
}
/**
* Returns true if there is at least one missing value for the given row, in any column.
*
* @param row row number
* @return true if there is a missing value for any variable at the given row
*/
default boolean missing(int row) {
for (String colName : varNames()) {
if (var(colName).missing(row)) return true;
}
return false;
}
/**
* Convenient shortcut method for calling {@link Var#setMissing(int)} for given column
*
* @param row row number
* @param col column number
*/
default void setMissing(int row, int col) {
var(col).setMissing(row);
}
/**
* Convenient shortcut method for calling {@link Var#setMissing(int)} for given column
*
* @param row row number
* @param varName var name
*/
default void setMissing(int row, String varName) {
var(varName).setMissing(row);
}
default SolidFrame solidCopy() {
final String[] names = varNames();
final Var[] vars = new Var[names.length];
for (int i = 0; i < names.length; i++) {
vars[i] = var(names[i]).solidCopy().withName(names[i]);
}
return SolidFrame.byVars(vars);
}
/**
* @return a stream of FSpot
*/
default FSpots stream() {
return new FSpots(IntStream.range(0, rowCount()).mapToObj(row -> new FSpot(this, row)), this);
}
/**
* Returns a list of FSpots, one spot for each frame row
*
* @return list of spots
*/
default List<FSpot> spotList() {
return IntStream.range(0, rowCount()).mapToObj(row -> new FSpot(this, row)).collect(toList());
}
/**
* @return stream of all variables from frame
*/
default Stream<Var> varStream() {
return Arrays.stream(varNames()).map(this::var);
}
/**
* @return a list with all variables from a frame
*/
default List<Var> varList() {
return varStream().collect(toList());
}
default Frame fitApply(FFilter... inputFilters) {
Frame df = this;
for (FFilter filter : inputFilters) {
df = filter.fitApply(df);
}
return df;
}
static Collector<Var, List<Var>, Frame> collector() {
return new Collector<Var, List<Var>, Frame>() {
@Override
public Supplier<List<Var>> supplier() {
return LinkedList::new;
}
@Override
public BiConsumer<List<Var>, Var> accumulator() {
return List::add;
}
@Override
public BinaryOperator<List<Var>> combiner() {
return (list1, list2) -> {
list1.addAll(list2);
return list1;
};
}
@Override
public Function<List<Var>, Frame> finisher() {
return BoundFrame::byVars;
}
@Override
public Set<Characteristics> characteristics() {
return new HashSet<>();
}
};
}
@Override
default String summary() {
return Summary.summary(this);
}
default void printLines() {
printLines(rowCount());
}
default String lines(int to) {
Var[] vars = new Var[varCount()];
String[] names = varNames();
for (int i = 0; i < vars.length; i++) {
vars[i] = var(i);
}
return Summary.headString(to, vars, names);
}
default void printLines(int to) {
WS.code(lines(to));
}
default boolean deepEquals(Frame df) {
if (rowCount() != df.rowCount())
return false;
if (varCount() != df.varCount())
return false;
String[] names = varNames();
String[] dfNames = df.varNames();
if (names.length != dfNames.length)
return false;
for (int i = 0; i < names.length; i++) {
if (!(names[i].equals(dfNames[i]))) {
return false;
}
}
for (int i = 0; i < names.length; i++) {
if (!(var(names[i]).deepEquals(df.var(dfNames[i])))) {
return false;
}
}
return true;
}
}