/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.data;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* Utility tool to ease the specification of selection of variable,
* based on lists or ranges of variable names.
* Variable ranges can be specified directly as a list of variable indexes
* or as a list of variable ranges.
* <p>
* Variable ranges syntax uses as range separator "~", and as column
* range delimiter the comma ",". Thus "a~d" means all the variables, starting
* with variable a and ending with variable d, inclusive. A single variable
* name is also a range.
*
* @author <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
*/
public interface VRange {
static VRange all() {
return new VRangeByName(VRangeByName.ALL);
}
static VRange of(String... varNames) {
return new VRangeByName(varNames);
}
static VRange of(Collection<String> varNames) {
String[] names = new String[varNames.size()];
int i = 0;
for (String varName : varNames) {
names[i++] = varName;
}
return new VRangeByName(names);
}
static VRange of(int... varIndexes) {
return new VRangeByName(varIndexes);
}
static VRange byName(Predicate<String> filter) {
return new VRangeByPredName(filter);
}
static VRange byFilter(Predicate<Var> filter) {
return new VRangeByPred(filter);
}
static VRange onlyTypes(VarType... types) {
Set<VarType> keep = Arrays.stream(types).collect(Collectors.toSet());
return new VRangeByPred(var -> keep.contains(var.type()));
}
List<Integer> parseVarIndexes(Frame df);
List<String> parseVarNames(Frame df);
List<String> parseInverseVarNames(Frame df);
}
class VRangeByName implements VRange {
static final String DELIMITER = ",";
static final String RANGE = "~";
static final String ALL = "all";
private final String rawColumnRange;
/**
* Builds a var range directly from a list of var indexes.
*
* @param indexes list of var indexes
*/
public VRangeByName(int... indexes) {
if (indexes.length == 0) {
throw new IllegalArgumentException("No column indexes specified.");
}
StringBuilder sb = new StringBuilder();
for (int i = 0; i < indexes.length; i++) {
if (i > 0) {
sb.append(",");
}
sb.append(String.valueOf(indexes[i]));
}
this.rawColumnRange = sb.toString();
}
/**
* Builds a var range from var ranges formatted as strings with the required syntax.
*
* @param ranges var ranges specified in string format
*/
public VRangeByName(String... ranges) {
StringBuilder sb = new StringBuilder();
Arrays.stream(ranges).forEach(s -> {
if (sb.length() > 0)
sb.append(DELIMITER);
sb.append(s);
});
this.rawColumnRange = sb.toString();
}
/**
* Apply a var range over a frame, obtaining the list of var indexes for that frame.
*
* @param df target frame
* @return a list of column indexes which corresponds to column range
*/
@Override
public List<Integer> parseVarIndexes(Frame df) {
List<Integer> colIndexes = new ArrayList<>();
if (ALL.equals(rawColumnRange)) {
for (int i = 0; i < df.varCount(); i++) {
colIndexes.add(i);
}
return colIndexes;
}
String[] ranges = rawColumnRange.split(DELIMITER);
Set<String> colNames = df.varStream().map(Var::name).collect(Collectors.toSet());
for (String range : ranges) {
int start, end;
if (range.contains(RANGE)) {
String[] parts = range.split(RANGE);
if (!colNames.contains(parts[0])) {
start = Integer.parseInt(parts[0]);
} else {
start = df.varIndex(parts[0]);
}
if (!colNames.contains(parts[1])) {
end = Integer.parseInt(parts[1]);
} else {
end = df.varIndex(parts[1]);
}
} else {
if (!colNames.contains(range)) {
try {
start = Integer.parseInt(range);
} catch (NumberFormatException ex) {
continue;
}
} else {
start = df.varIndex(range);
}
end = start;
}
for (int j = start; j <= end; j++) {
colIndexes.add(j);
}
}
return colIndexes;
}
@Override
public List<String> parseVarNames(Frame df) {
return parseVarIndexes(df).stream().map(i -> df.varNames()[i]).collect(Collectors.toList());
}
@Override
public List<String> parseInverseVarNames(Frame df) {
Set<Integer> indexes = new HashSet<>(parseVarIndexes(df));
return IntStream.range(0, df.varCount()).filter(i -> !indexes.contains(i)).boxed().map(i -> df.var(i).name()).collect(Collectors.toList());
}
}
class VRangeByPredName implements VRange {
private final Predicate<String> predicate;
VRangeByPredName(Predicate<String> predicate) {
this.predicate = predicate;
}
@Override
public List<Integer> parseVarIndexes(Frame df) {
return IntStream.range(0, df.varCount())
.filter(i -> predicate.test(df.var(i).name()))
.boxed()
.collect(Collectors.toList());
}
@Override
public List<String> parseVarNames(Frame df) {
return df.varStream().map(Var::name)
.filter(predicate::test)
.collect(Collectors.toList());
}
@Override
public List<String> parseInverseVarNames(Frame df) {
return df.varStream().map(Var::name)
.filter(name -> !predicate.test(name))
.collect(Collectors.toList());
}
}
class VRangeByPred implements VRange {
private final Predicate<Var> predicate;
VRangeByPred(Predicate<Var> predicate) {
this.predicate = predicate;
}
@Override
public List<Integer> parseVarIndexes(Frame df) {
return IntStream.range(0, df.varCount())
.filter(i -> predicate.test(df.var(i)))
.boxed()
.collect(Collectors.toList());
}
@Override
public List<String> parseVarNames(Frame df) {
return df.varStream()
.filter(predicate::test)
.map(Var::name)
.collect(Collectors.toList());
}
@Override
public List<String> parseInverseVarNames(Frame df) {
return df.varStream()
.filter(var -> !predicate.test(var))
.map(Var::name)
.collect(Collectors.toList());
}
}