package hex;
import water.*;
import water.fvec.*;
import water.util.ArrayUtils;
import static water.util.FrameUtils.generateNumKeys;
/**
 * Split a given frame into several frames according to the given ratios.
 *
 * <p>The number of produced frames is derived in {@link #execImpl()}:
 * <ul>
 *   <li>a single ratio yields two frames (FIXME: a later API version is expected to reject
 *       a single ratio with an exception);</li>
 *   <li>N &gt; 1 ratios whose sum is at least 1 are normalized by their sum and yield N frames;</li>
 *   <li>N &gt; 1 ratios whose sum is below 1 are used unscaled and yield N + 1 frames.</li>
 * </ul>
 */
public class SplitFrame extends Transformer<SplitFrame.Frames> {
  /** Input dataset to split */
  public Frame _dataset;
  /** Split ratios */
  public double[] _ratios;
  /** Output destination keys. */
  public Key<Frame>[] _destination_frames;

  /**
   * Creates a splitter for the given dataset.
   *
   * @param dataset            frame to split
   * @param ratios             split ratios; validated in {@link #execImpl()}
   * @param destination_frames keys of the output frames, or {@code null} to have them generated
   */
  public SplitFrame(Frame dataset, double[] ratios, Key<Frame>[] destination_frames) {
    this();
    _dataset = dataset;
    _ratios = ratios;
    _destination_frames = destination_frames;
  }

  /** Creates an unconfigured splitter; the public fields have to be set before execution. */
  public SplitFrame() {
    super(null, "hex.SplitFrame$Frames", "SplitFrame");
  }

  /**
   * Validates the configured ratios and destination keys, derives the ratios handed to
   * {@link FrameSplitter} and starts the split job.
   *
   * @return the started job
   * @throws IllegalArgumentException if no ratio is given, more than 100 ratios are given,
   *         any ratio is not strictly positive (including NaN), a single ratio exceeds 1,
   *         or the number of destination keys does not match the number of ratios
   */
  @Override
  public Job<Frames> execImpl() {
    // An empty (or missing) ratio array can never describe a split.
    if (_ratios == null || _ratios.length == 0) {
      throw new IllegalArgumentException("No ratio specified!");
    }
    if (_ratios.length > 100) {
      throw new IllegalArgumentException("Too many frame splits demanded!");
    }
    for (double ratio : _ratios) {
      // Negated comparison on purpose: it rejects NaN in addition to zero and negative values.
      if (!(ratio > 0.0)) {
        throw new IllegalArgumentException("Ratio must be > 0!");
      }
    }
    // Check the case for single ratio - FIXME in /4 version change this to throw exception
    // (the lower bound is already enforced by the loop above).
    if (_ratios.length == 1 && _ratios[0] > 1.0) {
      throw new IllegalArgumentException("Ratio must be between 0 and 1!");
    }
    if (_destination_frames != null) {
      // NOTE(review): this check accepts one key for a single ratio, and N keys for N ratios
      // summing below 1, although computedRatios.length + 1 outputs are requested below in
      // both cases - confirm against FrameSplitter's expectations.
      boolean singleRatioWithTwoKeys = _ratios.length == 1 && _destination_frames.length == 2;
      boolean oneKeyPerRatio = _ratios.length == _destination_frames.length;
      if (!singleRatioWithTwoKeys && !oneKeyPerRatio) {
        throw new IllegalArgumentException("Number of destination keys has to match to a number of split ratios!");
      }
    }

    // If array of ratios is given scale them and take first n-1 and pass them to FrameSplitter
    final double[] computedRatios;
    if (_ratios.length > 1) {
      double sum = ArrayUtils.sum(_ratios);
      if (sum <= 0.0) {
        throw new IllegalArgumentException("Ratios sum has to be > 0!");
      }
      if (sum < 1) {
        // Ratios do not cover the whole frame: pass them through unscaled.
        computedRatios = _ratios;
      } else {
        // Normalize by the sum and drop the last ratio; it is implied by the others.
        computedRatios = new double[_ratios.length - 1];
        for (int i = 0; i < computedRatios.length; i++) {
          computedRatios[i] = _ratios[i] / sum;
        }
      }
    } else {
      computedRatios = _ratios;
    }

    // Create destination keys if not specified
    if (_destination_frames == null) {
      _destination_frames = generateNumKeys(_dataset._key, computedRatios.length + 1);
    }

    FrameSplitter fs = new FrameSplitter(_dataset, computedRatios, _destination_frames, _job._key);
    return _job.start(fs, computedRatios.length + 1);
  }

  /** Result holder carrying the keys of the produced frames. */
  public static class Frames extends Keyed {
    public Key<Frame>[] _keys;
  }
}