package hex;

import water.H2O;
import water.Iced;
import water.Job;
import water.Key;
import water.fvec.Frame;
import water.fvec.FrameCreator;
import water.util.Log;
import water.util.PrettyPrint;

import java.util.Random;

/**
 * Create a Frame from scratch.
 * If randomize = true, then the frame is filled with random values.
 *
 * <p>The public fields are the creation parameters; {@link #execImpl()} validates them,
 * checks the estimated frame size against the cluster's free memory and starts the job.
 */
public class CreateFrame extends Iced {
  /** Job that is started by {@link #execImpl()}. */
  public final Job<Frame> _job;

  /** Number of rows; must be > 0. */
  public long rows = 10000;
  /** Number of columns; must be > 0. */
  public int cols = 10;
  /** Seed; -1 means a random seed is drawn in {@link #execImpl()}. */
  public long seed = -1;
  /** Seed for column types; -1 means {@link #seed} is reused. */
  public long seed_for_column_types = -1;
  /** Whether values are randomized; several parameters are only allowed when this is true. */
  public boolean randomize = true;
  /** Constant value; must stay 0 when randomize=true. */
  public long value = 0;
  /** Not read in this class; presumably the range of real-valued columns - TODO confirm in FrameCreator. */
  public long real_range = 100;
  /** Fraction of categorical columns, in [0, 1]. */
  public double categorical_fraction = 0.2;
  /** Number of factor levels; must be >= 2 when categorical_fraction > 0, and <= 1,000,000. */
  public int factors = 100;
  /** Fraction of integer columns, in [0, 1]. */
  public double integer_fraction = 0.2;
  /** Fraction of time columns, in [0, 1]. */
  public double time_fraction = 0.0;
  /** Fraction of string columns, in [0, 1]. */
  public double string_fraction = 0.0;
  /** Integer range; used here only to pick the per-value byte width for the size estimate. */
  public long integer_range = 100;
  /** Fraction of binary columns, in [0, 1]. */
  public double binary_fraction = 0.1;
  /** Fraction of ones in binary columns, in [0, 1]. */
  public double binary_ones_fraction = 0.02;
  /** Fraction of missing values, in [0, 1]. */
  public double missing_fraction = 0.01;
  /** 1 for a real-valued response, >= 2 for the number of factor levels of the response. */
  public int response_factors = 2;
  public boolean positive_response; // only for response_factors=1
  public boolean has_response = false;

  public CreateFrame(Key<Frame> key) {
    _job = new Job<>(key, Frame.class.getName(), "CreateFrame");
  }

  public CreateFrame() {
    this(Key.<Frame>make());
  }

  /**
   * Validates the parameters, checks the estimated size against the cluster's free memory
   * and starts the {@link FrameCreator}.
   *
   * @return the started job
   * @throws IllegalArgumentException if a parameter is out of range, the parameter combination
   *         is not allowed, or the estimated frame size exceeds the cluster's free memory
   */
  public Job<Frame> execImpl() {
    if (seed == -1) seed = new Random().nextLong();
    if (seed_for_column_types == -1) seed_for_column_types = seed;

    validateParameters();

    // estimate byte size of the frame
    final double byte_estimate = estimateByteSize();
    final long cluster_free_mem = H2O.CLOUD.free_mem();
    if (byte_estimate > cluster_free_mem)
      throw new IllegalArgumentException("Frame is expected to require " + PrettyPrint.bytes((long) byte_estimate)
              + ", won't fit into H2O's free memory of " + PrettyPrint.bytes(cluster_free_mem));

    if (!randomize) {
      if (integer_fraction != 0 || categorical_fraction != 0 || time_fraction != 0 || string_fraction != 0)
        throw new IllegalArgumentException("Cannot have integer, categorical, time or string fractions > 0 unless randomize=true.");
    } else {
      if (value != 0)
        throw new IllegalArgumentException("Cannot set data to a constant value if randomize=true.");
    }
    Log.info("Generated seed: " + seed);

    FrameCreator fc = new FrameCreator(this);
    return _job.start(fc, fc.nChunks() * 7); // And start FrameCreator
  }

  /** Range-checks the individual parameters, in the same order as they have always been checked. */
  private void validateParameters() {
    // small tolerance so that fractions summing to 1 up to rounding error are accepted
    if (integer_fraction + binary_fraction + categorical_fraction + time_fraction + string_fraction > 1.00000001)
      throw new IllegalArgumentException("Integer, binary, categorical, time and string fractions must add up to <= 1.");
    checkFraction(missing_fraction, "Missing");
    checkFraction(integer_fraction, "Integer");
    checkFraction(binary_fraction, "Binary");
    checkFraction(time_fraction, "Time");
    checkFraction(string_fraction, "String");
    checkFraction(binary_ones_fraction, "Binary ones");
    checkFraction(categorical_fraction, "Categorical");
    if (categorical_fraction > 0 && factors <= 1)
      throw new IllegalArgumentException("Factors must be at least 2 for categorical data.");
    if (response_factors < 1)
      throw new IllegalArgumentException("Response factors must be either 1 (real-valued response), or >=2 (factor levels).");
    if (response_factors > Model.Parameters.MAX_SUPPORTED_LEVELS)
      throw new IllegalArgumentException("Response factors must be <= " + Model.Parameters.MAX_SUPPORTED_LEVELS + ".");
    if (factors > 1000000)
      throw new IllegalArgumentException("Number of factors must be <= 1,000,000.");
    if (cols <= 0 || rows <= 0)
      throw new IllegalArgumentException("Must have number of rows > 0 and columns > 0.");
  }

  /** Throws if {@code fraction} lies outside [0, 1]; {@code label} is the capitalized name used in the message. */
  private static void checkFraction(double fraction, String label) {
    if (fraction < 0 || fraction > 1)
      throw new IllegalArgumentException(label + " fraction must be between 0 and 1.");
  }

  /**
   * Estimates the size of the frame in bytes. String columns contribute nothing to the estimate.
   *
   * @return the estimated byte size, or 0 if randomize=false
   */
  private double estimateByteSize() {
    if (!randomize) return 0; // all constants - should be small

    final int categoricalBytes = factors < 128 ? 1 : factors < 32768 ? 2 : 4;
    // 1L: an int shift (1<<31) is Integer.MIN_VALUE and would make the 4-byte tier unreachable
    final int integerBytes = integer_range < 128 ? 1 : integer_range < 32768 ? 2 : integer_range < (1L << 31) ? 4 : 8;
    final double realFraction =
            1 - integer_fraction - binary_fraction - categorical_fraction - time_fraction - string_fraction;

    final double bytesPerCell =
            binary_fraction * 1. / 8 //bits
            + categorical_fraction * categoricalBytes
            + integer_fraction * integerBytes
            + time_fraction * 8
            + realFraction * 8; //reals

    // widen to double before multiplying so that huge rows*cols cannot overflow long
    return (double) rows * cols * bytesPerCell
            + rows; //response
  }
}