package water.rapids.ast.prims.mungers;

import water.fvec.*;
import water.rapids.Env;
import water.rapids.Val;
import water.rapids.ast.AstPrimitive;
import water.rapids.ast.AstRoot;
import water.rapids.vals.ValNums;

import java.util.ArrayList;

/**
 * Get column indexes of an H2OFrame that are of a certain data type.
 * <p/>
 * This will take an H2OFrame and return all column indexes based on a specific data type (numeric, categorical,
 * string, time, uuid, and bad).
 * <p/>
 *
 * @author navdeepgill
 * @version 3.10
 * @since 3.10
 */
public class AstColumnsByType extends AstPrimitive {

  /** @return the argument names of this primitive: the frame ("ary") and the type name ("type"). */
  @Override
  public String[] args() {
    return new String[]{"ary", "type"};
  }

  /**
   * The column types that can be filtered on. Each constant carries the {@link Vec} test that decides
   * whether a column belongs to it, so the per-column check is a single call.
   */
  private enum DType {
    /** Numeric, but not categorical or time. */
    Numeric {
      @Override
      boolean matches(Vec v) {
        return v.isNumeric();
      }
    },
    /** Integer, with a categorical/factor String mapping. */
    Categorical {
      @Override
      boolean matches(Vec v) {
        return v.isCategorical();
      }
    },
    /** String. */
    String {
      @Override
      boolean matches(Vec v) {
        return v.isString();
      }
    },
    /** Long msec since the Unix Epoch - with a variety of display/parse options. */
    Time {
      @Override
      boolean matches(Vec v) {
        return v.isTime();
      }
    },
    /** UUID. */
    UUID {
      @Override
      boolean matches(Vec v) {
        return v.isUUID();
      }
    },
    /** No non-NA rows (all NAs or zero rows). */
    Bad {
      @Override
      boolean matches(Vec v) {
        return v.isBad();
      }
    };

    /** @return true when the given column is of this type. */
    abstract boolean matches(Vec v);

    /**
     * Maps the user-supplied type name onto a filter.
     *
     * @param type one of "numeric", "categorical", "string", "time", "uuid" or "bad" (case-sensitive)
     * @return the matching filter
     * @throws IllegalArgumentException if the name is not one of the supported types
     */
    static DType parse(java.lang.String type) {
      switch (type) {
        case "numeric":
          return Numeric;
        case "categorical":
          return Categorical;
        case "string":
          return String;
        case "time":
          return Time;
        case "uuid":
          return UUID;
        case "bad":
          return Bad;
        default:
          throw new IllegalArgumentException("unknown data type to filter by: " + type);
      }
    }
  }

  /** @return the name of this primitive, "columnsByType". */
  @Override
  public String str() {
    return "columnsByType";
  }

  /** @return the number of AST slots: the primitive itself plus its two arguments. */
  @Override
  public int nargs() {
    return 1 + 2; // ary type
  }

  /**
   * Collects the zero-based indexes of the frame's columns whose {@link Vec} is of the requested type.
   *
   * @param env  execution environment used to evaluate the arguments
   * @param stk  stack helper that tracks the evaluated argument values
   * @param asts {@code asts[1]} evaluates to the frame, {@code asts[2]} to the type name
   * @return the matching column indexes, in ascending order, as doubles
   * @throws IllegalArgumentException if the type name is not supported
   */
  @Override
  public ValNums apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    String type = stk.track(asts[2].exec(env)).getStr();

    // Validate the type name first so a bad name fails before the frame's columns are touched.
    DType dtype = DType.parse(type);

    Vec[] vecs = fr.vecs();
    int ncols = fr.numCols();

    // At most every column matches, so a buffer of ncols slots is always large enough.
    double[] matched = new double[ncols];
    int count = 0;
    for (int col = 0; col < ncols; col++) {
      if (dtype.matches(vecs[col])) {
        matched[count++] = col;
      }
    }

    // Trim the buffer down to the columns that actually matched.
    double[] include_cols = new double[count];
    System.arraycopy(matched, 0, include_cols, 0, count);
    return new ValNums(include_cols);
  }
}