package water.api.schemas3;
import java.util.Locale;

import water.AutoBuffer;
import water.H2O;
import water.Iced;
import water.IcedWrapper;
import water.api.API;
import water.util.TwoDimTable;
/**
* Client-facing Schema of a TwoDimTable
* Notes:
* 1) We embed the rowHeaders into the table, extending it by 1 column
* 2) We store all the data in column-major order
* 3) We store each cell as an IcedWrapper around its value (row headers are wrapped Strings)
*
*/
public class TwoDimTableV3 extends SchemaV3<TwoDimTable, TwoDimTableV3> {

  /**
   * Per-column metadata of the table: JSON-friendly field name, value type,
   * printf format and the original (human readable) column header.
   */
  public static class ColumnSpecsBase extends SchemaV3<Iced, ColumnSpecsBase> {
    @API(help="Column Name", direction=API.Direction.OUTPUT)
    String name;
    @API(help="Column Type", direction=API.Direction.OUTPUT)
    String type;
    @API(help="Column Format (printf)", direction=API.Direction.OUTPUT)
    String format;
    @API(help="Column Description", direction=API.Direction.OUTPUT)
    String description;
  }

  @API(help="Table Name", direction=API.Direction.OUTPUT)
  public String name;
  @API(help="Table Description", direction=API.Direction.OUTPUT)
  public String description;
  @API(help="Column Specification", direction=API.Direction.OUTPUT)
  public ColumnSpecsBase[] columns;
  @API(help="Number of Rows", direction=API.Direction.OUTPUT)
  public int rowcount;
  @API(help="Table Data (col-major)", direction=API.Direction.OUTPUT)
  public IcedWrapper[][] data;

  public TwoDimTableV3() {}
  public TwoDimTableV3(TwoDimTable impl) { super(impl); }

  /**
   * Fill this schema from a TwoDimTable.
   * <p>
   * If the table has a header for the row-header column and every row header is
   * non-null, the row headers become schema column 0 (type "string", format "%s")
   * and all table columns are shifted right by one. Otherwise the schema columns
   * map 1:1 onto the table columns.
   *
   * @param t TwoDimTable to read from
   * @return this schema, with name, description, columns, rowcount and data set
   */
  @Override
  public TwoDimTableV3 fillFromImpl(TwoDimTable t) {
    // The table name has to be assigned first: pythonify() consults it.
    name = t.getTableHeader();
    description = t.getTableDescription();
    final int rows = t.getRowDim();
    rowcount = rows;

    final boolean withRowHeaders = hasCompleteRowHeaders(t, rows);
    final int offset = withRowHeaders ? 1 : 0; // schema columns reserved for the row headers
    final int cols = t.getColDim() + offset;

    final String[] colHeaders = t.getColHeaders();
    final String[] colTypes = t.getColTypes();
    final String[] colFormats = t.getColFormats();
    final IcedWrapper[][] cellValues = t.getCellValues(); // row-major: [row][col]

    columns = new ColumnSpecsBase[cols];
    data = new IcedWrapper[cols][rows]; // column-major: [col][row]

    if (withRowHeaders) {
      final String[] rowHeaders = t.getRowHeaders();
      columns[0] = newColumnSpec(t.getColHeaderForRowHeaders(), "string", "%s");
      for (int r = 0; r < rows; ++r) {
        data[0][r] = new IcedWrapper(rowHeaders[r]);
      }
    }
    for (int c = offset; c < cols; ++c) {
      final int src = c - offset; // index of this column in the TwoDimTable
      columns[c] = newColumnSpec(colHeaders[src], colTypes[src], colFormats[src]);
      for (int r = 0; r < rows; ++r) {
        data[c][r] = cellValues[r][src];
      }
    }
    return this;
  }

  /**
   * Decide whether the row headers can be embedded as a schema column.
   *
   * @param t    table to inspect
   * @param rows number of rows of the table
   * @return true iff the table names its row-header column and none of the first
   *         {@code rows} row headers is null
   */
  private static boolean hasCompleteRowHeaders(TwoDimTable t, int rows) {
    if (t.getColHeaderForRowHeaders() == null) return false;
    if (rows == 0) return true;
    final String[] rowHeaders = t.getRowHeaders();
    if (rowHeaders == null) return false;
    for (int r = 0; r < rows; ++r) {
      if (rowHeaders[r] == null) return false;
    }
    return true;
  }

  /**
   * Build one column specification.
   *
   * @param header original column header; kept as the description and
   *               pythonified into the field name
   * @param type   column type
   * @param format printf format of the column
   * @return the populated specification
   */
  private ColumnSpecsBase newColumnSpec(String header, String type, String format) {
    final ColumnSpecsBase spec = new ColumnSpecsBase();
    spec.name = pythonify(header);
    spec.type = type;
    spec.format = format;
    spec.description = header;
    return spec;
  }

  /**
   * Turn a description such as "Avg. Training MSE" into a JSON-usable field name "avg_training_mse".
   * <p>
   * Words are split on whitespace/underscores, lower-cased, joined with '_' and
   * stripped of every character that is not matched by the regex class {@code \w}.
   * Column headers of tables whose name contains "confusion" are returned unchanged.
   *
   * @param n column header, may be null
   * @return the field name, or {@code n} itself if it is null or belongs to a "confusion" table
   */
  private String pythonify(String n) {
    if (n == null) return null;
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (e.g. a Turkish locale lower-cases 'I' to a dotless i, which \w then strips).
    if (name != null && name.toLowerCase(Locale.ROOT).contains("confusion")) return n;
    final String[] words = n.split("[\\s_]+");
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < words.length; ++i) {
      if (i != 0) sb.append('_');
      sb.append(words[i].toLowerCase(Locale.ROOT)); // everything goes lowercase
    }
    return sb.toString().replaceAll("[^\\w]", "");
  }

  /**
   * Build a new TwoDimTable from this schema.
   * <p>
   * Schema column 0 is taken to hold the row headers (the layout fillFromImpl
   * produces when row headers are present); schema column {@code c+1} becomes
   * table column {@code c}. Cells of "string" columns are passed as Strings,
   * cells of "double", "float", "int" and "long" columns as doubles.
   * <p>
   * NOTE(review): a schema that was filled without a row-header column cannot be
   * told apart here and would lose its first column - confirm callers never do that.
   *
   * @param impl ignored; a fresh TwoDimTable is always created
   * @return the reconstructed table
   * @throws IllegalStateException if data/columns are missing or inconsistent, or
   *         a numeric cell holds no value
   * @throws RuntimeException wrapping a ClassCastException if a cell value does
   *         not match its column type
   */
  public TwoDimTable fillImpl(TwoDimTable impl) {
    if (data == null || columns == null || data.length == 0 || columns.length != data.length) {
      throw new IllegalStateException(
          "TwoDimTableV3 needs a row-header column and one column spec per data column to rebuild a TwoDimTable");
    }
    final int rows = data[0].length;
    assert(rows == rowcount);
    final int cols = data.length - 1; // schema column 0 carries the row headers

    // The description keeps the original header; name is only its lossy pythonified form.
    final String colHeaderForRowHeaders =
        columns[0].description != null ? columns[0].description : columns[0].name;

    final String[] rowHeaders = new String[rows];
    for (int r = 0; r < rows; ++r) {
      rowHeaders[r] = (String) unwrap(data[0][r]);
    }

    final String[] colHeaders = new String[cols];
    final String[] colTypes = new String[cols];
    final String[] colFormats = new String[cols];
    // Same convention as before: string cells leave the double slot at 0.0 and
    // numeric cells leave the String slot null.
    // NOTE(review): confirm the TwoDimTable constructor accepts this for mixed tables.
    final String[][] strCellValues = new String[rows][cols];
    final double[][] dblCellValues = new double[rows][cols];

    for (int c = 0; c < cols; ++c) {
      final ColumnSpecsBase spec = columns[c + 1];
      final IcedWrapper[] column = data[c + 1];
      colHeaders[c] = spec.description;
      colTypes[c] = spec.type;
      colFormats[c] = spec.format;
      for (int r = 0; r < rows; ++r) {
        final Object value = unwrap(column[r]);
        try {
          // The dispatch is on the column *type*; format is a printf pattern.
          if ("string".equals(spec.type)) { // switch(String) is not java1.6 compliant!
            strCellValues[r][c] = (String) value;
          } else if (isNumericType(spec.type)) {
            if (value == null) {
              // NOTE(review): TwoDimTable's marker for an empty numeric cell is not
              // visible from this class, so fail loudly instead of inventing a value.
              throw new IllegalStateException(
                  "No value for numeric cell at row " + r + ", column " + c + " ('" + spec.description + "')");
            }
            dblCellValues[r][c] = ((Number) value).doubleValue();
          } else {
            throw H2O.fail();
          }
        } catch (ClassCastException e) {
          throw new RuntimeException(e);
        }
      }
    }
    return new TwoDimTable(name, description, rowHeaders, colHeaders, colTypes, colFormats,
                           colHeaderForRowHeaders, strCellValues, dblCellValues);
  }

  /** @return true iff {@code type} is one of "double", "float", "int" or "long" */
  private static boolean isNumericType(String type) {
    return "double".equals(type) || "float".equals(type) || "int".equals(type) || "long".equals(type);
  }

  /** @return the wrapped value, or null if the wrapper itself is null */
  private static Object unwrap(IcedWrapper w) {
    return w == null ? null : w.get();
  }

  /**
   * Write the fields "name", "description", "columns", "rowcount" and "data" as
   * comma separated JSON members into the buffer; no enclosing braces are written
   * here. A null columns/data array is emitted as an empty JSON array, and a cell
   * that is null or wraps null is emitted as JSON null.
   *
   * @param ab buffer to append to
   * @return the same buffer
   */
  public final AutoBuffer writeJSON_impl(AutoBuffer ab) {
    ab.putJSONStr("name",name);
    ab.put1(',');
    ab.putJSONStr("description",description);
    ab.put1(',');

    ab.putJSONStr("columns").put1(':');
    ab.put1('[');
    if( columns!=null ) {
      for (int i = 0; i < columns.length; ++i) {
        if (i > 0) ab.put1(',');
        columns[i].writeJSON(ab);
      }
    }
    ab.put1(']');
    ab.put1(',');

    ab.putJSON4("rowcount", rowcount);
    ab.put1(',');

    ab.putJSONStr("data").put1(':');
    ab.put1('[');
    if( data!=null ) {
      for (int i = 0; i < data.length; ++i) {
        if (i > 0) ab.put1(',');
        final IcedWrapper[] column = data[i];
        ab.put1('[');
        for (int j = 0; j < column.length; ++j) {
          if (j > 0) ab.put1(',');
          final IcedWrapper cell = column[j];
          if (cell == null || cell.get() == null) {
            ab.putJNULL();
          } else {
            cell.writeUnwrappedJSON(ab);
          }
        }
        ab.put1(']');
      }
    }
    ab.put1(']');
    return ab;
  }
}