/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.printer;
import rapaio.core.stat.Mean;
import rapaio.core.stat.Quantiles;
import rapaio.data.Frame;
import rapaio.data.Var;
import rapaio.data.VarType;
import rapaio.printer.Printable;
import rapaio.printer.format.TextTable;
import rapaio.sys.WS;
import java.util.Arrays;
import static rapaio.sys.WS.code;
import static rapaio.sys.WS.getPrinter;
/**
* TODO: this class should not exist anymore, the code should be placed at each proper class
*
* @author tutuianu
*/
@Deprecated
public class Summary {
public static String summary(Frame df) {
return summary(df, df.varNames());
}
public static String summary(Frame df, String... names) {
StringBuilder buffer = new StringBuilder();
buffer.append("Frame Summary\n");
buffer.append("=============\n");
if (df == null) {
buffer.append("null instance of frame.\n");
return buffer.toString();
}
buffer.append("* rowCount: ").append(df.rowCount()).append("\n");
buffer.append("* complete: ").append(df.stream().complete().count()).append("/").append(df.rowCount()).append("\n");
buffer.append("* varCount: ").append(df.varCount()).append("\n");
buffer.append("* varNames: \n");
TextTable tt = TextTable.newEmpty(df.varCount(), 5);
for (int i = 0; i < df.varCount(); i++) {
tt.set(i, 0, i + ".", 1);
tt.set(i, 1, df.var(i).name(), 1);
tt.set(i, 2, ":", -1);
tt.set(i, 3, df.var(i).type().code(), -1);
tt.set(i, 4, "|", 1);
}
tt.withMerge();
buffer.append("\n").append(tt.summary()).append("\n");
String[][] first = new String[names.length][7];
String[][] second = new String[names.length][7];
for (int i = 0; i < 7; i++) {
for (int j = 0; j < names.length; j++) {
first[j][i] = " ";
second[j][i] = " ";
}
}
for (int k = 0; k < names.length; k++) {
int i = df.varIndex(names[k]);
Var v = df.var(i);
if (v.type() == VarType.BINARY) {
first[k][0] = "0";
first[k][1] = "1";
first[k][2] = "NA's";
int ones = 0;
int zeros = 0;
int missing = 0;
for (int j = 0; j < v.rowCount(); j++) {
if (v.missing(j)) {
missing++;
} else {
if (v.binary(j))
ones++;
else
zeros++;
}
}
second[k][0] = String.valueOf(zeros);
second[k][1] = String.valueOf(ones);
second[k][2] = String.valueOf(missing);
continue;
}
if (v.type() == VarType.INDEX || v.type() == VarType.NUMERIC) {
double[] p = new double[]{0., 0.25, 0.50, 0.75, 1.00};
double[] perc = Quantiles.from(v, p).values();
double mean = Mean.from(v).value();
int nas = 0;
for (int j = 0; j < df.rowCount(); j++) {
if (v.missing(j)) {
nas++;
}
}
first[k][0] = "Min.";
first[k][1] = "1st Qu.";
first[k][2] = "Median";
first[k][3] = "Mean";
first[k][4] = "2nd Qu.";
first[k][5] = "Max.";
second[k][0] = String.format("%.3f", perc[0]);
second[k][1] = String.format("%.3f", perc[1]);
second[k][2] = String.format("%.3f", perc[2]);
second[k][3] = String.format("%.3f", mean);
second[k][4] = String.format("%.3f", perc[3]);
second[k][5] = String.format("%.3f", perc[4]);
if (nas != 0) {
first[k][6] = "NA's";
second[k][6] = String.format("%d", nas);
}
}
if (v.type().isNominal()) {
int[] hits = new int[v.levels().length];
int[] indexes = new int[v.levels().length];
for (int j = 0; j < df.rowCount(); j++) {
hits[v.index(j)]++;
indexes[v.index(j)] = j;
}
int[] tophit = new int[6];
int[] topindex = new int[6];
for (int j = 1; j < hits.length; j++) {
if (hits[j] != 0) {
for (int l = 0; l < tophit.length; l++) {
if (tophit[l] < hits[j]) {
for (int m = tophit.length - 1; m > l; m--) {
tophit[m] = tophit[m - 1];
topindex[m] = topindex[m - 1];
}
tophit[l] = hits[j];
topindex[l] = j;
break;
}
}
}
}
int nas = 0;
for (int j = 0; j < df.rowCount(); j++) {
if (v.missing(j)) {
nas++;
}
}
int other = df.rowCount();
int pos = 0;
for (int j = 0; j < 6; j++) {
if (tophit[j] != 0) {
other -= tophit[j];
first[k][j] = v.label(indexes[topindex[j]]);
second[k][j] = String.valueOf(tophit[j]);
pos++;
}
}
if (nas != 0) {
if (other - nas != 0) {
if (pos == 6) {
pos--;
}
first[k][pos] = "(Other)";
second[k][pos] = String.valueOf(other - nas);
pos++;
}
first[k][pos] = "NA's";
second[k][pos] = String.valueOf(nas);
} else {
if (other != 0) {
first[k][pos] = "(Other)";
second[k][pos] = String.valueOf(other);
}
}
}
}
// learn layout
int[] width = new int[names.length];
int[] wfirst = new int[names.length];
int[] wsecond = new int[names.length];
for (int i = 0; i < names.length; i++) {
width[i] = names[i].length();
}
for (int j = 0; j < 7; j++) {
for (int i = 0; i < names.length; i++) {
wfirst[i] = Math.max(wfirst[i], first[i][j].length());
wsecond[i] = Math.max(wsecond[i], second[i][j].length());
}
}
for (int i = 0; i < names.length; i++) {
width[i] = Math.max(width[i], wfirst[i] + wsecond[i] + 3);
wfirst[i] = width[i] - 3 - wsecond[i];
}
int witdh = getPrinter().textWidth();
int pos = 0;
while (pos < names.length) {
int last = pos;
int remain = witdh;
while (true) {
if (last < names.length && remain >= width[last]) {
remain -= width[last];
last++;
continue;
}
break;
}
if (last == pos) {
last++;
}
// output text from pos to last
StringBuilder sb = new StringBuilder();
for (int i = pos; i < last; i++) {
String colName = names[i];
if (sb.length() != 0) {
sb.append(" ");
}
sb.append(String.format("%" + width[i] + "s ", colName));
}
buffer.append(sb.toString()).append("\n");
for (int j = 0; j < 7; j++) {
sb.append("\n");
sb = new StringBuilder();
for (int i = pos; i < last; i++) {
if (sb.length() != 0) {
sb.append(" ");
}
sb.append(String.format("%" + wfirst[i] + "s", first[i][j]));
if (" ".equals(first[i][j]) && " ".equals(second[i][j])) {
sb.append(" ");
} else {
sb.append(" : ");
}
sb.append(String.format("%" + wsecond[i] + "s", second[i][j]));
sb.append(" ");
}
buffer.append(sb.toString());
if (last != names.length || j != 6) {
buffer.append("\n");
}
}
pos = last;
}
buffer.append("\n");
return buffer.toString();
}
public static String summary(Var v) {
StringBuilder sb = new StringBuilder();
sb.append("> printSummary(var: ").append(v.name()).append(")\n");
sb.append("name: ").append(v.name()).append("\n");
sb.append("type: ").append(v.type().name()).append("\n");
int complete = (int) v.stream().complete().count();
sb.append("rows: ").append(v.rowCount()).append(", complete: ").append(complete).append(", missing: ").append(v.rowCount() - complete).append("\n");
String[] first = new String[7];
String[] second = new String[7];
for (int i = 0; i < 7; i++) {
first[i] = " ";
second[i] = " ";
}
if (v.type() == VarType.BINARY) {
first[0] = "0";
first[1] = "1";
first[2] = "NA's";
int ones = 0;
int zeros = 0;
int missing = 0;
for (int i = 0; i < v.rowCount(); i++) {
if (v.missing(i)) {
missing++;
} else {
if (v.binary(i))
ones++;
else
zeros++;
}
}
second[0] = String.valueOf(zeros);
second[1] = String.valueOf(ones);
second[2] = String.valueOf(missing);
}
if (v.type() == VarType.INDEX || v.type() == VarType.NUMERIC) {
double[] p = new double[]{0., 0.25, 0.50, 0.75, 1.00};
double[] perc = Quantiles.from(v, p).values();
double mean = Mean.from(v).value();
int nas = 0;
for (int j = 0; j < v.rowCount(); j++) {
if (v.missing(j)) {
nas++;
}
}
first[0] = "Min.";
first[1] = "1st Qu.";
first[2] = "Median";
first[3] = "Mean";
first[4] = "2nd Qu.";
first[5] = "Max.";
second[0] = String.format("%.3f", perc[0]);
second[1] = String.format("%.3f", perc[1]);
second[2] = String.format("%.3f", perc[2]);
second[3] = String.format("%.3f", mean);
second[4] = String.format("%.3f", perc[3]);
second[5] = String.format("%.3f", perc[4]);
if (nas != 0) {
first[6] = "NA's";
second[6] = String.format("%d", nas);
}
}
if (v.type().isNominal()) {
int[] hits = new int[v.rowCount() + 1];
int[] indexes = new int[v.rowCount() + 1];
for (int j = 0; j < v.rowCount(); j++) {
hits[v.index(j)]++;
indexes[v.index(j)] = j;
}
int[] tophit = new int[6];
int[] topindex = new int[6];
for (int j = 1; j < hits.length; j++) {
if (hits[j] != 0) {
for (int l = 0; l < tophit.length; l++) {
if (tophit[l] < hits[j]) {
for (int m = tophit.length - 1; m > l; m--) {
tophit[m] = tophit[m - 1];
topindex[m] = topindex[m - 1];
}
tophit[l] = hits[j];
topindex[l] = j;
break;
}
}
}
}
int nas = 0;
for (int j = 0; j < v.rowCount(); j++) {
if (v.missing(j)) {
nas++;
}
}
int other = v.rowCount();
int pos = 0;
for (int j = 0; j < 6; j++) {
if (tophit[j] != 0) {
other -= tophit[j];
first[j] = v.label(indexes[topindex[j]]);
second[j] = String.valueOf(tophit[j]);
pos++;
}
}
if (nas != 0) {
if (other - nas != 0) {
if (pos == 6) {
pos--;
}
first[pos] = "(Other)";
second[pos] = String.valueOf(other - nas);
pos++;
}
first[pos] = "NA's";
second[pos] = String.valueOf(nas);
} else {
if (other != 0) {
first[pos] = "(Other)";
second[pos] = String.valueOf(other);
}
}
}
// learn layout
int wfirst = 0;
int wsecond = 0;
for (int j = 0; j < 7; j++) {
wfirst = Math.max(wfirst, first[j].length());
wsecond = Math.max(wsecond, second[j].length());
}
// output text from pos to last
for (int j = 0; j < 7; j++) {
StringBuilder sb2 = new StringBuilder();
sb2.append(String.format("%" + wfirst + "s", first[j]));
if (" ".equals(first[j]) && " ".equals(second[j])) {
sb2.append(" ");
} else {
sb2.append(" : ");
}
sb2.append(String.format("%" + wsecond + "s", second[j]));
sb2.append("\n");
String next = sb2.toString();
if (!next.trim().isEmpty())
sb.append(next);
}
return sb.toString();
}
public static void printNames(Frame df) {
StringBuilder buffer = new StringBuilder();
buffer.append("\n > names(frame)\n");
for (int i = 0; i < df.varCount(); i++) {
buffer.append(df.varNames()[i]).append("\n");
}
code(buffer.toString());
}
public static void printSummary(Printable result) {
result.printSummary();
}
public static void lines(boolean merge, Var v) {
head(merge, v.rowCount(), new Var[]{v}, new String[]{""});
}
public static void head(boolean merge, int lines, Var v) {
head(merge, lines, new Var[]{v}, new String[]{""});
}
public static void lines(Frame df) {
lines(true, df);
}
public static void lines(boolean merge, Frame df) {
Var[] vars = new Var[df.varCount()];
String[] names = df.varNames();
for (int i = 0; i < vars.length; i++) {
vars[i] = df.var(i);
}
head(merge, df.rowCount(), vars, names);
}
public static void head(boolean merge, int lines, Frame df) {
Var[] vars = new Var[df.varCount()];
String[] names = df.varNames();
for (int i = 0; i < vars.length; i++) {
vars[i] = df.var(i);
}
head(merge, Math.min(lines, df.rowCount()), vars, names);
}
public static void head(boolean merge, int lines, Var[] vars, String[] names) {
WS.code(headString(merge, lines, vars, names));
}
public static String headString(Frame df) {
return headString(true, df.rowCount(), df.varStream().toArray(Var[]::new), df.varNames());
}
public static String headString(boolean merge, Frame df) {
return headString(merge, df.rowCount(), df.varStream().toArray(Var[]::new), df.varNames());
}
public static String headString(int lines, Var[] vars, String[] names) {
return headString(true, lines, vars, names);
}
public static String headString(boolean merge, int lines, Var[] vars, String[] names) {
if (lines == -1) {
lines = vars[0].rowCount();
}
int[] max = new int[vars.length];
for (int i = 0; i < vars.length; i++) {
max[i] = names[i].length() + 1;
for (int j = 0; j < vars[i].rowCount(); j++) {
if (vars[i].type().isNominal() && max[i] < vars[i].label(j).length()) {
max[i] = vars[i].label(j).length();
}
if (vars[i].type().isNumeric()) {
String value = vars[i].type() == VarType.NUMERIC ?
String.format("%.10f", vars[i].value(j)) :
String.format("%d", vars[i].index(j));
if (max[i] < value.length()) {
max[i] = value.length();
}
}
}
}
StringBuilder sb = new StringBuilder();
int pos = 0;
while (pos < vars.length) {
int maxWidth = getPrinter().textWidth();
int width = 0;
int start = pos;
while ((pos < vars.length - 1) && (width + max[pos + 1] + 1 < maxWidth)) {
width += max[pos + 1] + 1;
pos++;
}
for (int j = start; j <= pos; j++) {
String value = String.format("%" + max[j] + "s", names[j]);
sb.append(value).append(" ");
}
sb.append("\n");
for (int i = 0; i < lines; i++) {
for (int j = start; j <= pos; j++) {
String value;
if (vars[j].type().isNominal()) {
value = String.format("%" + max[j] + "s", vars[j].label(i));
} else {
value = String.format("%" + max[j] + "s",
vars[j].type() == VarType.NUMERIC ?
String.format("%.6f", vars[j].value(i)) :
String.format("%d", vars[j].index(i))
);
}
sb.append(value).append(" ");
}
sb.append("\n");
}
pos++;
sb.append("\n");
}
// return sb.toString();
TextTable tt = TextTable.newEmpty(lines + 1, vars.length + 1);
if (merge)
tt.withMerge(getPrinter().textWidth());
tt.withHeaderRows(1);
tt.withHeaderCols(1);
for (int i = 0; i < vars.length; i++) {
tt.set(0, i + 1, names[i], 0);
}
for (int i = 0; i < lines; i++) {
tt.set(i + 1, 0, "[" + i + "]", 1);
}
for (int i = 0; i < lines; i++) {
for (int j = 0; j < vars.length; j++) {
tt.set(i + 1, j + 1, vars[j].label(i), 1);
}
}
return tt.summary();
}
}