package GeDBIT.index.algorithms;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
import cern.colt.matrix.linalg.Algebra;
//改一下看看cvs有没有变
public class Selection {
// 返回的pivots[i]应该是x以0开始计数的序号。即第一个x的序号是0
int m1_p; // p for model 1
double m1_x[][];
double rss2, rss1;
int m2_p; // p for model 2
double m2_x[][];
double x[][]; // array that comes from arguments
double y[][]; // array that comes from arguments
DoubleMatrix2D y_matrix; // will be used when calculate the init rss
int p;
int n;
boolean choose[];
int pnum;
int testSign; // 判定用什么评判标准(F-test or sth.)
// boolean t_choose[]; //在枚举法的时候用到的记录数组,用来记录某个点是否选过
int chose_x[]; // 记录选到的点
int result_x[]; // 记录结果
// double minRss; //枚举法
double maxf; // used in forward
double minf; // used in backward
int nextv; // used in both f and b
double temprss;
double minrss; // used in f, b, and enum
void setTestSign(int s) {
testSign = s;
}
public Selection(double[][] matrix_x, double[][] matrix_y, int num) {
x = matrix_x;
y = matrix_y;
pnum = num;
y_matrix = new DenseDoubleMatrix2D(y);
n = y_matrix.rows();
DoubleMatrix2D x_matrix = new DenseDoubleMatrix2D(x);
p = x_matrix.columns() - 1;
for (int i = 0; i < n; i++)
x[i][0] = 1;
}
double calF(int v) {
// rss2 = calRss(m2x, m2p);
// int v = n-m2p-1;
return (rss1 - rss2) / (rss2 / v);
}
double calRss(double[][] m_x, int m_p) {
Algebra alg = new Algebra();
// 计算beta,
DoubleMatrix2D model_x = new DenseDoubleMatrix2D(m_x);
DoubleMatrix2D transpose_matrix = alg.transpose(model_x); // x'
DoubleMatrix2D mul_matrix = alg.mult(transpose_matrix, model_x); // x'x
DoubleMatrix2D inverse_matrix = alg.inverse(mul_matrix); // (x'x)-1
// print(inverse_matrix);
// System.out.println();
mul_matrix = alg.mult(inverse_matrix, transpose_matrix); // (x'x)-1x'
// print(mul_matrix);
// System.out.println();
DoubleMatrix2D beta = alg.mult(mul_matrix, y_matrix); // (x'x)-1x'y
// print(beta);
// System.out.println();
// 计算rss
mul_matrix = alg.mult(model_x, beta); // xbeta
// print(mul_matrix);
// System.out.println();
DoubleMatrix2D temp_matrix = new DenseDoubleMatrix2D(n, 1); // y-xbeta
double k;
for (int i = 0; i < n; i++) {
k = y_matrix.getQuick(i, 0) - mul_matrix.getQuick(i, 0);
temp_matrix.setQuick(i, 0, k);
}
// print(temp_matrix);
// System.out.println();
// inverse_matrix = alg.inverse(temp_matrix); //(y-xbeta)'
transpose_matrix = alg.transpose(temp_matrix); // (y-xbeta)'
// print(transpose_matrix);
// System.out.println();
mul_matrix = alg.mult(transpose_matrix, temp_matrix); // (y-xbeta)'(y-xbeta)
// print(mul_matrix);
// System.out.println();
return mul_matrix.getQuick(0, 0);
}
int forwardInit() {
m1_p = 0; // model1一开始没有变量
m2_p = 1; // model2一开始有一个变量
// choose数组,用来标志哪个变量已经选入
choose = new boolean[p + 1];
for (int i = 0; i <= p; i++)
choose[i] = false;
// 初始model中只有beta一项,全1
m2_x = new double[n][2];
for (int i = 0; i < n; i++)
m2_x[i][0] = x[i][0];
// 算初始rss1,就是第一个只有beta0项的空模型的rss
/*
* 因为rss是针对每个模型。 所以在每一步循环中,model1的rss就是上一步选出来的model2的rss。
* 所以可以把上一步的rss记录下来,就不用在每次循环中再次计算rss1了。
*/
double avgy = y_matrix.zSum() / y_matrix.size(); // beta = avgy
DoubleMatrix2D minus_matrix = new DenseDoubleMatrix2D(n, 1); // y-beta
for (int i = 0; i < n; i++) {
double temp = y_matrix.get(i, 0) - avgy; // y-beta
minus_matrix.set(i, 0, temp);
}
DoubleMatrix2D transpose_minus = (new Algebra())
.transpose(minus_matrix); // (y-beta)T
DoubleMatrix2D mult = (new Algebra()).mult(transpose_minus,
minus_matrix); // (y-beta)T*(y-beta)
rss1 = mult.getQuick(0, 0);
// rss2会在以后算
rss2 = 0;
return 0;
}
boolean forwardEnd() {
// 这个是判定结束条件
if (m2_p > pnum)
return true;
return false;
}
int[] forwardSelection() {
forwardInit();
// 计算函数
while (!forwardEnd()) {
// 这三个是用来记录最大值以及得出最大值的变量
// double maxf = 0;
// int nextv = 0;
// double temprss = 0.0;
maxf = 0;
nextv = 0;
temprss = 0;
minrss = 999999999E100;
for (int i = 1; i <= p; i++) { // 找下一个变量
if (choose[i]) // 找出一个没选过的
continue;
for (int j = 0; j < n; j++)
// 把下个变量的x列加入到m2模型中
m2_x[j][m2_p] = x[j][i];
forwardTest(m2_x, m2_p, i); // 传入到检测函数
}
choose[nextv] = true; // 把找出来的x标记为已选
m1_p = m2_p; // 把model2变成下一轮的model1
m1_x = new double[n][m1_p + 1];
for (int i = 0; i < n; i++)
for (int j = 0; j < m1_p; j++)
m1_x[i][j] = m2_x[i][j];
for (int i = 0; i < n; i++)
// 最后一列要变成我们选中的x列
m1_x[i][m1_p] = x[i][nextv];
m2_p++; // model2的变量个数要增加
m2_x = new double[n][m2_p + 1];
for (int i = 0; i < n; i++)
for (int j = 0; j < m2_p; j++)
m2_x[i][j] = m1_x[i][j];
rss1 = temprss; // rss互换,下次就不用再计算了
}
int[] result = new int[pnum];
int j = 0;
for (int i = 1; i <= p; i++)
if (choose[i])
result[j++] = i - 1;
return result;
}
void forwardTest(double[][] m2x, int m2p, int point) {
// double result = 0;
if (testSign == 1) {
rss2 = calRss(m2x, m2p);
int v = n - m2p - 1;
// result = calF(v);
double f = calF(v);
if (f > maxf) {
maxf = f;
nextv = point;
temprss = rss2;
}
} else if (testSign == 2) {
rss2 = calRss(m2x, m2p);
if (rss2 < minrss) {
minrss = rss2;
nextv = point;
}
}
// return result;
}
/*
* double testModel(double[][] m2x, int m2p){ //根据testSign的值来选择用什么方法来检测
* double result = 0; if(testSign == 1) result = calF(m2x, m2p); return
* result; }
*/
/*
* double calF(double[][] m2x, int m2p){ Algebra alg = new Algebra();
* //计算beta, DoubleMatrix2D model2_x = new DenseDoubleMatrix2D(m2x);
* //System.out.println("m2"); //print(model2_x); DoubleMatrix2D
* transpose_matrix = alg.transpose(model2_x); //x'
* //System.out.println("trans"); //print(transpose_matrix); DoubleMatrix2D
* mul_matrix = alg.mult(transpose_matrix, model2_x); //x'x
* //System.out.println("mul_matrix"); //print(mul_matrix); DoubleMatrix2D
* inverse_matrix = alg.inverse(mul_matrix); //(x'x)-1
* //print(inverse_matrix); //System.out.println(); mul_matrix =
* alg.mult(inverse_matrix, transpose_matrix); //(x'x)-1x'
* //print(mul_matrix); //System.out.println(); DoubleMatrix2D beta =
* alg.mult(mul_matrix, y_matrix); //(x'x)-1x'y //print(beta);
* //System.out.println();
*
* //计算rss mul_matrix = alg.mult(model2_x, beta); //xbeta
* //print(mul_matrix); //System.out.println(); DoubleMatrix2D temp_matrix =
* new DenseDoubleMatrix2D(n, 1); //y-xbeta double k; for(int i=0; i<n;
* i++){ k = y_matrix.getQuick(i, 0) - mul_matrix.getQuick(i, 0);
* temp_matrix.setQuick(i, 0, k); } //print(temp_matrix);
* //System.out.println(); //inverse_matrix = alg.inverse(temp_matrix);
* //(y-xbeta)' transpose_matrix = alg.transpose(temp_matrix); //(y-xbeta)'
* //print(transpose_matrix); //System.out.println(); mul_matrix =
* alg.mult(transpose_matrix, temp_matrix); //(y-xbeta)'(y-xbeta)
* //print(mul_matrix); //System.out.println(); rss2 =
* mul_matrix.getQuick(0, 0);
*
* int v = n-m2p-1; return (rss1-rss2)/(rss2/v); }
*/
/*
* boolean bigger(double v1, double v2){ //判断double型的大小 BigDecimal d1 = new
* BigDecimal(v1); BigDecimal d2 = new BigDecimal(v2);
* if(d1.compareTo(d2)>0) return true; else return false; }
*/
int[] enumerateSelection() {
// 比较rss,选出最小的rss
enumerateInit();
// 当前已经选了0个点,第一个点是0
enumerateRecurSelect(0, 0);
// 算出来以后result_x里就是所选的点
int[] result = new int[pnum];
for (int i = 1; i <= pnum; i++)
result[i - 1] = result_x[i] - 1;
/*
* System.out.println("result:"); for(int i=0; i<pnum; i++)
* System.out.print(result[i]+" "); System.out.println();
*/
return result;
}
// num表示现在已经选了多少个点,point表示上次选的点
// 记着,选的点是从1开始计数的
void enumerateRecurSelect(int num, int point) {
if (num >= pnum) // 选够点了,算rss
{
double rss = enumerateTest(chose_x);
if (rss < minrss) // 比较,记录最小的rss,保存数组
{
for (int i = 0; i <= pnum; i++)
result_x[i] = chose_x[i];
minrss = rss;
}
} else // 没选够点,选下一个点
{
for (int i = point + 1; i <= p; i++) // 枚举下一个点
{
// t_choose[i] = true;
chose_x[num + 1] = i; // 加入现在枚举的点
enumerateRecurSelect(num + 1, i); // 进入下一轮递归
// t_choose[i] = false;
chose_x[num + 1] = 0; // 回溯,还原(其实应该不用)
}
}
}
double enumerateTest(int[] arr) {
// 先把各个点提取出来,组成x矩阵
// 一个n行pnum+1列的矩阵(因为只选择了pnum个参数加入进去,加上x0的列)
double[][] enum_x = new double[n][pnum + 1];
// int x_i = 0; //enum_x的计数器
for (int i = 0; i <= pnum; i++)
for (int j = 0; j < n; j++)
enum_x[j][i] = x[j][arr[i]];
return calRss(enum_x, pnum);
}
/*
* double enumerateCalRss(int[] arr) { //先把各个点提取出来,组成x矩阵
* //一个n行pnum+1列的矩阵(因为只选择了pnum个参数加入进去,加上x0的列) double[][] enum_x = new
* double[n][pnum+1]; //int x_i = 0; //enum_x的计数器 for(int i=0; i<=pnum; i++)
* for(int j=0; j<n; j++) enum_x[j][i] = x[j][arr[i]];
*
* //然后通过x矩阵算出rss Algebra alg = new Algebra(); //beta DoubleMatrix2D model_x
* = new DenseDoubleMatrix2D(enum_x); //x DoubleMatrix2D transpose_matrix =
* alg.transpose(model_x); //x' DoubleMatrix2D mul_matrix =
* alg.mult(transpose_matrix, model_x); //x'x
* //System.out.println("mul_matrix"); //print(mul_matrix); DoubleMatrix2D
* inverse_matrix = alg.inverse(mul_matrix); //(x'x)-1 mul_matrix =
* alg.mult(inverse_matrix, transpose_matrix); //(x'x)-1x' DoubleMatrix2D
* beta = alg.mult(mul_matrix, y_matrix); //(x'x)-1x'y
*
* //rss mul_matrix = alg.mult(model_x, beta); //xbeta DoubleMatrix2D
* temp_matrix = new DenseDoubleMatrix2D(n, 1); //y-xbeta double k; for(int
* i=0; i<n; i++) { k = y_matrix.getQuick(i, 0) - mul_matrix.getQuick(i, 0);
* temp_matrix.setQuick(i, 0, k); } transpose_matrix =
* alg.transpose(temp_matrix); //(y-xbeta)' mul_matrix =
* alg.mult(transpose_matrix, temp_matrix); //(y-xbeta)'(y-xbeta)
*
* return mul_matrix.getQuick(0, 0); }
*/
int enumerateInit() {
/*
* y_matrix = new DenseDoubleMatrix2D(y); n = y_matrix.rows();
* DoubleMatrix2D x_matrix = new DenseDoubleMatrix2D(x); p =
* x_matrix.columns()-1;
*/
/*
* for(int i=1; i<n; i++) x[i][0] = 1;
*/
minrss = 999999999;
chose_x = new int[pnum + 1];
result_x = new int[pnum + 1];
// choose = new boolean[p+1];
chose_x[0] = 0; // 第一个为0,就是一定要把第一个常数项选中
return 0;
}
int backwardInit() {
m1_p = p - 1;
m2_p = p;
choose = new boolean[p + 1];
for (int i = 0; i <= p; i++)
choose[i] = false;
// m1只要new就好了,到后面计算再赋值
m1_x = new double[n][m1_p + 1];
rss2 = calRss(x, p);
rss1 = 0.0;
return 0;
}
boolean backwardEnd() {
// m2_p代表了此阶段模型里的变量数目。当m2_p等于pnum的时候,结束。
if (m2_p > pnum)
return false;
return true;
}
void backwardTest(double[][] m1x, int m1p, int point) {
if (testSign == 1) // f-test
{
rss1 = calRss(m1x, m1p);
int v = n - m2_p - 1;
double f = calF(v);
// System.out.println(f);
if (f < minf) {
minf = f;
nextv = point;
temprss = rss1;
}
} else if (testSign == 2) // rss
{
rss1 = calRss(m1x, m1p);
if (rss1 < minrss) {
minrss = rss1;
nextv = point;
}
}
}
int[] backwardSelection()
// 因为大模型为m2,小模型为m1。
// 所以这里上一步的模型为m2,下一步的模型为m1。
// 和forward正好相反
{
backwardInit();
while (!backwardEnd()) {
// debugPrint();
// System.out.println("in selection");
// double minf = 999999999;
// int nextv = 0;
// double temprss = 0.0;
minf = 999999999E100;
minrss = 999999999E100;
nextv = 0;
temprss = 0;
for (int i = 1; i <= p; i++) {
if (choose[i])
continue;
// 把所有没有选择丢弃的变量拷到m1中
// 即所有choose[j]为false的变量
int k = 0;
// System.out.println("m1p :"+m1_p);
// System.out.println("p :"+p);
// System.out.println("I :"+i);
// System.out.println();
for (int j = 0; j <= p; j++) {
if (!(choose[j]) && (j != i)) // 当前选择的也不拷贝
{
// System.out.println("j :"+j);
// System.out.println("k :"+k);
for (int row = 0; row < n; row++)
// 把j列拷贝过去
m1_x[row][k] = x[row][j];
k++;
}
}
// double f = backwardTest(m1_x, m1_p);
backwardTest(m1_x, m1_p, i);
/*
* if(f<minf) { minf = f; nextv = i; temprss = rss1; }
*/
}
choose[nextv] = true;
// System.out.println("choose: "+nextv);
// System.out.println();
// 不用把model1拷成model2,只要把rss记录下来就好了。m2_p也要变化。
m2_p--;
m1_p--;
m1_x = new double[n][m1_p + 1];
rss2 = temprss;
// System.out.println("choose "+nextv);
// System.out.println("rss is :"+rss1);
// System.out.println();
}
int[] result = new int[pnum];
int j = 0;
for (int i = 1; i <= p; i++)
if (!choose[i]) // 没有被选中,剩下的才是我们想要的
// result[j++] = i;
result[j++] = i - 1;
/*
* System.out.println("result:"); for(int i=0; i<result.length; i++)
* System.out.print(result[i]+" "); System.out.println();
*/
return result;
}
void debugPrint() {
System.out.println("choose array: ");
for (int i = 0; i <= p; i++)
if (!choose[i])
System.out.print(i + " ");
System.out.println();
// System.out.println("")
}
void print(final DoubleMatrix2D matrix) {
// 这是用来调试的函数
for (int i = 0; i < matrix.rows(); i++) {
for (int j = 0; j < matrix.columns(); j++)
System.out.print(matrix.getQuick(i, j) + " ");
System.out.println();
}
System.out.println("row :" + matrix.rows());
System.out.println("col :" + matrix.columns());
}
}