/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Rosa Venzala 19/09/2008
* @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 16/12/2008
* @version 1.3
* @since JDK1.2
* </p>
*/
package keel.Algorithms.Rule_Learning.Riona;
import java.io.*;
import keel.Dataset.*;
import java.util.*;
import org.core.*;
public class Dataset {
/*
 * Instance state. Most of it is allocated and filled by readSet(); commons is
 * produced by calculateMostCommon() and counter by creaCount().
 */
// Numeric input values, indexed [instance][input attribute]; normalize() rewrites them in place.
private double[][] X = null;
// Nominal (string) form of each input value, indexed like X.
private String [][]X2=null;
// missing[i][j] is true when input j of instance i is flagged as missing.
private boolean[][] missing = null;
// Output (class) value of each instance, truncated to int.
private int[] C = null;
// Nominal (string) form of the output value of each instance.
private String[] C2=null;
// Largest value seen for each input attribute while reading the file.
private double[] eMaximum;
// Smallest value seen for each input attribute while reading the file.
private double[] eMinimum;
// Number of instances.
private int nData;
// Number of input attributes plus number of output attributes.
private int nVariables;
// Number of input attributes.
private int nInPuts;
// Largest class index found in C, plus one.
private int nClasses;
// Debug switch; not referenced by the code visible in this class.
final static boolean debug = false;
// Underlying KEEL instance set the arrays above are copied from.
private InstanceSet IS;
// Most common value of each input attribute (see calculateMostCommon()).
private int[] commons;
// Per-attribute value count; written by both setNumValues() and removeDuplicated().
private int []numValues;
// Per-attribute list of distinct values in ascending order (see getListValues()).
private double[][]sortedValuesList;
// Occurrence counts indexed [class][attribute][value slot] (see creaCount()).
private int[][][]counter;
// For each nominal attribute, the matrix of distances between its values (see computeSVDM()).
private double[][][]SVDM;
// Copy of the input values taken at load time ("sin normalizar" = not normalized); normalize() never writes to it.
private double[][]XSinNor=null;
/**
 * <p>
 * Gives access to the table of input attribute values.
 * </p>
 * @return double[][] the internal table itself (not a copy), indexed [instance][attribute]
 */
public double[][] getX() {
    return this.X;
}
/**
 * <p>
 * Gives access to the table of input values in their nominal (string) form.
 * </p>
 * @return String[][] the internal table itself (not a copy), indexed [instance][attribute]
 */
public String[][]getX2(){
    return this.X2;
}
/**
 * <p>
 * Returns the input attribute values of a single instance.
 * </p>
 *
 * @param pos The position of the instance
 * @return double[] the internal row for that instance (not a copy)
 */
public double []getX(int pos){
    final double[] row = X[pos];
    return row;
}
/**
 * <p>
 * Runs normalize() over the whole data set and then returns the input values
 * of one instance. Note that normalize() is invoked on every call.
 * </p>
 *
 * @param pos The position of the instance
 * @return double[] the internal row for that instance (not a copy)
 */
public double []getXNor(int pos){
    this.normalize();
    return this.X[pos];
}
/**
 * <p>
 * Returns the instance set this object reads its data from.
 * </p>
 * @return InstanceSet the underlying instance set
 */
public InstanceSet getInstanceSet(){
    return this.IS;
}
/**
 * <p>
 * Looks up the nominal (string) value that corresponds to a numeric value of an attribute.
 * The first instance whose stored, non-normalized value equals the given one decides the answer.
 * </p>
 * @param attribute index of the input attribute
 * @param value numeric value to look for
 * @return String the nominal value of the first matching instance, or "" when no instance matches
 */
public String findNominalValue(int attribute,double value){
    for (int row = 0; row < nData; row++) {
        if (XSinNor[row][attribute] == value) {
            return X2[row][attribute];
        }
    }
    return "";
}
/**
 * <p>
 * Checks whether the instance set holds at least one instance of a given class.
 * </p>
 * @param whichClass index of the class to look for
 * @return true if at least one instance has that class as its first output
 * @throws ArrayIndexOutOfBoundsException if whichClass is negative or not below the number of classes
 */
public boolean existInstanceOfClassC(int whichClass)throws ArrayIndexOutOfBoundsException{
    if (!(whichClass >= 0 && whichClass < nClasses)) {
        throw new ArrayIndexOutOfBoundsException("You are trying to access to "+whichClass+" class and there are only "+nClasses+".");
    }
    final Instance[] all = IS.getInstances();
    boolean found = false;
    // Every instance is visited; there is no early exit once a match is seen
    for (int i = 0; i < IS.getNumInstances(); i++) {
        final int outputClass = all[i].getOutputNominalValuesInt(0);
        found = found | (outputClass == whichClass);
    }
    return found;
}
/**
 * <p>
 * Builds, for every input attribute, the list of its distinct values in ascending order.
 * Each row has length nData: the first numValues[attribute] entries are the sorted distinct
 * values and the remaining entries are 0.0 padding. The result is also stored in
 * sortedValuesList, and numValues is updated as a side effect of removeDuplicated().
 * </p>
 * @return double[][] the lists, indexed [attribute][position]
 */
public double [][]getListValues(){
    final double[][] perAttribute = new double[nInPuts][];
    for (int attr = 0; attr < nInPuts; attr++) {
        // Copy the column of this attribute out of the instance-major table
        final double[] column = new double[nData];
        for (int row = 0; row < nData; row++) {
            column[row] = X[row][attr];
        }
        final double[] distinct = removeDuplicated(column, attr);
        // Only the filled prefix is sorted; the padding stays at the end
        Arrays.sort(distinct, 0, numValues[attr]);
        perAttribute[attr] = distinct;
    }
    this.sortedValuesList = perAttribute;
    return perAttribute;
}
/**
 * <p>
 * Copies the distinct values of a vector, in order of first appearance, to the front of a
 * new array of the same length, and records how many there are in numValues[attribute].
 * </p>
 * @param v the values to filter
 * @param attribute index of the attribute whose numValues entry is updated
 * @return double[] array of length v.length; only the first numValues[attribute] entries are meaningful
 */
private double[] removeDuplicated(double[]v,int attribute){
    final double[] distinct = new double[v.length];
    int kept = 0;
    for (int i = 0; i < v.length; i++) {
        final double candidate = v[i];
        boolean seen = false;
        int j = 0;
        while (j < kept && !seen) {
            seen = (distinct[j] == candidate);
            j++;
        }
        if (!seen) {
            distinct[kept] = candidate;
            kept++;
        }
    }
    numValues[attribute] = kept;
    return distinct;
}
/**
 * <p>
 * Builds the 3D table COUNT[C][A][V]: for each class C and input attribute A, the number of
 * training examples of class C that fall in value slot V of attribute A.
 * For a nominal attribute the slot is the integer code of the value; for any other attribute
 * it is the position of the value inside sortedValuesList[A], so getListValues() is expected
 * to have been called beforehand.
 * </p>
 * <p>
 * Side effects: the table is stored in the counter field and setNumValues() is invoked,
 * which overwrites numValues. A numeric value that cannot be located in sortedValuesList is
 * reported on System.err and left uncounted.
 * </p>
 * @return int [][][] the table, dimensioned [nClasses][nInPuts][nData]
 */
public int [][][] creaCount(){
    // In the worst case no value repeats, so nData slots per attribute are reserved.
    // Java zero-fills new arrays, so no explicit reset pass is required.
    counter = new int[nClasses][nInPuts][nData];
    setNumValues();
    // The attribute types do not change while counting: compute them once
    final int[] tipos = variableType();
    for (int k = 0; k < nData; k++) {
        final int clase = C[k];
        if (clase < 0 || clase >= nClasses) {
            continue; // not one of the counted classes
        }
        for (int j = 0; j < nInPuts; j++) {
            final double valorActual = X[k][j];
            final int indice;
            if (tipos[j] == 0) {
                // Nominal attribute: the value itself is the slot
                indice = (int) valorActual;
            } else {
                // Numeric attribute: the slot is the position in the sorted value list
                indice = find(sortedValuesList[j], valorActual);
                if (indice == -1) {
                    System.err.println("Error: el valor no se encontro en el vector");
                    continue; // nothing sensible to count; indexing with -1 would throw
                }
            }
            counter[clase][j][indice]++;
        }
    }
    return counter;
}
/**
 * <p>
 * Searches a vector linearly for a value.
 * </p>
 * @param v the vector to search
 * @param valor the value to look for
 * @return int index of the first element equal to valor, or -1 if there is none
 */
private int find(double[]v,double valor){
    for (int i = 0; i < v.length; i++) {
        if (v[i] == valor) {
            return i;
        }
    }
    return -1;
}
/**
 * <p>
 * For each attribute-value pair, picks the class with the highest count in the given table.
 * Only the first numValues[attribute] slots of each attribute are evaluated; the remaining
 * slots of the result stay at 0. Ties are resolved by getMaximum().
 * </p>
 * @param cuonter count table indexed [class][attribute][value slot]
 * @param seed seed handed to getMaximum() for tie-breaking
 * @return int [][] chosen class per pair, dimensioned [nInPuts][nData]
 */
public int [][]getOptimumClass(int [][][]cuonter,long seed){
    final int[][] best = new int[nInPuts][nData];
    final int[] perClass = new int[nClasses];
    for (int attr = 0; attr < nInPuts; attr++) {
        for (int slot = 0; slot < numValues[attr]; slot++) {
            // Gather the counts of every class for this attribute/value slot
            for (int c = 0; c < nClasses; c++) {
                perClass[c] = cuonter[c][attr][slot];
            }
            best[attr][slot] = getMaximum(perClass, seed);
        }
    }
    return best;
}
/**
 * <p>
 * Returns the index of the maximum of an array of integers.
 * If several positions hold the maximum, one of them is chosen at random after
 * re-seeding the generator with the given seed.
 * </p>
 * @param num the values to inspect (must not be empty)
 * @param seed seed set on Randomize before any draw
 * @return int the index where the maximum value is
 */
public int getMaximum(int []num, long seed){
    Randomize.setSeed(seed);
    int max = num[0];
    int index = 0;
    // Sized by the input so that a full tie can never overflow the buffer
    final int[] ties = new int[num.length];
    int tieCount = 1; // ties[0] already refers to position 0
    for (int i = 1; i < num.length; i++) {
        if (num[i] > max) {
            max = num[i];
            index = i;
            ties[0] = i;
            tieCount = 1;
        } else if (num[i] == max) {
            ties[tieCount] = i;
            tieCount++;
        }
    }
    if (tieCount > 1) {
        // More than one optimum: pick among the tieCount valid slots only.
        // Randint(0, n) is assumed to exclude n (it is used this way in nearestSample);
        // a closed draw over [0, n] could select the slot after the last tie - TODO confirm
        index = ties[Randomize.Randint(0, tieCount)];
    }
    return index;
}
/**
 * <p>
 * Returns the index of the maximum of an array of doubles.
 * If several positions hold the maximum, one of them is chosen at random after
 * re-seeding the generator with the given seed; the drawn tie position is printed
 * on standard output.
 * </p>
 * @param num the values to inspect (must not be empty)
 * @param seed seed set on Randomize before any draw
 * @return int the index where the maximum value is
 */
public int getMaxim(double []num, long seed){
    Randomize.setSeed(seed);
    double max = num[0];
    int index = 0;
    // Sized by the input so that a full tie can never overflow the buffer
    final int[] ties = new int[num.length];
    int tieCount = 1; // ties[0] already refers to position 0
    for (int i = 1; i < num.length; i++) {
        if (num[i] > max) {
            max = num[i];
            index = i;
            ties[0] = i;
            tieCount = 1;
        } else if (num[i] == max) {
            ties[tieCount] = i;
            tieCount++;
        }
    }
    if (tieCount > 1) {
        // More than one optimum: pick among the tieCount valid slots only.
        // Randint(0, n) is assumed to exclude n (it is used this way in nearestSample);
        // a closed draw over [0, n] could select the slot after the last tie - TODO confirm
        index = Randomize.Randint(0, tieCount);
        System.out.println("Elegimos "+index);
        index = ties[index];
    }
    return index;
}
/**
 * <p>
 * Recomputes numValues: for each input attribute, the largest value found in any
 * instance (truncated to int, never below 0) plus one. Every instance is inspected,
 * including the first one.
 * </p>
 */
public void setNumValues(){
    for (int attr = 0; attr < nInPuts; attr++) {
        int largest = 0;
        for (int row = 0; row < nData; row++) {
            final int code = (int) X[row][attr];
            if (code > largest) {
                largest = code;
            }
        }
        numValues[attr] = largest + 1;
    }
}
/**
 * <p>
 * Returns the per-attribute value counts as last computed.
 * </p>
 * @return int [] the internal array (not a copy), one entry per input attribute
 */
public int []getNumValues2(){
    return this.numValues;
}
/**
 * <p>
 * Returns the output (class) value of every instance.
 * </p>
 * @return int[] a fresh copy of the class values, one per instance
 */
public int[] getC() {
    final int[] copy = new int[C.length];
    System.arraycopy(C, 0, copy, 0, C.length);
    return copy;
}
/**
 * <p>
 * Returns the output (class) value of every instance in its nominal (string) form.
 * </p>
 * @return String[] a fresh copy of the nominal class values, one per instance
 */
public String[] getC2() {
    final String[] copy = new String[C2.length];
    System.arraycopy(C2, 0, copy, 0, C2.length);
    return copy;
}
/**
 * <p>
 * Returns the output (class) value of one instance.
 * </p>
 * @param pos The position of the instance in the set of values
 * @return int The value of the class for that instance
 */
public int getC(int pos){
    final int label = C[pos];
    return label;
}
/**
 * <p>
 * Returns the maximum value of each input attribute.
 * </p>
 * @return double[] the internal array (not a copy), one entry per input attribute
 */
public double[] getEMaximum() {
    return this.eMaximum;
}
/**
 * <p>
 * Returns the minimum value of each input attribute.
 * </p>
 * @return double[] the internal array (not a copy), one entry per input attribute
 */
public double[] getEMinimum() {
    return this.eMinimum;
}
/**
 * <p>
 * Returns the number of examples.
 * </p>
 * @return int the number of examples
 */
public int getNData() {
    return this.nData;
}
/**
 * <p>
 * Returns the number of variables.
 * </p>
 * @return int The number of variables (input and output together)
 */
public int getNVariables() {
    return this.nVariables;
}
/**
 * <p>
 * Returns the number of input variables.
 * </p>
 * @return int Total number of input variables
 */
public int getInPuts() {
    return this.nInPuts;
}
/**
 * <p>
 * Returns the total number of classes.
 * </p>
 * @return int Number of classes
 */
public int getNClasses() {
    return this.nClasses;
}
/**
 * <p>
 * Tells whether the value of an attribute is missing in an example.
 * </p>
 * @param i int Number of the example
 * @param j int Number of the attribute
 * @return boolean True if the value is missing, false otherwise
 */
public boolean isMissing(int i, int j) {
    final boolean[] flagsOfExample = missing[i];
    return flagsOfExample[j];
}
/**
 * <p>
 * Constructor. Creates the empty instance set that readSet() later fills.
 * </p>
 */
public Dataset() {
    this.IS = new InstanceSet();
}
/**
 * <p>
 * Reads a file of examples (train or test) through the instance set and copies its
 * contents into this object's tables: numeric and nominal input values, missing flags,
 * class values, per-attribute minimum/maximum and the number of classes.
 * </p>
 * <p>
 * The JVM is terminated with exit code 1 when the data set declares more than one output
 * attribute or none at all. Any exception raised inside the body is caught, reported on
 * standard output together with its stack trace, and not rethrown.
 * </p>
 * @param samples Name of the file of examples
 * @param train True if Train, False if Test
 * @throws IOException declared by the signature; NOTE(review): the catch-all below means the visible body never lets one escape
 */
public void readSet(String samples, boolean train) throws IOException {
try {
// Load in memory a dataset that contains a classification problem
IS.readSet(samples, train);
nData = IS.getNumInstances();
nInPuts = Attributes.getInputNumAttributes();
nVariables = nInPuts + Attributes.getOutputNumAttributes();
// Check that there is only one output variable
if (Attributes.getOutputNumAttributes() > 1) {
System.out.println(
"This algorithm can not process MIMO datasets");
System.out.println(
"All outputs but the first one will be removed");
System.exit(1); // terminate
}
// NOTE(review): noOutputs only becomes true right before System.exit(1), so the
// branch that reads it further down looks unreachable in practice - confirm
boolean noOutputs = false;
if (Attributes.getOutputNumAttributes() < 1) {
System.out.println(
"This algorithm can not process datasets without outputs");
System.out.println("Zero-valued output generated");
noOutputs = true;
System.exit(1); // terminate
}
// Initialise and fill our own tables
X = new double[nData][nInPuts];
X2 = new String[nData][nInPuts];
missing = new boolean[nData][nInPuts];
C = new int[nData];
C2=new String[nData];
numValues=new int[nInPuts];
sortedValuesList=new double[nInPuts] [nData];
SVDM=new double[nInPuts][][];
XSinNor = new double[nData][nInPuts];
// Maximum and minimum of inputs
eMaximum = new double[nInPuts];
eMinimum = new double[nInPuts];
// All values are cast into double/integer
nClasses = 0;
for (int i = 0; i < nData; i++) {
Instance inst = IS.getInstance(i);
for (int j = 0; j < nInPuts; j++) {
X2[i][j] = IS.getInputNominalValue(i, j);
X[i][j] = IS.getInputNumericValue(i, j);
// Second copy of the same value; normalize() does not write to this table
XSinNor[i][j] = IS.getInputNumericValue(i, j);
missing[i][j] = inst.getInputMissingValues(j);
// The first instance (i == 0) seeds both bounds
if (X[i][j] > eMaximum[j] || i == 0) {
eMaximum[j] = X[i][j];
}
if (X[i][j] < eMinimum[j] || i == 0) {
eMinimum[j] = X[i][j];
}
}
if (noOutputs) {
C[i] = 0;
} else {
C[i] = (int)IS.getOutputNumericValue(i, 0);
C2[i] = IS.getOutputNominalValue(i, 0);
}
// Track the largest class index seen so far
if (C[i] > nClasses) {
nClasses = C[i];
}
}
// Class indices start at 0, so the count is the largest index plus one
nClasses++;
System.out.println("Number of classes=" + nClasses);
// IS.print() would dump every attribute and every instance
} catch (Exception e) {
// Failure is only reported; the tables may be left partially filled
System.out.println("DBG: Exception in readSet");
e.printStackTrace();
}
}
/**
 * <p>
 * Builds the header of an output file from the attribute definitions currently loaded:
 * the relation line, the input and output attribute declarations, the input and output
 * header lines and the data marker.
 * </p>
 * @return String the header text, ending with the "@data" line
 */
public String copyTestHeader() {
    // Operands are evaluated left to right, in the order the header lines appear
    return "@relation " + Attributes.getRelationName() + "\n"
        + Attributes.getInputAttributesHeader()
        + Attributes.getOutputAttributesHeader()
        + Attributes.getInputHeader() + "\n"
        + Attributes.getOutputHeader() + "\n"
        + "@data\n";
}
/**
 * <p>
 * Rescales the input values of the set into the [0,1] interval, using the per-attribute
 * minimum and maximum recorded at load time.
 * </p>
 * <p>
 * Cells flagged as missing, nominal attributes and attributes whose minimum equals their
 * maximum are left untouched (the latter to avoid a division by zero). The rescaled value
 * is always derived from the non-normalized copy of the data, so calling this method more
 * than once yields the same table as calling it once.
 * </p>
 */
public void normalize() {
    final int attributes = this.getInPuts();
    final int rows = this.getNData();
    for (int j = 0; j < attributes; j++) {
        final boolean nominal = Attributes.getInputAttribute(j).getType() == Attribute.NOMINAL;
        if (nominal || eMaximum[j] == eMinimum[j]) {
            continue; // nothing to rescale for this attribute
        }
        final double scale = 1.0 / (eMaximum[j] - eMinimum[j]);
        for (int i = 0; i < rows; i++) {
            if (!isMissing(i, j)) {
                // Start from the raw value so repeated calls do not compound the scaling
                X[i][j] = (XSinNor[i][j] - eMinimum[j]) * scale;
            }
        }
    }
}
/**
 * <p>
 * Returns the type of each input attribute: 0 for nominal, 1 for anything else.
 * </p>
 * @return int[] a new vector with one 0/1 entry per input attribute
 */
public int[] variableType() {
    final int total = this.nInPuts;
    final int[] kinds = new int[total];
    for (int a = 0; a < total; a++) {
        final boolean nominal = Attributes.getInputAttribute(a).getType() == Attribute.NOMINAL;
        kinds[a] = nominal ? 0 : 1;
    }
    return kinds;
}
/**
 * <p>
 * Computes, for each input attribute, its most common value (values are truncated to int)
 * and stores the result so that mostCommon(int) can return it.
 * </p>
 * <p>
 * Cells flagged as missing are not counted. When several values are equally frequent the
 * smallest one wins. An attribute with no non-missing value gets -1.
 * </p>
 */
public void calculateMostCommon() {
    commons = new int[nInPuts];
    final int[] present = new int[nData];
    for (int attr = 0; attr < nInPuts; attr++) {
        // Gather the non-missing values of this attribute
        int n = 0;
        for (int row = 0; row < nData; row++) {
            if (!this.isMissing(row, attr)) {
                present[n] = (int) X[row][attr];
                n++;
            }
        }
        if (n == 0) {
            commons[attr] = -1;
            continue;
        }
        // After sorting, equal values are adjacent: the longest run is the mode
        Arrays.sort(present, 0, n);
        int best = present[0];
        int bestCount = 0;
        int runStart = 0;
        for (int p = 1; p <= n; p++) {
            // p == n closes the final run, which must be evaluated as well
            if (p == n || present[p] != present[runStart]) {
                final int runLength = p - runStart;
                if (runLength > bestCount) {
                    bestCount = runLength;
                    best = present[runStart];
                }
                runStart = p;
            }
        }
        commons[attr] = best;
    }
}
/**
 * <p>
 * Returns the most common value of the 'i' attribute, as stored by calculateMostCommon().
 * </p>
 * @param i int Number of the attribute
 * @return int Most common value for this variable
 */
public int mostCommon(int i) {
    final int value = commons[i];
    return value;
}
/**
 * <p>
 * Returns the names of the problem variables: every input attribute followed by the
 * first output attribute.
 * </p>
 * @return String[] An array of length nVariables with the names
 */
public String[] giveNames() {
    final String[] names = new String[nVariables];
    int slot = 0;
    while (slot < nInPuts) {
        names[slot] = Attributes.getInputAttribute(slot).getName();
        slot++;
    }
    // The output name goes right after the last input name
    names[nInPuts] = Attributes.getOutputAttribute(0).getName();
    return names;
}
/**
 * <p>
 * Returns the nominal label of each class, taken from the first output attribute.
 * </p>
 * @return String[] one label per class, or null when the output attribute is not nominal
 */
public String[] giveClasses(){
    final Attribute output = Attributes.getOutputAttribute(0);
    if (output.getType() != Attribute.NOMINAL) {
        // Numeric classes have no labels to report
        return null;
    }
    final String[] labels = new String[nClasses];
    for (int c = 0; c < nClasses; c++) {
        labels[c] = output.getNominalValue(c);
    }
    return labels;
}
/**
 * <p>
 * Reports whether the loaded attribute definitions include real-valued attributes,
 * as answered by Attributes.hasRealAttributes().
 * </p>
 * @return boolean True if any exists
 */
public boolean existContinousAttributes(){
    final boolean anyReal = Attributes.hasRealAttributes();
    return anyReal;
}
/**
 * <p>
 * Finds the example nearest to a rule among those that share the rule's class and are
 * not covered by it. Only distances below 100000 are considered.
 * </p>
 * <p>
 * If a distance tie was seen at any point of the scan, the answer is drawn at random
 * (after seeding Randomize with the given seed) among the examples currently tied at the
 * minimum distance, restricted to those of the default class when there are any.
 * </p>
 * @param R the rule
 * @param defaultClass class preferred when breaking ties
 * @param seed seed for the random tie-break
 * @param s exponent forwarded to distance() for numeric attributes
 * @param q exponent forwarded to distance() for nominal attributes
 * @return int position of the chosen example, or -1 if no candidate was found
 */
public int nearestSample(Complex R,int defaultClass,long seed,int s,int q){
    int best = -1;
    double bestDist = 100000.;
    final int[] tied = new int[nData];
    int tiedCount = 0;
    boolean tieSeen = false;
    for (int i = 0; i < nData; i++) {
        // Candidates: same class as the rule and not covered by it
        if (R.getClassAttribute() != C[i] || R.ruleCoversInstance(X[i])) {
            continue;
        }
        final double current = distance(R, X[i], s, q, bestDist);
        if (current < bestDist) {
            // New strict minimum: restart the list of tied examples
            bestDist = current;
            best = i;
            tied[0] = i;
            tiedCount = 1;
        } else if (current == bestDist) {
            tied[tiedCount] = i;
            tiedCount++;
            tieSeen = true;
        }
    }
    if (!tieSeen) {
        return best;
    }
    // Keep the tied examples that belong to the default class
    final int[] tiedDefault = new int[nData];
    int tiedDefaultCount = 0;
    for (int j = 0; j < tiedCount; j++) {
        if (C[tied[j]] == defaultClass) {
            tiedDefault[tiedDefaultCount] = tied[j];
            tiedDefaultCount++;
        }
    }
    Randomize.setSeed(seed);
    if (tiedDefaultCount > 0) {
        return tiedDefault[Randomize.Randint(0, tiedDefaultCount)];
    }
    return tied[Randomize.Randint(0, tiedCount)];
}
/**
 * <p>
 * Calculates the distance between a rule and an example, adding one term per selector of
 * the rule. For a nominal attribute the term is the sum over classes of
 * |P(class | rule value) - P(class | example value)|^q; for any other attribute it is the
 * gap between the example value and the selector's [values[0], values[1]] interval,
 * raised to s. The accumulation stops as soon as the running total exceeds minDist.
 * </p>
 * @param R the rule
 * @param E the example
 * @param s exponent applied to numeric gaps
 * @param q exponent applied to nominal probability differences
 * @param minDist lowest distance known so far, used as the cut-off
 * @return the distance, or a partial sum greater than minDist when cut short
 */
public double distance(Complex R,double []E,int s,int q,double minDist){
    double total = 0;
    for (int i = 0; i < R.size(); i++) {
        final Selector sel = R.getSelector(i);
        final int attr = sel.getAttribute();
        final double example = E[attr];
        if (Attributes.getInputAttribute(attr).getType() == Attribute.NOMINAL) {
            final double ruleValue = sel.getZeroValue();
            for (int c = 0; c < nClasses; c++) {
                final double diff = Math.abs(probability(c, attr, ruleValue) - probability(c, attr, example));
                total += Math.pow(diff, q);
            }
        } else {
            final double[] bounds = sel.getValues();
            double gap;
            if (example > bounds[1]) {
                gap = example - bounds[1];
            } else if (example < bounds[0]) {
                gap = bounds[0] - example;
            } else {
                gap = 0;
            }
            total += Math.pow(gap, s);
        }
        // Already worse than the best known distance: it cannot win, stop here
        if (total > minDist) {
            break;
        }
    }
    return total;
}
/**
 * <p>
 * Calculates the distance between a stored example and a test example, adding one term
 * per input attribute. For a nominal attribute the term is the sum over classes of
 * |P(class | stored value) - P(class | test value)|; for any other attribute it is the
 * absolute difference of the two values. The accumulation stops as soon as the running
 * total exceeds minDist.
 * </p>
 * @param E the number of the example in the dataset
 * @param E_test the example of the test
 * @param minDist the lowest distance, used as the cut-off
 * @return the distance, or a partial sum greater than minDist when cut short
 */
public double distance(int E,double []E_test,/*int N,*/double minDist){
    double total = 0;
    for (int attr = 0; attr < nInPuts; attr++) {
        final double testValue = E_test[attr];
        final double storedValue = X[E][attr];
        if (Attributes.getInputAttribute(attr).getType() == Attribute.NOMINAL) {
            for (int c = 0; c < nClasses; c++) {
                total += Math.abs(probability(c, attr, storedValue) - probability(c, attr, testValue));
            }
        } else {
            total += Math.abs(storedValue - testValue);
        }
        // Already worse than the best known distance: it cannot win, stop here
        if (total > minDist) {
            break;
        }
    }
    return total;
}
/**
 * <p>
 * Estimates the probability of a class given an attribute value: among the examples whose
 * non-normalized value for the attribute equals the given one, the fraction that belongs
 * to the class.
 * </p>
 * @param classAttribute the class
 * @param attribute number of the attribute
 * @param value the value of the attribute
 * @return the fraction, or 0 when no example has that value
 */
private double probability(int classAttribute,int attribute,double value){
    int withValue = 0;
    int withValueInClass = 0;
    for (int row = 0; row < nData; row++) {
        if (XSinNor[row][attribute] == value) {
            withValue++;
            if (C[row] == classAttribute) {
                withValueInClass++;
            }
        }
    }
    return (withValue == 0) ? 0 : (double) withValueInClass / (double) withValue;
}
/**
 * <p>
 * Calculates the most frequent class in the set of values; ties are resolved by getMaximum().
 * </p>
 * @param seed seed handed to getMaximum() for tie-breaking
 * @return the number of the most frequent class
 */
public int mostFrequentClass(long seed){
    // A new int array starts at zero, so it can be used as a tally directly
    final int[] tally = new int[nClasses];
    for (int i = 0; i < nData; i++) {
        tally[C[i]]++;
    }
    return getMaximum(tally, seed);
}
/**
 * <p>
 * Counts the positive examples of a rule: those it covers whose class equals the rule's class.
 * </p>
 * @param R the rule
 * @return number of positive examples
 */
public int getNumPosExamples(Complex R){
    int positives = 0;
    for (int i = 0; i < nData; i++) {
        if (R.ruleCoversInstance(X[i]) && R.getClassAttribute() == C[i]) {
            positives++;
        }
    }
    return positives;
}
/**
 * <p>
 * Counts the negative examples of a rule: those it covers whose class differs from the rule's class.
 * </p>
 * @param R the rule
 * @return number of negative examples
 */
public int getNumNegExamples(Complex R){
    int negatives = 0;
    for (int i = 0; i < nData; i++) {
        if (R.ruleCoversInstance(X[i]) && R.getClassAttribute() != C[i]) {
            negatives++;
        }
    }
    return negatives;
}
/**
 * <p>
 * Calculates the neighbourhood of a test example: the stored examples at sorted-distance
 * positions 1..k, extended with any further examples whose distance equals the one at
 * position k. Position 0 (the smallest distance) is skipped, following a leave-one-out
 * scheme in which it is taken to be the example itself.
 * </p>
 * <p>
 * The selection never runs past the last stored example, so a k equal to or larger than
 * the number of examples simply selects all positions from 1 onwards.
 * </p>
 * @param test the example of test
 * @param k the size of the neighbourhood
 * @return int[] array of length nData; the leading entries are the selected positions and the unused ones hold -2
 */
public int [] getNeighbourSet(double[] test,int k){
    final double[] sorted = new double[nData];
    final double[] byInstance = new double[nData];
    final double minDist = 100000000;
    final int[] selected = new int[nData];
    for (int j = 0; j < nData; j++) {
        selected[j] = -2;
    }
    for (int i = 0; i < nData; i++) {
        byInstance[i] = distance(i, test, minDist);
        sorted[i] = byInstance[i];
    }
    Arrays.sort(sorted, 0, nData);
    // Keep the k nearest plus ties with the k-th; j < nData guards every array access
    for (int j = 1; j < nData && (j <= k || sorted[j] == sorted[k]); j++) {
        selected[j - 1] = lookup(byInstance, sorted[j], selected);
    }
    return selected;
}
/**
 * <p>
 * Searches a vector of integers linearly for a value.
 * </p>
 * @param v the vector to search
 * @param value the value to look for
 * @return int index of the first element equal to value, or -1 if there is none
 */
private int lookupInt(int[]v,int value){
    for (int i = 0; i < v.length; i++) {
        if (v[i] == value) {
            return i;
        }
    }
    return -1;
}
/**
 * <p>
 * Searches a vector for a value, ignoring the positions that have already been selected.
 * </p>
 * @param v the vector to search
 * @param value the value to look for
 * @param selected positions already taken; a position listed here is never returned
 * @return int first position not present in selected whose element equals value, or -1 if there is none
 */
private int lookup(double[]v,double value,int []selected){
    for (int i = 0; i < v.length; i++) {
        if (v[i] == value && lookupInt(selected, i) == -1) {
            return i;
        }
    }
    return -1;
}
/**
 * <p>
 * Fills, for each nominal attribute, a symmetric matrix with the distance between every
 * pair of its value codes 0..numValues[attribute]-1. The distance between two codes is
 * the sum over classes of |P(class | first code) - P(class | second code)|; the diagonal
 * is 0. Entries of non-nominal attributes are left untouched.
 * </p>
 */
public void computeSVDM(){
    final int[] kinds = variableType();
    for (int attr = 0; attr < nInPuts; attr++) {
        if (kinds[attr] != 0) {
            continue; // only nominal attributes get a matrix
        }
        final int nV = numValues[attr];
        SVDM[attr] = new double[nV][nV];
        final double[][] table = SVDM[attr];
        for (int a = 0; a < nV; a++) {
            // table[a][a] keeps the 0.0 a new array starts with
            for (int b = a + 1; b < nV; b++) {
                double d = 0;
                for (int c = 0; c < nClasses; c++) {
                    d += Math.abs(probability(c, attr, (double) a) - probability(c, attr, (double) b));
                }
                table[a][b] = d;
                table[b][a] = d;
            }
        }
    }
}
/**
 * <p>
 * Creates the vector of value codes of an attribute whose distance to the test value is
 * not greater than the distance between the test and train values, according to the
 * matrix built by computeSVDM(). The codes fill the front of the vector and the unused
 * positions hold -1.
 * </p>
 * <p>
 * When the test value is -1 the vector holds just the test and train values, or null is
 * returned if the attribute has fewer than two values.
 * </p>
 * @param attr the attribute
 * @param test the value of the test example
 * @param train the value of the train example
 * @return double[] vector of length numValues[attr], or null in the case described above
 */
public double[]createBall(int attr,double test,double train){
    final int nV = numValues[attr];
    final double[] ball = new double[nV];
    Arrays.fill(ball, -1.0);
    if (test == -1) {
        if (nV <= 1) {
            // No room for the pair of values: do not include a selector
            return null;
        }
        ball[0] = test;
        ball[1] = train;
        return ball;
    }
    final double[] fromTest = SVDM[attr][(int) test];
    final double radius = fromTest[(int) train];
    int used = 0;
    for (int i = 0; i < nV; i++) {
        if (fromTest[i] <= radius) {
            ball[used] = (double) i;
            used++;
        }
    }
    return ball;
}
/**
 * <p>
 * Gets the numeric code of a nominal value: the first code in 0..numValues[at]-1 whose
 * nominal form, as given by findNominalValue(), matches the string.
 * </p>
 * @param at the attribute
 * @param str the nominal value to translate
 * @return double the code, or -1 when no code matches
 */
public double getRealValue(int at,String str){
    int code = 0;
    while (code < numValues[at]) {
        final String nominal = findNominalValue(at, (double) code);
        if (nominal.compareTo(str) == 0) {
            return (double) code;
        }
        code++;
    }
    return (-1.);
}
/**
 * <p>
 * Returns the most frequent class of the set of instances. When several classes are
 * equally frequent the one with the lowest index is returned.
 * </p>
 * @return int the index of the most frequent class (0 when there are no classes)
 */
public int getMostFrequentClass(){
    final int[] labels = getC();
    final int[] tally = new int[nClasses];
    for (int i = 0; i < nData; i++) {
        final int label = labels[i];
        // Labels outside 0..nClasses-1 match no class and are not counted
        if (label >= 0 && label < nClasses) {
            tally[label]++;
        }
    }
    int winner = 0;
    int winnerCount = -1;
    for (int c = 0; c < nClasses; c++) {
        if (tally[c] > winnerCount) {
            winner = c;
            winnerCount = tally[c];
        }
    }
    return winner;
}
/**
 * <p>
 * Calculates the k nearest stored examples of a test example: those at sorted-distance
 * positions 1..k. Position 0 (the smallest distance) is skipped, following a
 * leave-one-out scheme in which it is taken to be the example itself.
 * </p>
 * <p>
 * The selection never runs past the last stored example, so a k equal to or larger than
 * the number of examples simply selects all positions from 1 onwards.
 * </p>
 * @param test the test example
 * @param k number of neighbours
 * @return int[] array of length nData; the leading entries are the selected positions and the unused ones hold -2
 */
public int [] getNN(double[] test,int k){
    final double[] sorted = new double[nData];
    final double[] byInstance = new double[nData];
    final double minDist = 100000000;
    final int[] selected = new int[nData];
    for (int j = 0; j < nData; j++) {
        selected[j] = -2;
    }
    for (int i = 0; i < nData; i++) {
        byInstance[i] = distance(i, test, minDist);
        sorted[i] = byInstance[i];
    }
    Arrays.sort(sorted, 0, nData);
    // j < nData keeps both sorted[j] and selected[j-1] inside the arrays
    for (int j = 1; j <= k && j < nData; j++) {
        selected[j - 1] = lookup(byInstance, sorted[j], selected);
    }
    return selected;
}
}