package weka.classifiers.rules;
import java.lang.*;
import java.io.*;
import java.util.*;
import weka.core.Instances;
public class L3implementation implements Serializable {
int CLASS_NUM = 200;
String[] class_labels;
public int num_features;
public int num_samples;
public int num_classes;
/**
 * Reads the dataset header and fills in {@code num_features} and
 * {@code num_samples}.
 * <p>
 * For "arff" input: {@code num_features} = number of "@attribute" lines minus
 * one (the last attribute is the class), {@code num_samples} = number of
 * whitespace tokens found after the "@data" line (one per data row when rows
 * contain no spaces).  For any other format a csv layout is assumed: the
 * first line is the header (class column included), every following line is
 * one sample.
 *
 * @param dataset_name path of the input file
 * @param format       "arff" (case-insensitive) or anything else for csv
 */
public void get_header(String dataset_name, String format) {
    BufferedReader buff = null;
    try {
        buff = new BufferedReader(new FileReader(dataset_name));
        if (format.equalsIgnoreCase("arff")) {
            int count_genes = 0;   // number of "@attribute" occurrences
            int count_samples = 0; // tokens read after the "@data" line
            boolean start_count_samples = false;
            String line;
            while ((line = buff.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line);
                while (st.hasMoreTokens()) {
                    String s = st.nextToken();
                    if (s.equalsIgnoreCase("@attribute"))
                        count_genes++;
                    // check BEFORE setting the flag so the "@data" token
                    // itself is not counted as a sample
                    if (start_count_samples)
                        count_samples++;
                    if (s.equalsIgnoreCase("@data"))
                        start_count_samples = true;
                }
            }
            num_features = count_genes - 1; // last @attribute is the class
            num_samples = count_samples;
        } else {
            // csv format: header on the first line, samples afterwards
            boolean firstline = true;
            int count_features = 0;
            int count_samples = 0;
            String line;
            while ((line = buff.readLine()) != null) {
                if (firstline) {
                    StringTokenizer st = new StringTokenizer(line, ",");
                    while (st.hasMoreTokens()) {
                        st.nextToken();
                        count_features++;
                    }
                    count_features--; // ignore class attribute
                    // BUG FIX: the flag was never cleared, so features were
                    // re-counted on every line and no samples were counted
                    firstline = false;
                } else {
                    count_samples++;
                }
            }
            num_features = count_features;
            num_samples = count_samples;
        }
    } catch (IOException e) {
        // report instead of silently discarding the message
        e.printStackTrace();
    } finally {
        // always release the reader, even on the error path
        if (buff != null) {
            try {
                buff.close();
            } catch (IOException ignored) {
                // nothing sensible to do while cleaning up
            }
        }
    }
}
/**
 * Collects the distinct class-label strings by scanning the LAST column of
 * every data row ("not well formed" = the header cannot be used to read the
 * labels directly).
 * Side effects: resets and recomputes {@code num_classes}, fills
 * {@code class_labels}, and (csv/data formats) overwrites
 * {@code num_features} with the zero-based index of the last column.
 *
 * @param dataset_name path of the input file
 * @param format       "arff", "csv" or "data" (case-insensitive)
 * @return the distinct class labels, or null when the file is empty
 */
public String[] read_Class_Labels_not_well_formed(String dataset_name, String format) {
// NOTE(review): fixed capacity of 100 distinct labels; more than that raises
// ArrayIndexOutOfBoundsException -- confirm this bound with the datasets used.
String[] temp = new String[100];
FileReader file;
boolean eof = false;
boolean start = false; // arff only: true once the "@data" line has been seen
String line = null;
String line2 = null;
StringTokenizer st;
int count_features = 0; // index of the token being read within the current row
num_classes = 0;
try {
file = new FileReader(dataset_name);
BufferedReader buff = new BufferedReader(file);
if (format.equalsIgnoreCase("Arff")==true) {
// NOTE(review): this branch compares count_features against num_features,
// so get_header() must have run first -- verify at call sites.
while (!eof) {
line = buff.readLine();
if (line == null)
eof = true;
else {
if (start == true) {
st = new StringTokenizer(line, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if (count_features == num_features) {
// last column reached: record the label if unseen so far
// check if the class label already exists
boolean found = false;
for (int i = 0; i<num_classes; i++)
if (temp[i].equalsIgnoreCase(s)== true) found = true;
if (!found) {
num_classes++;
temp[num_classes-1] = new String(s);
}
}
count_features++;
}
}
if ((line.equalsIgnoreCase("@data")== true))
start = true; //from the next cycle let's start reading!
}
count_features = 0; // restart the column index for the next row
}
} // end if (format == arff)
if (format.equalsIgnoreCase("csv")==true) {
// read the first header line counting the number of features
line = buff.readLine();
if (line == null)
return null; // error!!
// num_features becomes the zero-based index of the last (class) column
num_features = -1;
st = new StringTokenizer(line, ",");
while (st.hasMoreTokens()) {
String s2 = st.nextToken();
num_features++;
}
// the header line has been consumed: only data rows are scanned below
while (!eof) {
line = buff.readLine();
if (line == null)
eof = true;
else {
st = new StringTokenizer(line, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if (count_features == num_features) {
// check if the class label already exists
boolean found = false;
for (int i = 0; i<num_classes; i++)
if (temp[i].equalsIgnoreCase(s)== true) found = true;
if (!found) {
num_classes++;
temp[num_classes-1] = new String(s);
}
}
count_features++;
}
}
count_features = 0;
}
} // end if (format == csv)
if (format.equalsIgnoreCase("data")==true) {
// read the first header line counting the number of features
line = buff.readLine();
if (line == null)
return null; // error!!
num_features = -1;
st = new StringTokenizer(line, ",");
while (st.hasMoreTokens()) {
String s2 = st.nextToken();
num_features++;
}
// restart reading the file
// (".data" files have no header: the first line is itself a sample,
// so the whole file is re-scanned from the beginning)
file.close();
buff.close();
file = new FileReader(dataset_name);
buff = new BufferedReader(file);
while (!eof) {
line = buff.readLine();
if (line == null)
eof = true;
else {
st = new StringTokenizer(line, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if (count_features == num_features) {
// check if the class label already exists
boolean found = false;
for (int i = 0; i<num_classes; i++)
if (temp[i].equalsIgnoreCase(s)== true) found = true;
if (!found) {
num_classes++;
temp[num_classes-1] = new String(s);
}
}
count_features++;
}
}
count_features = 0;
}
} // end if (format == data)
} catch (IOException e) {
e.printStackTrace();
}
// copy temp values in class_labels vector
// NOTE(review): the reader is never closed on the arff/csv paths, and not
// closed at all after the scan -- potential descriptor leak.
class_labels = new String[num_classes];
for (int i = 0; i< num_classes; i++)
class_labels[i] = new String(temp[i]);
return class_labels;
}
/**
 * Reads the class labels from a WELL-FORMED arff header: the attribute line
 * whose declaration contains a "{a,b,c}" value list.  The opening and closing
 * braces are stripped from the first and last label.
 * Precondition: {@code num_classes} must already hold the number of labels,
 * because {@code class_labels} is allocated with that size before parsing.
 * NOTE(review): each line is first tokenized on whitespace, so this only
 * works when the "{...}" list contains no spaces -- confirm with the inputs.
 *
 * @param dataset_name path of the arff file
 * @param format only "arff" (case-insensitive) is handled; any other value
 *        returns the freshly allocated (unfilled) array
 * @return class_labels filled with the parsed label strings
 */
String[] read_Class_Labels_well_formed(String dataset_name, String format) {
class_labels = new String[num_classes];
try {
if (format.equalsIgnoreCase("arff")== true) { // let's suppose arff format
// count the number of rows starting with "@attribute" and subtract
// the last one refering to the class labels.
FileReader file = new FileReader(dataset_name);
BufferedReader buff = new BufferedReader(file);
boolean eof = false;
boolean start_count_samples = false;
String line = null;
String s = null;
String s2 = null;
StringTokenizer st, st2;
int count_genes = 0; // number of "@attribute" string occurences
int count_classes = 0; // number of class labels occurences in the last attribute definition
int count_samples = 0; // number of samples equal to the number of lines after "@data" line
// read the input file
while (!eof) {
line = buff.readLine();
if (line == null)
eof = true;
else {
st = new StringTokenizer(line);
while (st.hasMoreTokens()) {
s = st.nextToken();
if (s.equalsIgnoreCase("@attribute"))
count_genes++;
if (s.indexOf("{")!= -1) {
// s contains the list of possibile class labels
st2 = new StringTokenizer(s, ",");
while (st2.hasMoreTokens()) {
s2 = st2.nextToken();
// strip the surrounding braces from the first/last label
if (s2.indexOf("{")!= -1)
s2 = s2.substring(s2.indexOf("{")+1, s2.length());
if (s2.indexOf("}")!= -1)
s2 = s2.substring(0, s2.indexOf("}"));
class_labels[count_classes]= new String(s2);
count_classes++;
}
}
}
} // end else if (line2 == null)
} // end while (!eof)
} // end else: arff format
// NOTE(review): the reader is never closed, and a mismatch between
// num_classes and the actual label count overflows class_labels.
} catch (IOException e) {
e.getMessage();
}
return class_labels;
}
/**
 * Converts the data section of an arff file into a binary transaction file
 * (little-endian, unsigned DWORDs).  For each data row it writes: tid, cid,
 * the item count (num_features+1), one numeric id per attribute value and
 * finally CLASS_NUM+index for the class label.  (attribute, value) pairs are
 * numbered incrementally through the shared hash table, which is returned so
 * the caller can reuse the mapping across files.
 *
 * @param filename     arff file whose rows (after "@data") are converted
 * @param label        when 1, also writes the class-labels file to class_path
 * @param class_labels ordered class labels; a row's label is encoded as
 *                     CLASS_NUM + its index in this array
 * @param hash         running (attribute,value) -> id mapping, updated in place
 * @param class_path   destination of the ".cls" file (only used if label==1)
 * @param bin_name     destination of the binary transaction file
 * @return the (possibly extended) hash table
 */
public Hashtable creabin(String filename, int label, String[] class_labels, Hashtable hash, String class_path, String bin_name) {
// read transactions file and save them into a binary file
RandomAccessFile file;
BinaryFile binFile;
// set the endian mode to LITTLE_ENDIAN
final short endian = BinaryFile.LITTLE_ENDIAN;
// set the signed mode to unsigned
final boolean signed = false;
boolean eof = false;
int j = 0;
BufferedReader buff_rd;
FileReader file_rd;
FileWriter cl_file = null;
boolean start = false; // start to read after "@data" line
int attrib_counter= 0;
String line2 = null;
StringTokenizer st;
Integer n2;
String myStr2 = null;
int current_value = 0;
long t_id = 0;
long c_id = 0;
try {
file_rd = new FileReader(filename);
buff_rd = new BufferedReader(file_rd);
file = new RandomAccessFile(bin_name, "rw");
binFile = new BinaryFile(file);
// set the endian mode to LITTLE_ENDIAN
try {
binFile.setEndian(BinaryFile.LITTLE_ENDIAN);
} catch (Exception e) {
e.printStackTrace();
}
// set the signed mode to unsigned
binFile.setSigned(false);
if (label == 1) {
// create also class labels file
cl_file = new FileWriter(class_path);
cl_file.write(Integer.toString(CLASS_NUM)+"\n");
for (int i = 0; i< num_classes; i++) {
cl_file.write(class_labels[i]+"\n");
}
cl_file.close();
}
while (!eof) {
line2 = buff_rd.readLine();
if (line2 == null)
eof = true;
else {
if (start == true) {
st = new StringTokenizer(line2, ",");
// per-row prologue: transaction id, customer id, item count
binFile.writeDWord(t_id);
binFile.writeDWord(c_id);
t_id++;
c_id++;
// write X number of objects to be read in the current rows, class label included
//file_wr.write(num_features);
binFile.writeDWord((long)(num_features+1));
while (st.hasMoreTokens()) {
String s = st.nextToken();
if ((attrib_counter == num_features) && (s!= null)) {
// last column contains class label for that gene
// write the correspondent number of the class
for (j= 0; j< num_classes; j++) // look for the index of the class label
if (class_labels[j].equals(s) == true) break;
// NOTE(review): if the label is not found, j == num_classes here and
// CLASS_NUM+num_classes is written -- confirm this cannot happen.
binFile.writeDWord((long)(CLASS_NUM+j));
} // end if
else { // predictive attribute, not the class column
if (s!= null) {
//here I have to check if the couple (attr, value) already exists.
//if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
binFile.writeDWord(n.longValue());
} else {
// value assigned is the next incremental value available
current_value++;
hash.put(myStr, new Integer(current_value));
binFile.writeDWord((long)current_value);
}
}
}
attrib_counter++;
}
} // end if (start== true)
if ((line2.equalsIgnoreCase("@data")== true))
start = true; //from the next cycle let's start reading!
} // end else (line == null)
attrib_counter = 0;
}
buff_rd.close();
file_rd.close();
binFile = null;
file.close();
// NOTE(review): the streams above are only closed on the success path, and
// the IOException below is silently discarded (e.getMessage() is a no-op).
} catch (IOException e) {
e.getMessage();
}
return hash;
}
/**
 * Debug helper: sequentially reads a binary transaction file as unsigned
 * little-endian DWORD records (tid, cid, item count, then the items) until
 * the end of file, which surfaces as an exception and ends the loop.
 *
 * @param filename path of the binary file to scan
 */
public void leggiDWordbin(String filename) {
    try {
        RandomAccessFile raf = new RandomAccessFile(filename, "r");
        BinaryFile bin = new BinaryFile(raf);
        // binary layout is little-endian, unsigned
        bin.setEndian(BinaryFile.LITTLE_ENDIAN);
        bin.setSigned(false);
        // read records until EOF breaks out via the catch below
        while (true) {
            long tid = bin.readDWord();
            long cid = bin.readDWord();
            long numItems = bin.readDWord();
            for (long k = 0; k < numItems; k++) {
                bin.readDWord();
            }
        }
    } catch (Exception e) {
        System.out.println("**Error: " + e.getMessage());
    }
}
/**
 * Debug helper: sequentially reads a binary transaction file as unsigned
 * little-endian WORD records (tid, cid, item count, then the items) until
 * the end of file, which surfaces as an exception and ends the loop.
 *
 * @param filename path of the binary file to scan
 */
public void leggiWordbin(String filename) {
    try {
        RandomAccessFile raf = new RandomAccessFile(filename, "r");
        BinaryFile bin = new BinaryFile(raf);
        // binary layout is little-endian, unsigned
        bin.setEndian(BinaryFile.LITTLE_ENDIAN);
        bin.setSigned(false);
        // read records until EOF breaks out via the catch below
        while (true) {
            int tid = bin.readWord();
            int cid = bin.readWord();
            int numItems = bin.readWord();
            for (int k = 0; k < numItems; k++) {
                bin.readWord();
            }
        }
    } catch (Exception e) {
        System.out.println("**Error: " + e.getMessage());
    }
}
/**
 * Debug helper: reads a file byte by byte, counting how many bytes it
 * contains.  The count is currently only kept locally (the reporting
 * println is commented out), so the method's observable effect is just
 * validating that the file is readable.
 *
 * @param filename path of the file to read
 */
public void readbin(String filename) {
    FileInputStream file = null;
    try {
        file = new FileInputStream(filename);
        int count = 0;
        // read() returns -1 at end of file
        while (file.read() != -1) {
            count++;
        }
        //System.out.println("Byte read: "+count+"\n");
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // BUG FIX: close was only reached on the success path before;
        // release the stream even when an exception interrupted the read
        if (file != null) {
            try {
                file.close();
            } catch (IOException ignored) {
                // nothing sensible to do while cleaning up
            }
        }
    }
}
/**
 * Converts a headerless ".data" file into an ARFF file: numeric predictive
 * attributes plus a nominal class attribute built from {@code class_labels}
 * (which must already be filled).  Every line of the input -- the first one
 * included -- is copied into the @data section.
 *
 * @param dataset   path of the ".data" input file
 * @param path_name directory (with trailing separator) for the output
 * @return the path of the ARFF file written ("dataset_converted.Arff")
 */
String create_arff_from_data(String dataset, String path_name) {
    String arff_filename = path_name + "dataset_converted.Arff";
    BufferedReader buff_rd = null;
    FileWriter fw = null;
    try {
        buff_rd = new BufferedReader(new FileReader(dataset));
        // count the attributes from the first data line; class assumed last
        String line2 = buff_rd.readLine();
        int attributes = 0;
        StringTokenizer st = new StringTokenizer(line2, ",");
        while (st.hasMoreTokens()) {
            st.nextToken();
            attributes++;
        }
        fw = new FileWriter(arff_filename);
        // header part: numeric attributes, then the nominal class list
        fw.write("@relation Arff_dataset"+"\n"+"\n");
        for (int counter = 1; counter < (attributes+1); counter++) {
            if (counter != attributes)
                fw.write("@attribute "+"attr"+counter+" numeric"+"\n");
            else {
                fw.write("@attribute class {");
                for (int i = 0; i < class_labels.length; i++) {
                    if (i != class_labels.length-1)
                        fw.write(class_labels[i]+",");
                    else
                        fw.write(class_labels[i]+"}"+"\n");
                }
            }
        }
        fw.write("\n"+"@data");
        buff_rd.close();
        // re-read the whole file: every line (first one included) is a sample
        buff_rd = new BufferedReader(new FileReader(dataset));
        while ((line2 = buff_rd.readLine()) != null) {
            // each sample starts on its own line of the @data section
            fw.write("\n");
            fw.write(line2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // BUG FIX: streams were only closed on the success path before
        try { if (fw != null) fw.close(); } catch (IOException ignored) {}
        try { if (buff_rd != null) buff_rd.close(); } catch (IOException ignored) {}
    }
    return arff_filename;
}
/**
 * Converts a csv dataset (first line = header) into an ARFF file whose
 * attributes are all declared of type "string".  The output file name is the
 * dataset's base name with an ".Arff" extension, written under pathname.
 *
 * @param dataset  path of the csv input; must contain '.' after the last '/'
 * @param pathname directory (with trailing separator) for the converted file
 * @return the path of the ARFF file written, or null on an early I/O error
 */
String create_arff_from_csv(String dataset, String pathname) {
    String conversion_path = null;
    BufferedReader buff_rd = null;
    FileWriter fw = null;
    try {
        buff_rd = new BufferedReader(new FileReader(dataset));
        // the first line is the header: count the attributes (class included)
        String line2 = buff_rd.readLine();
        int attributes = 0;
        StringTokenizer st = new StringTokenizer(line2, ",");
        while (st.hasMoreTokens()) {
            st.nextToken();
            attributes++;
        }
        // build the destination name from the dataset base name
        String arff_filename = dataset.substring(dataset.lastIndexOf("/")+1, dataset.lastIndexOf("."));
        conversion_path = pathname + arff_filename + ".Arff";
        fw = new FileWriter(conversion_path);
        // header part: every predictive attribute is a string, class is last
        fw.write("@relation Arff_dataset"+"\n"+"\n");
        for (int counter = 1; counter < (attributes+1); counter++) {
            if (counter != attributes)
                fw.write("@attribute "+counter+" string"+"\n");
            else {
                fw.write("@attribute class string"+"\n");
            }
        }
        fw.write("@data");
        // remaining lines are samples; the header line was already consumed
        while ((line2 = buff_rd.readLine()) != null) {
            fw.write("\n");
            fw.write(line2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // BUG FIX: streams were only closed on the success path before
        try { if (fw != null) fw.close(); } catch (IOException ignored) {}
        try { if (buff_rd != null) buff_rd.close(); } catch (IOException ignored) {}
    }
    return conversion_path;
}
/**
 * Rewrites a dataset so that every (attribute, value) pair is replaced by an
 * incremental numeric id (tracked in a local hash table).  The class column
 * (the last one) uses its own counter and terminates each output row with a
 * newline; predictive columns are comma-separated.  Header lines (arff) or
 * the first line (csv/data, used only to count columns) are handled per
 * format.
 *
 * @param dataset       path of the input file
 * @param numerico_path path of the numeric output file to create
 * @param format        "arff", "csv" or "data" (case-insensitive)
 */
void converti_in_numerico(String dataset, String numerico_path, String format) {
boolean eof = false;
int j = 0;
BufferedReader buff_rd;
FileReader file_rd;
FileWriter cl_file = null;
boolean start = false; // start to read after "@data" line
int attrib_counter= 0;
int sample_index=0;
// feature ends up holding the zero-based index of the class (last) column
int feature = -1;
String line2 = null;
StringTokenizer st;
Integer n2;
String myStr2 = null;
int current_value = 0;
int current_class_value = 0;
Hashtable hash = new Hashtable();
try {
file_rd = new FileReader(dataset);
buff_rd = new BufferedReader(file_rd);
FileWriter fw = new FileWriter(numerico_path);
if (format.equalsIgnoreCase("Arff")== true) {
while (!eof) {
line2 = buff_rd.readLine();
if (line2 == null)
eof = true;
else {
if (start == true) {
st = new StringTokenizer(line2, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if ((attrib_counter == (feature)) && (s!= null)) {
// last column contains class label for that gene
// here I have to check if the couple (attr, value) already exists.
// if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
//file_wr.write(n.intValue());
fw.write(n+"\n");
} else {
// value assigned is the next incremental value aviable
current_class_value++;
hash.put(myStr, new Integer(current_class_value));
fw.write(current_class_value+"\n");
}
} // end if
else {
if (s!= null) {
//here I have to check if the couple (attr, value) already exists.
//if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
//file_wr.write(n.intValue());
fw.write(n+",");
} else {
// value assigned is the next incremental value aviable
current_value++;
hash.put(myStr, new Integer(current_value));
fw.write(current_value+",");
}
}
}
attrib_counter++;
}
} // end if (start== true)
else {
// header section: copy the line and count the @attribute declarations
fw.write(line2+"\n");
if (line2.contains("@attribute")== true)
feature++;
}
if ((line2.equalsIgnoreCase("@data")== true))
start = true; //from the next cycle let's start reading!
} // end else (line == null)
attrib_counter = 0;
}
}
if (format.equalsIgnoreCase("csv")== true) {
while (!eof) {
line2 = buff_rd.readLine();
if (line2 == null)
eof = true;
else {
if (start == true) {
st = new StringTokenizer(line2, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if ((attrib_counter == (feature)) && (s!= null)) {
// last column contains class label for that gene
// here I have to check if the couple (attr, value) already exists.
// if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
fw.write(n+"\n");
} else {
// value assigned is the next incremental value aviable
current_class_value++;
hash.put(myStr, new Integer(current_class_value));
fw.write(current_class_value+"\n");
}
} // end if
else {
if (s!= null) {
//here I have to check if the couple (attr, value) already exists.
//if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
fw.write(n+",");
} else {
// value assigned is the next incremental value aviable
current_value++;
hash.put(myStr, new Integer(current_value));
fw.write(current_value+",");
}
}
}
attrib_counter++;
}
} // end if (start== true)
else {
// first (header) line: copy it and derive the class-column index
fw.write(line2+"\n");
StringTokenizer st2;
st2 = new StringTokenizer(line2, ",");
while (st2.hasMoreTokens()) {
String s2 = st2.nextToken();
feature++;
}
start = true;
}
} // end else (line == null)
attrib_counter = 0;
}
}
if (format.equalsIgnoreCase("data")== true) {
boolean firstline = true;
while (!eof) {
line2 = buff_rd.readLine();
// NOTE(review): line2 is tokenized below before the null check, so an
// empty input file raises a NullPointerException here -- confirm inputs
// are never empty or move the check first.
if (firstline) {
StringTokenizer st3;
st3 = new StringTokenizer(line2, ",");
while (st3.hasMoreTokens()) {
String s3 = st3.nextToken();
feature++;
}
firstline = false;
}
if (line2 == null)
eof = true;
else {
st = new StringTokenizer(line2, ",");
while (st.hasMoreTokens()) {
String s = st.nextToken();
if ((attrib_counter == (feature)) && (s!= null)) {
// last column contains class label for that gene
// here I have to check if the couple (attr, value) already exists.
// if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
//file_wr.write(n.intValue());
fw.write(n+"\n");
} else {
// value assigned is the next incremental value aviable
// NOTE(review): this branch increments AFTER the put, so the first
// class id is 0 here while the arff/csv branches start at 1 --
// confirm whether this asymmetry is intentional.
hash.put(myStr, new Integer(current_class_value));
//file_wr.write((int)current_value);
fw.write(current_class_value+"\n");
current_class_value++;
}
} // end if
else {
if (s!= null) {
//here I have to check if the couple (attr, value) already exists.
//if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+attrib_counter+"Value"+s;
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
//file_wr.write(n.intValue());
fw.write(n+",");
} else {
// value assigned is the next incremental value aviable
current_value++;
hash.put(myStr, new Integer(current_value));
//file_wr.write((int)current_value);
fw.write(current_value+",");
}
}
}
attrib_counter++;
}
} // end else (line == null)
attrib_counter = 0;
}
}
buff_rd.close();
fw.close();
file_rd.close();
// NOTE(review): streams are only closed on the success path, and the
// IOException below is silently discarded (e.getMessage() is a no-op).
} catch (IOException e) {
e.getMessage();
}
}
/**
 * Collects the distinct class values of a Weka Instances set, in order of
 * first appearance, as their string representations.  Also updates
 * {@code num_classes} from {@code inst.numClasses()}; slots beyond the
 * number of distinct values actually seen remain "".
 *
 * @param inst the instances whose class values are collected
 * @return an array of size num_classes holding the distinct class values
 */
public String[] read_Class_Labels_from_instances(Instances inst) {
    num_classes = inst.numClasses();
    String[] classes = new String[num_classes];
    // pre-fill so unused trailing slots stay as empty strings
    for (int g = 0; g < num_classes; g++) {
        classes[g] = "";
    }
    int filled = 0; // how many distinct values have been recorded so far
    for (int i = 0; i < inst.numInstances(); i++) {
        double value = inst.instance(i).classValue();
        boolean alreadySeen = false;
        for (int k = 0; k < filled; k++) {
            if (value == Double.parseDouble(classes[k])) {
                alreadySeen = true;
                break;
            }
        }
        if (!alreadySeen) {
            classes[filled] = String.valueOf(value);
            filled++;
        }
    }
    return classes;
}
/**
 * Writes a Weka Instances set as a binary transaction file (little-endian,
 * unsigned DWORDs), one record per instance: tid, cid, attribute count, one
 * numeric id per predictive attribute value, then CLASS_NUM+index for the
 * class value.  Output files are named "&lt;dataset base name&gt;k&lt;fold&gt;.bin"
 * (and ".cls" for the class labels when label==1) under pathname.  Also
 * updates {@code num_classes} from the instances.
 *
 * @param inst         instances to convert
 * @param label        when 1, also writes the class-labels (".cls") file
 * @param class_labels string forms of the class values (parsed as doubles
 *                     when matching an instance's classValue)
 * @param hash         running (attribute,value) -&gt; id mapping, updated in place
 * @param pathname     destination directory (with trailing separator)
 * @param dataset      original dataset path, used only for the base name
 * @param current_fold fold number embedded in the output file names
 * @return the (possibly extended) hash table
 */
public Hashtable creabin_from_instances(Instances inst, int label, String[] class_labels, Hashtable hash, String pathname, String dataset, int current_fold) {
// read transactions file and save them into a binary file
RandomAccessFile file;
BinaryFile binFile;
// set the endian mode to LITTLE_ENDIAN
final short endian = BinaryFile.LITTLE_ENDIAN;
// set the signed mode to unsigned
final boolean signed = false;
int j = 0;
FileWriter cl_file = null;
int current_value = 0;
long t_id = 0;
long c_id = 0;
boolean found = false;
try {
String dataset_name = dataset.substring(dataset.lastIndexOf("/")+1, dataset.lastIndexOf("."));
String bin_name = pathname + dataset_name + "k"+ current_fold +".bin";
String class_path = pathname + dataset_name + "k"+ current_fold + ".cls";
//System.out.println("class path name: "+ class_path+"\n");
//System.out.println("bin_name: "+bin_name+"\n");
file = new RandomAccessFile(bin_name, "rw");
binFile = new BinaryFile(file);
// set the endian mode to LITTLE_ENDIAN
try {
binFile.setEndian(BinaryFile.LITTLE_ENDIAN);
} catch (Exception e) {
e.printStackTrace();
}
// set the signed mode to unsigned
binFile.setSigned(false);
num_classes = inst.numClasses();
if (label == 1) {
// create also class labels file
cl_file = new FileWriter(class_path);
cl_file.write(Integer.toString(CLASS_NUM)+"\n");
for (int i = 0; i< num_classes; i++) {
cl_file.write(class_labels[i]+"\n");
}
cl_file.close();
}
for (int i = 0; i < inst.numInstances(); i++) {
// per-record prologue: transaction id, customer id, item count
binFile.writeDWord(t_id);
binFile.writeDWord(c_id);
t_id++;
c_id++;
// write X number of objects to be read in the current rows, class label included
//file_wr.write(num_features);
binFile.writeDWord((long)(inst.firstInstance().numAttributes()));
for (int k = 0; k < inst.instance(i).numAttributes()-1; k++) {
// hash table handling
// here I have to check if the couple (attr, value) already exists.
// if yes return the correspondent number and write down it in the binary file
// otherwise add the couple to the hash table writing down in the binary file
// the correspondent number of the first avaible couple associated to that attribute
String myStr = "Attr"+k+"Value"+String.valueOf(inst.instance(i).value(k));
Integer n = (Integer)hash.get(myStr);
if (n != null) {
// I find the key (Attrib, Value) so we must write the correspondent number n into the binary file
binFile.writeDWord(n.longValue());
} else {
// value assigned is the next incremental value aviable
current_value++;
hash.put(myStr, new Integer(current_value));
binFile.writeDWord((long)current_value);
}
}
for (j= 0; j< num_classes; j++) // look for the index of the class label
if (Double.parseDouble(class_labels[j])==(inst.instance(i).classValue()) == true)
break;
// NOTE(review): if no label matches, j == num_classes here and
// CLASS_NUM+num_classes is written -- confirm this cannot happen.
binFile.writeDWord((long)(CLASS_NUM+j));
found = false;
}
file.close();
// NOTE(review): the file is only closed on the success path, and the
// IOException below is silently discarded (e.getMessage() is a no-op).
} catch (IOException e) {
e.getMessage();
}
return hash;
}
/**
 * Converts a headerless ".data" file into a csv file with a generated header
 * line ("Attr1,...,class").  Every line of the input -- the first one
 * included -- is treated as a sample and copied verbatim.
 *
 * @param dataset      path of the ".data" input file
 * @param class_labels not used by this conversion; kept for interface
 *                     compatibility with the other converters
 * @param path_name    directory (with trailing separator) for the output
 * @return the path of the csv file written ("dataset_converted.csv")
 */
String create_csv_from_data(String dataset, String[] class_labels, String path_name) {
    String csv_filename = path_name + "dataset_converted.csv";
    BufferedReader buff_rd = null;
    FileWriter fw = null;
    try {
        buff_rd = new BufferedReader(new FileReader(dataset));
        // count the attributes from the first data line; class assumed last
        String line2 = buff_rd.readLine();
        int attributes = 0;
        StringTokenizer st = new StringTokenizer(line2, ",");
        while (st.hasMoreTokens()) {
            st.nextToken();
            attributes++;
        }
        fw = new FileWriter(csv_filename);
        // generated header: no trailing newline here -- each data line below
        // writes its own leading "\n"
        for (int counter = 1; counter < (attributes+1); counter++) {
            if (counter != attributes)
                fw.write("Attr"+counter+",");
            else {
                fw.write("class");
            }
        }
        buff_rd.close();
        // re-read the whole file: every line (first one included) is a sample
        buff_rd = new BufferedReader(new FileReader(dataset));
        while ((line2 = buff_rd.readLine()) != null) {
            fw.write("\n");
            fw.write(line2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // BUG FIX: streams were only closed on the success path before
        try { if (fw != null) fw.close(); } catch (IOException ignored) {}
        try { if (buff_rd != null) buff_rd.close(); } catch (IOException ignored) {}
    }
    return csv_filename;
}
/**
 * Copies an ARFF file, rewriting its "@attribute" lines: the class attribute
 * is replaced by the full (quoted) class-label list, every other attribute is
 * renamed "attrN" and re-declared with type String.  All remaining lines
 * (relation, data, samples) are copied unchanged.
 *
 * @param training_filename     the ARFF file to read
 * @param training_filename_new the rewritten ARFF file to create
 * @param class_labels          full list of class labels to declare
 */
void update_class_values(String training_filename, String training_filename_new, String[] class_labels) {
    BufferedReader buff_rd = null;
    FileWriter fw = null;
    try {
        buff_rd = new BufferedReader(new FileReader(training_filename));
        fw = new FileWriter(training_filename_new);
        int contatore = 1; // running index for the renamed attributes
        String line;
        while ((line = buff_rd.readLine()) != null) {
            if (line.indexOf("@attribute") == -1) {
                // not an attribute declaration: copy unchanged
                fw.write(line+"\n");
            } else if (line.indexOf("@attribute class") != -1) {
                // rewrite the class attribute with the full quoted label list
                fw.write("@attribute class {");
                for (int i = 0; i < class_labels.length; i++) {
                    fw.write("'"+class_labels[i]+"'");
                    if (i != (class_labels.length - 1))
                        fw.write(",");
                }
                fw.write("}");
                fw.write("\n");
            } else {
                // rename each predictive attribute and force its type
                String linea = "@attribute attr"+contatore+" String";
                fw.write(linea+"\n");
                contatore++;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // BUG FIX: streams were only closed on the success path before
        try { if (fw != null) fw.close(); } catch (IOException ignored) {}
        try { if (buff_rd != null) buff_rd.close(); } catch (IOException ignored) {}
    }
}
}