package weka.classifiers.rules.ruleshandler;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.FieldPosition;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
public class CClasse {
// Per-class totals, indexed by class offset. estraiPerClasse stores
// macroRegoleTotali and regoleTotali here once a class has been mined.
double[] macroRegoleClassi;
double[] regoleClassi;
// Length of every per-class support array allocated by this class.
static int MAX_CLASSES = 30; // Must also be changed in the main
// Number of CItem slots allocated by supportCounting.
static int MAX_ITEM = 100000;
// Capacity handed to CItemCorpo when storeItemset flattens a rule body.
static int RULE_MAX_LENGHT = 1300;
// Number of CMacroItem slots allocated for the "coda" array in estraiPerClasse.
static int CODA_MAX_LEN = 1300;
// Class offset currently being mined; set at the start of estraiPerClasse.
int classeDesiderata;
// Largest item id seen by supportCounting; incremented by one when the input ends.
int maxNumItem;
// Not referenced by any method of this class.
double tidSet;
// Sum of the "represented rules" figure of every rule written by storeItemset.
double regoleTotali;
// Reset by estraiPerClasse; never updated by any method of this class.
double genTotali;
// Largest cumulative node count observed by fpMine, and the depth at which it occurred.
double maxNodi;
double maxRec;
// Number of fpMine invocations since the last estraiPerClasse reset.
double totRec;
// Reset by estraiPerClasse; never updated by any method of this class.
double maxDepth;
// Number of rule lines written by storeItemset.
double macroRegoleTotali;
// Number of CMacroItem slots allocated for the "condition" array in estraiPerClasse.
static int CONDITION_MAX_LEN = 3000;
// Created empty by the constructor; not used by any method of this class.
LinkedList<CFpNode> nodi;
// Header entry whose node links fptCreate walks; fpMine sets it just before calling fptCreate.
CFrequentItem node_pattern_base;
// Node count of the tree built by the most recent fptCreate call (root included).
double noditree;
// Set by insertNode: 1 when the call created a node, 0 when it reused one.
double create_node;
// Scratch buffer that receives the formatted confidence while a rule line is written.
StringBuffer s = new StringBuffer();
/**
 * Creates a miner with zeroed per-class rule counters (MAX_CLASSES slots each)
 * and an empty node list.
 */
public CClasse () {
    this.nodi = new LinkedList<CFpNode>();
    this.regoleClassi = new double[MAX_CLASSES];
    this.macroRegoleClassi = new double[MAX_CLASSES];
}
/**
 * Mines the rules of one class and writes them to that class's output file.
 *
 * The method resets the per-run counters, derives the absolute support
 * threshold for the class, builds the item table, the header table and the
 * initial tree from the input file, runs fpMine, and finally records the rule
 * totals for the class. The process exits with status 3, 4 or 5 when the item
 * table, header table or tree cannot be built, and with status 2 when the
 * output file cannot be opened.
 *
 * @param len           not used by this method
 * @param arg           arg[0] is the input file; the output file name is
 *                      arg[4] + "c" + class offset + arg[3]
 * @param classeAttuale offset of the class to mine (index into the per-class arrays)
 */
public void estraiPerClasse(int len,String[] arg,int classeAttuale) {
    CItem[] tmptab;
    CHeaderTable htab;
    CFptree fp;
    CFrequentDistinct frequentcounter = new CFrequentDistinct(0);
    CFrequentDistinct distinctcounter = new CFrequentDistinct(0);
    CMacroItem[] condition = new CMacroItem[CONDITION_MAX_LEN];
    CMacroItem[] coda = new CMacroItem[CODA_MAX_LEN];
    PrintWriter piw = null;

    classeDesiderata = classeAttuale;
    regoleTotali = 0;
    genTotali = 0;
    macroRegoleTotali = 0;
    maxNodi = 0;
    maxRec = 0;
    totRec = 0;
    maxDepth = 0;

    for ( int h = 0 ; h < CODA_MAX_LEN ; h++ ) {
        coda[h] = new CMacroItem();
    }
    for ( int h = 0 ; h < CONDITION_MAX_LEN ; h++ ) {
        condition[h] = new CMacroItem();
    }

    // NOTE(review): the threshold is cast to int before the multiplication, so any
    // fractional part of CMain.supp_threshold is dropped -- confirm this is intended.
    int supp_thres = (((int)CMain.supp_threshold)*CMain.suppClasses[classeDesiderata])/100;
    if ( supp_thres < 1 ) {
        supp_thres = 1;
    }

    if ( (tmptab = supportCounting(arg[0],supp_thres,frequentcounter,distinctcounter)) == null ) {
        System.out.println("Not create temp table\n");
        System.exit(3);
    }
    if ( (htab = firstHeaderTableCreate(tmptab,supp_thres,frequentcounter)) == null ) {
        System.out.println("Not create htable\n");
        System.exit(4);
    }
    if ( (fp = firstFpTreeCreate(arg[0],htab,distinctcounter.freqdistinct)) == null ) {
        System.out.println("Not create fp tree");
        System.exit(5);
    }

    String nomeFileUscita = arg[4]+"c"+classeDesiderata+arg[3];
    try {
        piw = new PrintWriter( new BufferedWriter ( new FileWriter ( nomeFileUscita)));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(2);
    }

    try {
        fpMine ( htab,fp,condition,1,supp_thres,piw,coda,0,supp_thres,0);
    } finally {
        // The writer used to be left open, leaking one file handle per mined class.
        piw.close();
    }

    macroRegoleClassi[classeAttuale] = macroRegoleTotali;
    regoleClassi[classeAttuale] = regoleTotali;
}
/**
 * Scans the binary transaction file once and counts item supports.
 *
 * Every record is read as little-endian 32-bit integers: three header words,
 * of which only the third (n) is used, followed by n values. The last of the n
 * values is the class id; the ones before it are 1-based item ids. For each
 * item the global support and the support of the record's class are
 * incremented. distinctcounter is bumped the first time an item is seen, and
 * frequentcounter the moment an item's support in the mined class reaches
 * threshold. maxNumItem ends up as the largest item id seen plus one.
 *
 * @param fileName         path of the binary transaction file
 * @param threshold        absolute support threshold for the mined class
 * @param frequentcounter  receives the number of items reaching the threshold
 * @param distinctcounter  receives the number of distinct items
 * @return the item table (MAX_ITEM entries) once end of file is reached, or
 *         null when the file cannot be opened or read
 */
public CItem[] supportCounting(String fileName,int threshold,CFrequentDistinct frequentcounter,CFrequentDistinct distinctcounter) {
    CItem[] tmptab = new CItem[MAX_ITEM];
    for ( int z = 0 ; z < MAX_ITEM ; z++ ) {
        tmptab[z] = new CItem(MAX_CLASSES);
    }

    // One reusable 4-byte window decoded as a little-endian int.
    byte[] raw = new byte[4];
    ByteBuffer word = ByteBuffer.wrap(raw).order(ByteOrder.LITTLE_ENDIAN);

    DataInputStream di = null;
    try {
        // Buffered: the file is consumed in 4-byte reads.
        di = new DataInputStream(new BufferedInputStream(new FileInputStream(fileName)));
        maxNumItem = 0;
        while ( true ) {
            // Three header words; only the last one (the record length) is kept.
            int n = 0;
            for ( int h = 0 ; h < 3 ; h++ ) {
                di.readFully(raw);
                n = word.getInt(0);
            }

            int[] t = new int[n];
            for ( int f = 0 ; f < n ; f++ ) {
                di.readFully(raw);
                t[f] = word.getInt(0);
            }

            for ( int k = 0 ; k < n-1 ; k++ ) {
                int classe = t[n-1]-CMain.idBaseClasse;
                CItem entry = tmptab[t[k]-1];
                if ( entry.supp == 0 ) {
                    distinctcounter.add(1);
                    if ( t[k] > maxNumItem ) {
                        maxNumItem = t[k];
                    }
                }
                entry.supp++;
                entry.suppClass[classe]++;
                // Counted exactly once: when the class support first equals the threshold.
                if ( (classe == classeDesiderata) && (entry.suppClass[classeDesiderata] == threshold) ) {
                    frequentcounter.add(1);
                }
            }
        }
    } catch (EOFException eofx) {
        // End of input is the normal way out of the read loop.
        maxNumItem++;
        return tmptab;
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if ( di != null ) {
            try {
                di.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }
    return null;
}
/**
 * Builds the initial header table from the item table.
 *
 * Every item among the first maxNumItem entries whose support in the mined
 * class is at least supp_thres is copied into a new CFrequentItem (item ids are
 * 1-based, hence index + 1). The table is then sorted with
 * CHeaderTable.quicksort when it holds at least one entry.
 *
 * @param tmptab          item table produced by supportCounting
 * @param supp_thres      absolute support threshold for the mined class
 * @param frequentcounter number of frequent items; sizes the header table
 * @return the populated header table
 */
public CHeaderTable firstHeaderTableCreate(CItem[] tmptab,int supp_thres,CFrequentDistinct frequentcounter) {
    final int expected = frequentcounter.freqdistinct;
    CHeaderTable htab = new CHeaderTable(expected);
    htab.frequentCount = expected;

    int slot = 0;
    for ( int idx = 0 ; idx < maxNumItem ; idx++ ) {
        CItem src = tmptab[idx];
        if ( src.suppClass[classeDesiderata] < supp_thres ) {
            continue;
        }
        CFrequentItem cfi = new CFrequentItem(MAX_CLASSES);
        cfi.itemId = idx+1;
        cfi.supp = src.supp;
        for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
            cfi.suppClass[cls] = src.suppClass[cls];
        }
        htab.frequentArray[slot] = cfi;
        slot++;
    }

    if ( expected > 0 ) {
        htab.quicksort(0, expected-1);
    }
    return htab;
}
/**
 * Builds the initial tree with a second pass over the binary transaction file.
 *
 * Every record is read as little-endian 32-bit integers: three header words,
 * of which only the third (n) is used, then n-1 item ids (1-based) and one
 * class id. The items of the record that appear in the header table are
 * inserted as one path, walking the header table from its last entry to its
 * first. The process exits with status 1 when a class id is below
 * CMain.idBaseClasse.
 *
 * @param file            path of the binary transaction file
 * @param htab            header table holding the frequent items
 * @param distinctcounter number of distinct items found by supportCounting
 * @return the tree once end of file is reached, or null when the file cannot
 *         be opened or read or a node cannot be inserted
 */
public CFptree firstFpTreeCreate(String file,CHeaderTable htab,int distinctcounter) {
    CFptree fpt = new CFptree(MAX_CLASSES);
    CFpNode parent;
    CFpNode current;

    // One reusable 4-byte window decoded as a little-endian int.
    byte[] raw = new byte[4];
    ByteBuffer word = ByteBuffer.wrap(raw).order(ByteOrder.LITTLE_ENDIAN);

    // The flags are indexed by itemId-1, so the array must reach the largest id and
    // not merely the number of distinct ids (the two differ when ids are sparse).
    int[] present_item = new int[Math.max(distinctcounter, maxNumItem)];

    DataInputStream di = null;
    try {
        // Buffered: the file is consumed in 4-byte reads.
        di = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
        while ( true ) {
            // Three header words; only the last one (the record length) is kept.
            int n = 0;
            for ( int h = 0 ; h < 3 ; h++ ) {
                di.readFully(raw);
                n = word.getInt(0);
            }

            for ( int a = 0 ; a < present_item.length ; a++ ) {
                present_item[a] = 0;
            }

            for ( int u = 0 ; u < n-1 ; u++ ) {
                di.readFully(raw);
                int itemid = word.getInt(0);
                present_item[itemid-1] = 1;
            }

            di.readFully(raw);
            int itemClasse = word.getInt(0)-CMain.idBaseClasse;
            if ( itemClasse < 0 ) {
                System.out.println("Classe errata "+itemClasse);
                System.exit(1);
            }

            parent = fpt.root;
            for ( int i = htab.frequentCount-1 ; i >= 0 ; i-- ) {
                int flag = htab.frequentArray[i].itemId-1;
                if ( present_item[flag] == 1 ) {
                    present_item[flag] = 0;
                    if ( (current = firstInsertNode(parent,htab.frequentArray[i],itemClasse,i)) == null ) {
                        return null;
                    }
                    parent = current;
                }
            }
        }
    } catch (EOFException eofx) {
        // End of input is the normal way out of the read loop.
        return fpt;
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if ( di != null ) {
            try {
                di.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }
    return null;
}
/**
 * Adds one occurrence of a frequent item below parent in the initial tree.
 *
 * If parent already has a child for the item, that child's support and its
 * support for item_classe are incremented. Otherwise a new child with support
 * 1 is appended to parent's child list and pushed at the front of the header
 * entry's node-link chain.
 *
 * @param parent      node under which the item is inserted
 * @param htab_entry  header-table entry of the item
 * @param item_classe class offset of the transaction being inserted
 * @param index       not used by this method
 * @return the child node that now represents the item under parent
 */
public CFpNode firstInsertNode(CFpNode parent,CFrequentItem htab_entry,int item_classe,int index) {
    // Walk the child list; "last" trails behind so a new child can be appended.
    CChildPtr last = parent.children;
    CChildPtr link = parent.children;
    while ( (link != null) && (link.child.itemId != htab_entry.itemId) ) {
        last = link;
        link = link.next;
    }

    if ( link != null ) {
        CFpNode hit = link.child;
        hit.local_supp++;
        hit.local_suppClass[item_classe]++;
        return hit;
    }

    CChildPtr fresh = new CChildPtr(MAX_CLASSES);
    CFpNode node = fresh.child;
    node.itemId = htab_entry.itemId;
    node.parent = parent;
    node.local_supp = 1;
    for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
        node.local_suppClass[cls] = 0;
    }
    node.local_suppClass[item_classe] = 1;
    node.next = htab_entry.head;

    if ( last == null ) {
        parent.children = fresh;
    } else {
        last.next = fresh;
    }

    htab_entry.head = node;
    htab_entry.incCounter(1);
    return node;
}
/**
 * Recursive mining step over one header table / tree pair.
 *
 * Below the first level, header entries whose support equals supp_cond are first
 * moved into coda by accresciCoda. If the tree reports a single branch
 * (fptree.branches == 1) all header entries are handed to combineItem. Otherwise,
 * for every header entry that is not merged (accorpato == 0), the method builds the
 * entry's conditional item list by walking its node links and their ancestors,
 * writes the rule for the current condition, and, when at least one conditional
 * item reaches supp_thres in the mined class, builds a conditional header table and
 * tree and recurses on them.
 *
 * @param htab            header table of the tree being mined
 * @param fptree          tree being mined
 * @param condition       macro-items of the current condition; slot minedepth-1 is overwritten
 * @param minedepth       1-based recursion depth
 * @param supp_thres      absolute support threshold for the mined class
 * @param nomeFileUscita  writer that receives the rules
 * @param coda            shared array of "coda" macro-items
 * @param dimcoda         number of coda slots in use on entry
 * @param supp_cond       support of the item that generated this tree
 * @param noditotali      cumulative node count along the current recursion path
 */
public void fpMine (CHeaderTable htab,CFptree fptree,CMacroItem[] condition,int minedepth,int supp_thres,PrintWriter nomeFileUscita,CMacroItem[] coda,int dimcoda,int supp_cond,double noditotali) {
CFrequentItem[] freq_array ;
CFpNode node;
CFpNode nodelink;
CFptree new_fptree;
CHeaderTable new_htab;
CItemEntry tmptab = new CItemEntry(MAX_CLASSES);
CItemEntry new_entry = new CItemEntry(MAX_CLASSES);
CItemEntry prev_entry = new CItemEntry(MAX_CLASSES);
CItemEntry curr_entry = new CItemEntry(MAX_CLASSES);
int found;
int oldsup;
// Count this invocation.
totRec++;
int i;
int frequent_count = 0;
int t = 0;
if ( minedepth > 1 ) {
// Entries with support equal to supp_cond go to coda and leave the header table.
dimcoda = accresciCoda(coda,dimcoda,htab,supp_cond);
}
if ( fptree.branches == 1 ) {
// NOTE(review): branches == 1 presumably means the tree is a single path -- confirm against CFptree.
combineItem(htab,condition,minedepth,htab.frequentCount,nomeFileUscita,supp_thres,coda,dimcoda);
} else {
freq_array = htab.frequentArray;
for ( i = 0; i<htab.frequentCount ; i++) {
if ( freq_array[i].accorpato == 0) {
if ( freq_array[i].nodeLinkCounter == 0) {
// An unmerged header entry without node links aborts the whole call.
System.err.println("Errore: freq_array["+i+"].nodeLinkCounter รจ zero!!");
return;
}
// Slot minedepth-1 of the condition becomes this item followed by the items merged into it.
int sitem = 1;
condition[minedepth-1] = new CMacroItem();
condition[minedepth-1].vettItemId[0] = freq_array[i].itemId;
Iterator<Integer> it = freq_array[i].itemAccorpati.iterator();
while (it.hasNext()) {
int tmp = it.next();
condition[minedepth-1].vettItemId[sitem] = tmp;
sitem++;
}
condition[minedepth -1].numItem = sitem;
frequent_count = 0;
// tmptab becomes the head of a linked list with one entry per conditional item.
tmptab = null;
new_htab = null;
new_fptree = null;
nodelink = freq_array[i].head;
// Visit every node linked to this header entry and climb from its parent towards the root.
for ( int q = 0 ; q<freq_array[i].nodeLinkCounter ; q++) {
node = nodelink.parent;
while ( node.parent != null ) {
// Only ancestors found in the header table after position i are considered.
for ( t = i+1; t<(htab.frequentCount) && (freq_array[t].itemId!=node.itemId); t++) {}
if ( (t < htab.frequentCount) && (freq_array[t].accorpato == 0 )) {
curr_entry = tmptab;
prev_entry = tmptab;
found = 0;
while ( (curr_entry != null) && (found == 0) ) {
if ( curr_entry.itemId == node.itemId) {
found = 1;
oldsup = curr_entry.suppClass[classeDesiderata];
// The ancestor inherits the supports of the linked node, not its own.
curr_entry.supp += nodelink.local_supp;
for ( int u = 0 ; u<MAX_CLASSES ; u++) {
curr_entry.suppClass[u] += nodelink.local_suppClass[u];
}
// Count the entry once, when it crosses the threshold.
if ( (oldsup < supp_thres ) && (curr_entry.suppClass[classeDesiderata] >= supp_thres ) ) {
frequent_count++;
}
} else {
prev_entry = curr_entry;
curr_entry = curr_entry.next;
}
}
if ( found == 0 ) {
// First occurrence of this ancestor: append a new list entry.
new_entry = new CItemEntry(MAX_CLASSES);
new_entry.itemId = node.itemId;
new_entry.supp = nodelink.local_supp;
for ( int g = 0 ; g<MAX_CLASSES ; g++ ) {
new_entry.suppClass[g] = nodelink.local_suppClass[g];
}
if (new_entry.suppClass[classeDesiderata] >= supp_thres) {
frequent_count++;
}
if ( prev_entry == null ) {
tmptab = new_entry;
} else {
prev_entry.next = new_entry;
}
}
}
node = node.parent;
}
nodelink = nodelink.next;
}
if ( frequent_count == 0) {
// No frequent conditional item: write the rule for the current condition only.
storeItemset(condition,minedepth,null,0,freq_array[i],0,0,null,nomeFileUscita,supp_thres,coda,dimcoda);
} else {
if ( (new_htab = headerTableCreate(tmptab,supp_thres,frequent_count)) == null ) {
System.exit(1);
}
// Carry over the merged-item lists from the matching entries of the current header table.
for ( int w = 0; w<new_htab.frequentCount ; w++ ) {
int hti = 0;
// No bound check: the item is expected to be present in freq_array.
while ( freq_array[hti].itemId != new_htab.frequentArray[w].itemId) {
hti++;
}
Iterator<Integer> iter = freq_array[hti].itemAccorpati.iterator();
while ( iter.hasNext()) {
new_htab.frequentArray[w].itemAccorpati.addFirst(iter.next());
}
}
// The enlarged coda length is used only for the rule written here; the
// recursive call below still receives dimcoda.
int newdimcoda = dimcoda;
newdimcoda = accresciCodaperCondition(coda,newdimcoda,new_htab,freq_array[i].supp);
storeItemset(condition, minedepth, null, 0 , freq_array[i], 0 , 0, null, nomeFileUscita, supp_thres, coda, newdimcoda);
// fptCreate reads the item to expand from the node_pattern_base field.
node_pattern_base = freq_array[i];
if ( (new_fptree = fptCreate(new_htab)) == null) {
System.exit(1);
}
freq_array[i] = node_pattern_base;
// noditree was set by fptCreate to the size of the tree just built.
noditotali = noditotali + noditree;
if ( noditotali > maxNodi ) {
maxNodi = noditotali;
maxRec = minedepth;
}
fpMine(new_htab, new_fptree, condition, minedepth+1, supp_thres, nomeFileUscita, coda, dimcoda, freq_array[i].supp, noditotali);
}
}
}
}
return;
}
/**
 * Moves into coda every header entry whose support equals supp_cond.
 *
 * Each such entry is flagged as merged and copied (its id followed by its
 * merged ids) into the next free coda slot. The header table is then compacted
 * so that only the unflagged entries remain, and its frequent count is updated.
 *
 * @param coda      array of coda macro-items, filled from index dimcoda
 * @param dimcoda   number of coda slots in use on entry
 * @param htab      header table to scan and compact
 * @param supp_cond support value that sends an entry to the coda
 * @return the number of coda slots in use after the call
 */
public int accresciCoda(CMacroItem[] coda,int dimcoda,CHeaderTable htab,int supp_cond) {
    CFrequentItem[] entries = htab.frequentArray;

    for ( int pos = 0 ; pos < htab.frequentCount ; pos++ ) {
        CFrequentItem entry = entries[pos];
        if ( entry.supp != supp_cond ) {
            continue;
        }
        entry.accorpato = 1;
        CMacroItem slot = coda[dimcoda];
        slot.vettItemId[0] = entry.itemId;
        int filled = 1;
        for ( Iterator<Integer> merged = entry.itemAccorpati.iterator() ; merged.hasNext() ; ) {
            slot.vettItemId[filled] = merged.next();
            filled++;
        }
        slot.numItem = filled;
        dimcoda++;
    }

    // Shift the surviving entries to the front of the array.
    int kept = 0;
    for ( int pos = 0 ; pos < htab.frequentCount ; pos++ ) {
        if ( entries[pos].accorpato == 0 ) {
            entries[kept] = entries[pos];
            kept++;
        }
    }
    htab.setFrequent(kept);
    return dimcoda;
}
/**
 * Merges header entries that are equivalent to another entry into that entry.
 *
 * Going from the last header entry down to index 1, each unmerged entry is taken as
 * the reference. The entries just before it that have the same total support and the
 * same support in every class are collected as candidates; equivalenti then rejects
 * the candidates that fail its tree check (flag -1). Each surviving candidate is
 * flagged as merged and its id, plus the ids already merged into it, are added to
 * the reference's itemAccorpati list. Finally the header table is compacted to the
 * unmerged entries. No method of this class calls this method.
 *
 * @param htab   header table to compact
 * @param fptree tree the header table belongs to
 */
public void creaMacroItem (CHeaderTable htab,CFptree fptree) {
CFrequentItem[] freq_array;
int ih,ht;
int newFreqcounter;
int rif,c,p;
int dim_possibili_item_eq;
int uguali;
// Candidate slots, reused for every reference item.
CItemEq[] possibili_item_eq = new CItemEq[maxNumItem];
for ( int j = 0 ; j<maxNumItem ; j++ ) {
possibili_item_eq[j] = new CItemEq();
}
freq_array = htab.frequentArray;
// Start with no entry marked as merged.
for ( ih = htab.frequentCount-1 ; ih >= 0 ; ih-- ) {
freq_array[ih].accorpato = 0;
}
for ( rif = htab.frequentCount-1 ; rif>0 ; rif-- ) {
dim_possibili_item_eq = 0;
if ( freq_array[rif].accorpato == 0 ) {
ih = rif-1;
// NOTE(review): the scan stops at ih > 0, so the entry at index 0 is never a
// candidate -- confirm this is intended.
while ( (ih > 0) && (freq_array[ih].supp == freq_array[rif].supp) ) {
if ( freq_array[ih].accorpato == 0 ) {
// A candidate must match the reference in every class support.
uguali = 1;
c = 0;
while ( (c<MAX_CLASSES) && (uguali == 1) ) {
if ( freq_array[ih].suppClass[c] != freq_array[rif].suppClass[c] ) {
uguali = 0;
}
c++;
}
if ( uguali == 1 ) {
possibili_item_eq[dim_possibili_item_eq].pos = ih;
possibili_item_eq[dim_possibili_item_eq].flag = 0;
dim_possibili_item_eq++;
}
}
ih--;
}
if ( dim_possibili_item_eq > 0 ) {
// equivalenti sets flag to -1 on the candidates it rejects.
equivalenti(htab,fptree,rif,possibili_item_eq,dim_possibili_item_eq);
for ( p = 0 ; p < dim_possibili_item_eq ; p++ ) {
if ( possibili_item_eq[p].flag == 0 ) {
freq_array[possibili_item_eq[p].pos].accorpato = 1;
int itemacc = freq_array[possibili_item_eq[p].pos].itemId;
freq_array[rif].itemAccorpati.addFirst(itemacc);
// Items previously merged into the candidate move to the reference as well.
Iterator<Integer> it = freq_array[possibili_item_eq[p].pos].itemAccorpati.iterator();
while ( it.hasNext() ) {
itemacc = it.next();
freq_array[rif].itemAccorpati.addFirst(itemacc);
}
}
}
}
}
}
freq_array = htab.frequentArray;
// Compact the array to the unmerged entries; merged entries have their list emptied.
newFreqcounter = 0;
for ( ht = 0 ; ht < htab.frequentCount ; ht++ ) {
if ( freq_array[ht].accorpato == 0 ) {
freq_array[newFreqcounter] = freq_array[ht];
newFreqcounter++;
} else {
freq_array[ht].itemAccorpati.clear();
}
}
htab.setFrequent(newFreqcounter);
return;
}
/**
 * Rejects the candidates that do not pass the tree check against a reference item.
 *
 * For each node on the reference item's node-link chain, the supports of the nodes
 * below it (down to "livello" levels) are added into a per-item accumulator. A
 * candidate whose flag is still 0 keeps it only if its accumulated total support and
 * every accumulated class support equal the local supports of that node; otherwise
 * its flag is set to -1. The walk stops at the end of the chain or as soon as no
 * candidate passed for a node.
 *
 * @param htab                  header table holding reference and candidates
 * @param fptree                not used by this method
 * @param pos_item_di_rif       header-table position of the reference item
 * @param possibili_item_eq     candidates; flag 0 means still valid, -1 rejected
 * @param dim_possibili_item_eq number of candidates in use
 */
public void equivalenti(CHeaderTable htab,CFptree fptree,int pos_item_di_rif ,CItemEq[] possibili_item_eq, int dim_possibili_item_eq ) {
// Accumulator indexed directly by item id. It is not cleared between node links,
// so the sums keep growing along the chain.
CItem[] itempresenti = new CItem[maxNumItem];
CFpNode nodelink;
int esistono_candidati;
int i,c,livello;
CChildPtr curr_elem;
for ( int k = 0 ; k < maxNumItem ; k++ ) {
itempresenti[k] = new CItem(MAX_CLASSES);
}
esistono_candidati = 1;
nodelink = htab.frequentArray[pos_item_di_rif].head;
// Depth to explore: distance between the reference and the last candidate recorded.
livello = pos_item_di_rif-possibili_item_eq[dim_possibili_item_eq-1].pos;
while ( (nodelink != null) && ( esistono_candidati == 1 ) ) {
curr_elem = nodelink.children;
while ( curr_elem != null ) {
aggiornaPresenze ( curr_elem , itempresenti , livello );
curr_elem = curr_elem.next;
}
esistono_candidati = 0;
for ( i = 0 ; i < dim_possibili_item_eq ; i++ ) {
if ( (possibili_item_eq[i].flag == 0) && ( itempresenti[(htab.frequentArray[possibili_item_eq[i].pos]).itemId].supp == nodelink.local_supp) ) {
// Total support matches; now compare class by class.
c = 0;
while ( ( c < MAX_CLASSES ) && ( itempresenti[(htab.frequentArray[possibili_item_eq[i].pos]).itemId].suppClass[c] == nodelink.local_suppClass[c])) {
c++;
}
if ( c == MAX_CLASSES ) {
esistono_candidati = 1;
} else {
possibili_item_eq[i].flag = -1;
}
} else {
possibili_item_eq[i].flag = -1;
}
}
nodelink = nodelink.next;
}
return;
}
/**
 * Adds the supports of a subtree into a per-item accumulator.
 *
 * The node referenced by curr_elem contributes its local support and its
 * per-class supports to the accumulator slot indexed by its item id. While
 * livello is greater than 1 the same is done for each of its children, with
 * livello decreased by one.
 *
 * @param curr_elem    child pointer whose node is the root of the subtree
 * @param itempresenti accumulator indexed by item id
 * @param livello      number of tree levels still to visit, this node included
 */
public void aggiornaPresenze ( CChildPtr curr_elem ,CItem[] itempresenti , int livello ) {
    CFpNode current = curr_elem.child;
    CItem tally = itempresenti[current.itemId];

    tally.supp = tally.supp + current.local_supp;
    for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
        tally.suppClass[cls] = tally.suppClass[cls] + current.local_suppClass[cls];
    }

    if ( livello <= 1 ) {
        return;
    }
    for ( CChildPtr sub = current.children ; sub != null ; sub = sub.next ) {
        aggiornaPresenze(sub, itempresenti, livello-1);
    }
}
/**
 * Writes the rules for a tree that fptree.branches reports as a single branch.
 *
 * The header entries are visited from the last to the first. Each one becomes
 * the macro-item at condition[minedepth-1] and is passed to storeItemset
 * together with the macro-items of the entries visited before it (comb), so
 * that storeItemset can also write their combinations. The entry is then
 * appended to comb.
 *
 * @param htab       header table of the tree; nothing is done when it is null or empty
 * @param condition  macro-items of the current condition; slot minedepth-1 is overwritten
 * @param minedepth  1-based recursion depth
 * @param comb_size  capacity of the combination array (callers pass htab.frequentCount)
 * @param file       writer that receives the rules
 * @param supp_thres absolute support threshold for the mined class
 * @param coda       array of coda macro-items
 * @param dimcoda    number of coda slots in use
 */
public void combineItem(CHeaderTable htab, CMacroItem[] condition,int minedepth,int comb_size,PrintWriter file,int supp_thres,CMacroItem[] coda,int dimcoda) {
    // Bail out before allocating anything.
    if ( (htab == null) || (htab.frequentCount == 0) ) {
        return;
    }

    CMacroItem[] comb = new CMacroItem[comb_size];
    for ( int t = 0 ; t < comb_size ; t++ ) {
        comb[t] = new CMacroItem();
    }

    CFrequentItem[] freq_array = htab.frequentArray;
    int cl = 0;
    for ( int ht = htab.frequentCount-1 ; ht >= 0 ; ht-- ) {
        CFrequentItem entry = freq_array[ht];

        copiaInMacroItem(entry, condition[minedepth-1]);
        storeItemset(condition, minedepth, comb, cl, entry, 0, 0, null, file, supp_thres, coda, dimcoda);

        // The entry becomes available for the combinations of the next entries.
        copiaInMacroItem(entry, comb[cl]);
        cl++;
    }
}

/**
 * Copies a header entry's id followed by its merged ids into target and sets numItem.
 * The list is walked with an iterator: indexed get() on a LinkedList is linear per
 * call, which made the previous index loop quadratic.
 */
private void copiaInMacroItem(CFrequentItem source, CMacroItem target) {
    target.vettItemId[0] = source.itemId;
    int sitem = 1;
    for ( Iterator<Integer> it = source.itemAccorpati.iterator() ; it.hasNext() ; ) {
        target.vettItemId[sitem] = it.next();
        sitem++;
    }
    target.numItem = sitem;
}
/**
 * Writes the rule for the current condition and, optionally, the rules for its
 * combinations with the macro-items in comb.
 *
 * A rule line has the form
 * "{(..),(..)}(..),(..) -> class support confidence count id id ...": the
 * braces hold the combination and condition macro-items, the part after the
 * braces holds the coda macro-items ("()" when there are none), and the tail
 * is the number of item ids in the rule followed by those ids as ordered by
 * CItemCorpo.ordina. A line is written only when the support of items in the
 * mined class reaches supp_thres and its confidence reaches
 * CMain.conf_threshold; each written line updates regoleTotali and
 * macroRegoleTotali.
 *
 * When itemset is null the method writes the rule for the condition alone and,
 * if comb_size is not 0, starts the combination recursion with a fresh itemset
 * array. When itemset is not null the method is inside that recursion: for
 * every comb position from comb_size-1 down to position it places that
 * macro-item at itemset[store_level-1], writes the rule, and recurses one
 * level deeper on the positions above it.
 *
 * Each rule now starts from an empty item list. Previously the list and its
 * counter were shared by all iterations of the combination loop, so from the
 * second combination on the written count and ids also contained the items of
 * the earlier combinations.
 *
 * @param condition        macro-items of the current condition
 * @param condition_lenght number of condition macro-items in use
 * @param comb             macro-items that can be combined with the condition
 * @param comb_size        number of comb macro-items in use
 * @param items            header entry supplying the supports of the rule
 * @param position         lowest comb position to use (recursion only)
 * @param store_level      1-based depth of the combination recursion
 * @param itemset          combination being built, or null on the outer call
 * @param piw              writer that receives the rules
 * @param supp_thres       absolute support threshold for the mined class
 * @param coda             array of coda macro-items
 * @param dimcoda          number of coda slots in use
 */
public void storeItemset(CMacroItem[] condition,int condition_lenght ,CMacroItem[] comb,int comb_size,CFrequentItem items ,int position ,int store_level,CMacroItem[] itemset,PrintWriter piw,int supp_thres,CMacroItem[] coda,int dimcoda) {
    CItemCorpo cit = new CItemCorpo(RULE_MAX_LENGHT);
    StringBuffer corpo = new StringBuffer();

    if ( itemset == null ) {
        corpo.append("{");
        int itemc = appendConditionAndCoda(corpo, cit, 0, condition, condition_lenght, coda, dimcoda);
        double regole = regoleRappresentate(null, 0, condition, condition_lenght, coda, dimcoda);
        writeRule(corpo, cit, itemc, items, piw, supp_thres, regole);

        if ( comb_size != 0 ) {
            CMacroItem[] fresh = new CMacroItem[comb_size];
            for ( int q = 0 ; q < comb_size ; q++ ) {
                fresh[q] = new CMacroItem();
            }
            storeItemset(condition, condition_lenght, comb, comb_size, items, 0, 1, fresh, piw, supp_thres, coda, dimcoda);
        }
        return;
    }

    for ( int pos = comb_size-1 ; pos >= position ; pos-- ) {
        itemset[store_level-1] = comb[pos];

        // Start every rule from scratch: empty text and empty item list.
        // NOTE(review): reusing cit assumes ordina(lo, hi) only touches [lo, hi] -- confirm in CItemCorpo.
        corpo.setLength(0);
        corpo.append("{");
        int itemc = 0;
        for ( int i = 0 ; i < store_level ; i++ ) {
            itemc = appendMacroItem(corpo, cit, itemc, itemset[i], "),");
        }
        itemc = appendConditionAndCoda(corpo, cit, itemc, condition, condition_lenght, coda, dimcoda);

        double regole = regoleRappresentate(itemset, store_level, condition, condition_lenght, coda, dimcoda);
        writeRule(corpo, cit, itemc, items, piw, supp_thres, regole);

        storeItemset(condition, condition_lenght, comb, comb_size, items, pos+1, store_level+1, itemset, piw, supp_thres, coda, dimcoda);
    }
}

/** Appends "(id,id,...,id" + closing for one macro-item, records its ids in cit, and returns the new id count. */
private int appendMacroItem(StringBuffer corpo, CItemCorpo cit, int itemc, CMacroItem group, String closing) {
    int last = group.numItem-1;
    corpo.append("(");
    for ( int j = 0 ; j < last ; j++ ) {
        corpo.append(group.vettItemId[j]+",");
        cit.item_corpo[itemc] = group.vettItemId[j];
        itemc++;
    }
    corpo.append(group.vettItemId[last]+closing);
    cit.item_corpo[itemc] = group.vettItemId[last];
    itemc++;
    return itemc;
}

/** Appends the condition macro-items (closing the brace) and the coda macro-items, and returns the new id count. */
private int appendConditionAndCoda(StringBuffer corpo, CItemCorpo cit, int itemc, CMacroItem[] condition, int condition_lenght, CMacroItem[] coda, int dimcoda) {
    for ( int i = 0 ; i < condition_lenght-1 ; i++ ) {
        itemc = appendMacroItem(corpo, cit, itemc, condition[i], "),");
    }
    itemc = appendMacroItem(corpo, cit, itemc, condition[condition_lenght-1], ")}");

    if ( dimcoda > 0 ) {
        for ( int h = 0 ; h < dimcoda-1 ; h++ ) {
            itemc = appendMacroItem(corpo, cit, itemc, coda[h], "),");
        }
        itemc = appendMacroItem(corpo, cit, itemc, coda[dimcoda-1], ")");
    } else {
        corpo.append("()");
    }
    return itemc;
}

/** Multiplies combinazioni(numItem) over the itemset and condition macro-items and combinazioni(numItem)+1 over the coda ones. */
private double regoleRappresentate(CMacroItem[] itemset, int store_level, CMacroItem[] condition, int condition_lenght, CMacroItem[] coda, int dimcoda) {
    double regole = 1;
    for ( int i = 0 ; i < store_level ; i++ ) {
        regole = regole * combinazioni(itemset[i].numItem);
    }
    for ( int k = 0 ; k < condition_lenght ; k++ ) {
        regole = regole * combinazioni(condition[k].numItem);
    }
    for ( int o = 0 ; o < dimcoda ; o++ ) {
        regole = regole * (combinazioni(coda[o].numItem)+1);
    }
    return regole;
}

/** Orders the collected ids and, if the rule passes the support and confidence thresholds, writes it and updates the totals. */
private void writeRule(StringBuffer corpo, CItemCorpo cit, int itemc, CFrequentItem items, PrintWriter piw, int supp_thres, double regole) {
    cit.ordina(0,itemc-1);
    StringBuffer elenco = new StringBuffer();
    for ( int ic = 0 ; ic < itemc ; ic++ ) {
        elenco.append(cit.item_corpo[ic]+" ");
    }

    if ( (items.suppClass[classeDesiderata] >= supp_thres) && ((double)(100.0 * (float)(items.suppClass[classeDesiderata])/(float)(items.supp)) >= CMain.conf_threshold) ) {
        DecimalFormat format = new DecimalFormat("###0.00",new DecimalFormatSymbols(new Locale("EN")));
        FieldPosition field = new FieldPosition(0);
        format.format(((100.0*items.suppClass[classeDesiderata])/items.supp),s,field);
        piw.append(corpo+" -> "+(CMain.idBaseClasse+classeDesiderata)+" "+items.suppClass[classeDesiderata]+" "+s+" "+itemc+" "+elenco+"\n");
        piw.flush();
        s.delete(0, s.length());
        regoleTotali = regoleTotali+regole;
        macroRegoleTotali++;
    }
}
/**
 * Builds a conditional header table from a linked list of item entries.
 *
 * Every list entry whose support in the mined class is at least supp_thres is
 * copied into the next slot of the table, with an empty merged-item list and a
 * node-link counter of 0. The table is then sorted with CHeaderTable.quicksort.
 * Nothing is copied or sorted when frequent_count is 0.
 *
 * @param tmptab         head of the item-entry list (may be null)
 * @param supp_thres     absolute support threshold for the mined class
 * @param frequent_count number of entries expected to pass the threshold
 * @return the header table
 */
public CHeaderTable headerTableCreate(CItemEntry tmptab,int supp_thres,int frequent_count) {
    CHeaderTable htab = new CHeaderTable(frequent_count);
    htab.frequentCount = frequent_count;
    if ( frequent_count == 0 ) {
        return htab;
    }

    int slot = 0;
    for ( CItemEntry cand = tmptab ; cand != null ; cand = cand.next ) {
        if ( cand.suppClass[classeDesiderata] < supp_thres ) {
            continue;
        }
        CFrequentItem target = htab.frequentArray[slot];
        target.itemId = cand.itemId;
        target.supp = cand.supp;
        target.accorpato = 0;
        target.itemAccorpati = new LinkedList<Integer>();
        for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
            target.suppClass[cls] = cand.suppClass[cls];
        }
        target.nodeLinkCounter = 0;
        slot++;
    }

    htab.quicksort(0, frequent_count-1);
    return htab;
}
/**
 * Copies into coda every header entry whose support equals suppcondition.
 *
 * Each matching entry (its id followed by its merged ids) fills the next free
 * coda slot. The header table itself is left untouched.
 *
 * @param coda          array of coda macro-items, filled from index dimcoda
 * @param dimcoda       number of coda slots in use on entry
 * @param htab          header table to scan
 * @param suppcondition support value that selects an entry
 * @return the number of coda slots in use after the call
 */
public int accresciCodaperCondition(CMacroItem[] coda,int dimcoda,CHeaderTable htab, int suppcondition) {
    CFrequentItem[] entries = htab.frequentArray;

    for ( int pos = 0 ; pos < htab.frequentCount ; pos++ ) {
        CFrequentItem entry = entries[pos];
        if ( entry.supp != suppcondition ) {
            continue;
        }
        CMacroItem slot = coda[dimcoda];
        slot.vettItemId[0] = entry.itemId;
        int filled = 1;
        for ( Iterator<Integer> merged = entry.itemAccorpati.iterator() ; merged.hasNext() ; ) {
            slot.vettItemId[filled] = merged.next();
            filled++;
        }
        slot.numItem = filled;
        dimcoda++;
    }
    return dimcoda;
}
/**
 * Builds the conditional tree for the item held in node_pattern_base.
 *
 * For each node on that item's node-link chain, the header entries of htab are
 * visited from the last to the first; every entry whose item occurs among the
 * node's ancestors is inserted as the next node of one path, carrying the
 * linked node's supports. noditree is left holding the number of nodes of the
 * new tree, root included.
 *
 * @param htab header table of the conditional tree
 * @return the new tree, or null when a node cannot be inserted
 */
public CFptree fptCreate(CHeaderTable htab) {
    CItem local = new CItem(MAX_CLASSES);
    CFptree fp = new CFptree(MAX_CLASSES);
    fp.branches = 0;
    // The root counts as the first node.
    noditree = 1;

    CFrequentItem[] entries = htab.frequentArray;
    CFpNode link = node_pattern_base.head;
    int links = node_pattern_base.nodeLinkCounter;

    for ( int visited = 0 ; visited < links ; visited++, link = link.next ) {
        // Every node of this path inherits the supports of the linked node.
        local.supp = link.local_supp;
        for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
            local.suppClass[cls] = link.local_suppClass[cls];
        }

        CFpNode parent = fp.root;
        for ( int ih = htab.frequentCount-1 ; ih >= 0 ; ih-- ) {
            // Look for the entry's item among the ancestors of the linked node.
            CFpNode ancestor = link.parent;
            while ( (ancestor != null) && (ancestor.itemId != entries[ih].itemId) ) {
                ancestor = ancestor.parent;
            }
            if ( ancestor == null ) {
                continue;
            }

            CFpNode inserted = insertNode(parent, entries[ih], local, fp);
            if ( inserted == null ) {
                return null;
            }
            // insertNode leaves 1 in create_node when it created a node.
            noditree += create_node;
            parent = inserted;
        }
    }
    return fp;
}
/**
 * Adds an item with the given supports below parent in a conditional tree.
 *
 * If parent already has a child for the item, the supports in local are added
 * to that child. Otherwise a new child carrying those supports is appended to
 * parent's child list and pushed at the front of the header entry's node-link
 * chain; before that, the tree's branch indicator is set to 1 when parent is a
 * childless node without a parent, or to 2 when parent already has children.
 * create_node is set to 1 when a node was created and to 0 otherwise.
 *
 * @param parent             node under which the item is inserted
 * @param header_table_entry header-table entry of the item
 * @param local              supports to add or to assign to the new node
 * @param fp                 tree being built
 * @return the child node that now represents the item under parent
 */
public CFpNode insertNode(CFpNode parent,CFrequentItem header_table_entry,CItem local,CFptree fp) {
    create_node = 0;

    // Walk the child list; "tail" trails behind so a new child can be appended.
    CChildPtr tail = parent.children;
    CChildPtr cursor = parent.children;
    while ( (cursor != null) && (cursor.child.itemId != header_table_entry.itemId) ) {
        tail = cursor;
        cursor = cursor.next;
    }

    if ( cursor != null ) {
        CFpNode existing = cursor.child;
        existing.local_supp += local.supp;
        for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
            existing.local_suppClass[cls] += local.suppClass[cls];
        }
        return existing;
    }

    if ( parent.children == null ) {
        if ( parent.parent == null ) {
            fp.setBranches(1);
        }
    } else {
        fp.setBranches(2);
    }

    CChildPtr fresh = new CChildPtr(MAX_CLASSES);
    create_node = 1;
    CFpNode node = fresh.child;
    node.itemId = header_table_entry.itemId;
    node.parent = parent;
    node.local_supp = local.supp;
    for ( int cls = 0 ; cls < MAX_CLASSES ; cls++ ) {
        node.local_suppClass[cls] = local.suppClass[cls];
    }
    node.next = header_table_entry.head;

    if ( tail == null ) {
        parent.children = fresh;
    } else {
        tail.next = fresh;
    }

    header_table_entry.incCounter(1);
    header_table_entry.head = node;
    return node;
}
/**
 * Returns 2^numero - 1, the number of non-empty subsets of numero elements.
 *
 * @param numero number of elements
 * @return 2^numero - 1, or 0 when numero is less than 1
 */
public double combinazioni( int numero ) {
    // Math.pow is exact when both arguments are integers and the result is representable.
    return ( numero < 1 ) ? 0 : Math.pow(2, numero) - 1;
}
}