package ca.pfv.spmf.algorithms.associationrules.TopKRules_and_TNR;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* This class represents a transaction database
* optimized for the TNR and TopKRules algorithms for top-k
* association rule mining.
* <br/><br/>
*
* It contains a list of transactions implemented as linked-lists
* and keeps track of the largest item id and the total number
* of transactions in the database.
* <br/><br/>
*
* The main method is loadFile() for reading a file. But transactions
* can also be added manually by calling the method addTransaction.
*
* @see AlgoTNR
* @see AlgoTopKRules
* @see Transaction
* @author Philippe Fournier-Viger
*/
public class Database {

    /**
     * The largest item id seen so far in the added transactions.
     * It starts at 0 and is only ever raised, so it is never below 0.
     */
    public int maxItem = 0;

    /** The number of transactions added through {@link #addTransaction(String[])}. */
    public int tidsCount = 0;

    /** The list of transactions in this database, in insertion order. */
    private final List<Transaction> transactions = new ArrayList<Transaction>();

    /**
     * Load a transaction database from a text file.
     * <br/><br/>
     *
     * Each line that is not skipped is split on single spaces and handed to
     * {@link #addTransaction(String[])}. Empty lines and lines starting
     * with '#', '%' or '@' (comments / metadata) are skipped.
     * <br/><br/>
     *
     * Errors are reported to the caller instead of being printed and
     * swallowed, so a failed load cannot be mistaken for an empty or
     * partially loaded database.
     *
     * @param path the input file path
     * @throws IOException if the file cannot be opened or read, or if a line
     *         contains a token that is not a valid integer (the original
     *         NumberFormatException is attached as the cause)
     */
    public void loadFile(String path) throws IOException {
        BufferedReader reader = null;
        try {
            // open the file
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(path))));

            // for each line (transaction) until the end of the file
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (isIgnoredLine(line)) {
                    continue;
                }
                try {
                    // split the line according to spaces and process it
                    addTransaction(line.split(" "));
                } catch (NumberFormatException e) {
                    // Report malformed input through the declared exception
                    // type, with enough context to locate the offending line.
                    throw new IOException("Invalid item on line " + lineNumber
                            + " of " + path + ": " + e.getMessage(), e);
                }
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Tell whether a line of the input file carries no transaction.
     *
     * @param line a line read from the input file (not null)
     * @return true if the line is empty or starts with '#', '%' or '@'
     */
    private static boolean isIgnoredLine(String line) {
        if (line.isEmpty()) {
            return true;
        }
        char first = line.charAt(0);
        return first == '#' || first == '%' || first == '@';
    }

    /**
     * Convert an array of item strings into a transaction and store it in
     * memory. Empty strings in the array are ignored. The field
     * {@link #maxItem} is raised if a larger item is met and
     * {@link #tidsCount} is increased by one.
     *
     * @param itemsString an array of items, where an item is an integer
     *        represented as a String.
     * @throws NumberFormatException if a non-empty element is not a valid integer
     */
    public void addTransaction(String itemsString[]) {
        // we create an object Transaction to store the items
        // (the array length is passed to the constructor, presumably as a
        // size hint -- see the Transaction class)
        Transaction transaction = new Transaction(itemsString.length);

        // for each item (String)
        for (String itemString : itemsString) {
            // if it is empty, skip it
            if ("".equals(itemString)) {
                continue;
            }
            // convert from string to integer
            int item = Integer.parseInt(itemString);
            // if the item is larger than the largest item, remember that
            if (item > maxItem) {
                maxItem = item;
            }
            // add the item to the transaction
            transaction.addItem(item);
        }

        // increase the number of transactions
        tidsCount++;
        // add the transaction to the transaction database
        transactions.add(transaction);

        // Sort the items of the transaction in descending order because
        // TopKRules and TNR assume that items are sorted, for optimization.
        // The reverse natural ordering is used rather than subtracting the
        // two values, because a subtraction can overflow and give a wrong order.
        Collections.sort(transaction.getItems(), Collections.<Integer>reverseOrder());
    }

    /**
     * Get the number of transactions.
     * @return the number of transactions
     */
    public int size() {
        return transactions.size();
    }

    /**
     * Get the list of transactions in this database.
     * The internal list itself is returned, not a copy.
     * @return a List of Transactions
     */
    public List<Transaction> getTransactions() {
        return transactions;
    }
}