package ca.pfv.spmf.algorithms.frequentpatterns.lcm; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.regex.Pattern; /* This file is copyright (c) 2012-2014 Alan Souza * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. */ /** * This is the parser class for the dataset. * It has actions related to parse a txt based file to a Dataset class. * * @see AlgoLCM * @author Alan Souza <apsouza@inf.ufrgs.br> */ public class Dataset { private List<Transaction> transactions; private Integer[] transactionsItems; Set<Integer> uniqueItems = new HashSet<Integer>(); private int maxItem = 0; public Dataset(String datasetPath) throws IOException { transactions = new ArrayList<Transaction>(); BufferedReader br = new BufferedReader(new FileReader(datasetPath)); String items; while((items = br.readLine()) != null) { // iterate over the lines to build the transaction // if the line is a comment, is empty or is metadata if (items.isEmpty() == true || items.charAt(0) == '#' || items.charAt(0) == '%' || items.charAt(0) == '@') { continue; } getTransactions().add(createTransaction(items)); } br.close(); /// sort transactions by increasing last item (optimization) Collections.sort(transactions, new Comparator<Transaction>() { public int compare(Transaction arg0, Transaction arg1) { // return arg0.getItems().length - arg1.getItems().length; return arg0.getItems()[arg0.getItems().length -1] - arg1.getItems()[arg1.getItems().length -1]; }}); // create the list of items in the database and sort it transactionsItems = new Integer[uniqueItems.size()]; int i=0; for(Integer item : uniqueItems) { transactionsItems[i++] = item; } Arrays.sort(transactionsItems); // TODO : maybe sorting items and transactions by decreasing order of support // could help } /** * Create a transaction object from a line from the input file * @param line a line from input file * @return a transaction */ private Transaction createTransaction(String line) { //build the items Pattern splitPattern = Pattern.compile(" "); String[] items = splitPattern.split(line); Integer[] itemsSorted = new Integer[items.length]; for (int i = 0; i < items.length; i++) { Integer item = Integer.valueOf(items[i]); itemsSorted[i] = item; uniqueItems.add(item); } // update max item by checking the last item of the transaction int lastItem = itemsSorted[itemsSorted.length - 1]; if(lastItem > maxItem) { maxItem = lastItem; } return new Transaction(itemsSorted); } public List<Transaction> getTransactions() { return transactions; } public Set<Integer> getUniqueItems() { return uniqueItems; } // public Integer[] getAllItems() { // // return transactionsItems; // } public int getMaxItem() { return maxItem; } @Override public String toString() { StringBuilder datasetContent = new StringBuilder(); for(Transaction transaction : transactions) { datasetContent.append(transaction); datasetContent.append("\n"); } return datasetContent.toString(); } }