package ca.pfv.spmf.tools.dataset_generator;

/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
 *
 * This file is part of the SPMF DATA MINING SOFTWARE
 * (http://www.philippe-fournier-viger.com/spmf).
 *
 * SPMF is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with
 * SPMF. If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;

/**
 * Random transaction database generator. The user provides a few size
 * parameters and this class generates a transaction database that is
 * written to disk as a text file.
 *
 * @author Philippe Fournier-Viger
 */
public class TransactionDatabaseGenerator {

    // the random number generator, seeded with the time at which the class is loaded
    private static final Random random = new Random(System.currentTimeMillis());

    /**
     * Randomly generates a transaction database and writes it to a file.
     * <p>
     * Each transaction is written on its own line as a list of distinct
     * integer items separated by single spaces and sorted in ascending order.
     * Items are drawn from {@code 1..maxDistinctItems}. Lines are separated
     * by the platform line separator and no separator is written after the
     * last transaction.
     * <p>
     * The number of items of a transaction is drawn from
     * {@code 1..min(maxItemCountPerTransaction, maxDistinctItems)}: a
     * transaction cannot contain more distinct items than there are items
     * to choose from.
     * <p>
     * If {@code transactionCount} is zero or negative, an empty file is
     * written and the two other size parameters are not checked.
     *
     * @param transactionCount the number of transactions required
     * @param maxDistinctItems the maximum number of distinct items
     * @param maxItemCountPerTransaction the maximum number of items per transaction
     * @param output the file path for writing the generated database
     * @throws IOException if the output file cannot be created or written
     * @throws IllegalArgumentException if at least one transaction is requested and
     *         {@code maxDistinctItems} or {@code maxItemCountPerTransaction} is
     *         smaller than 1 (checked before the output file is opened)
     */
    public void generateDatabase(int transactionCount, int maxDistinctItems,
            int maxItemCountPerTransaction, String output) throws IOException {
        // Reject unusable sizes before the output file is created or truncated.
        if (transactionCount > 0) {
            if (maxDistinctItems < 1) {
                throw new IllegalArgumentException(
                        "maxDistinctItems must be at least 1 but was " + maxDistinctItems);
            }
            if (maxItemCountPerTransaction < 1) {
                throw new IllegalArgumentException(
                        "maxItemCountPerTransaction must be at least 1 but was "
                                + maxItemCountPerTransaction);
            }
        }

        // Asking for more distinct items than exist could never be satisfied
        // (the search for an unused item would loop forever), so cap the size.
        int largestTransaction = Math.min(maxItemCountPerTransaction, maxDistinctItems);

        // We create a BufferedWriter to write the database to disk
        BufferedWriter writer = new BufferedWriter(new FileWriter(output));
        try {
            // For the number of transactions to be generated
            for (int i = 0; i < transactionCount; i++) {
                // if it is not the first one, we write on a new line
                if (i != 0) {
                    writer.newLine();
                }
                // We randomly decide how many items will appear in this transaction
                int itemCount = random.nextInt(largestTransaction) + 1;
                writeItemset(writer, generateItemset(itemCount, maxDistinctItems));
            }
        } finally {
            // close the file even if generating or writing failed
            writer.close();
        }
    }

    /**
     * Draws {@code itemCount} distinct items from {@code 1..maxDistinctItems}
     * and returns them sorted in ascending order.
     * The caller guarantees {@code 1 <= itemCount <= maxDistinctItems}.
     */
    private static List<Integer> generateItemset(int itemCount, int maxDistinctItems) {
        // The set silently ignores an item that was already drawn, so we
        // simply keep drawing until enough distinct items were collected.
        HashSet<Integer> chosen = new HashSet<Integer>();
        while (chosen.size() < itemCount) {
            chosen.add(random.nextInt(maxDistinctItems) + 1);
        }
        List<Integer> itemset = new ArrayList<Integer>(chosen);
        Collections.sort(itemset);
        return itemset;
    }

    /**
     * Writes the items of an itemset on the current line, separated by single spaces.
     */
    private static void writeItemset(BufferedWriter writer, List<Integer> itemset)
            throws IOException {
        for (int j = 0; j < itemset.size(); j++) {
            if (j != 0) {
                writer.write(" ");
            }
            writer.write(String.valueOf(itemset.get(j)));
        }
    }
}