package ca.pfv.spmf.tools.dataset_generator; /* This file is copyright (c) 2008-2012 Philippe Fournier-Viger * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. */ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import ca.pfv.spmf.input.sequence_database_array_integers.Sequence; import ca.pfv.spmf.input.sequence_database_array_integers.SequenceDatabase; /** * This class is to read a sequence database in SPMF format, add consecutive timestamps * to itemsets in each sequence, and then write the sequence database to a file. * * @author Philippe Fournier-Viger */ public class AddTimeStampsToSequenceDatabase { /** * Read a sequence database in SPMF format, add timestamps and then * write the sequence database to a file. * Note that this code could be further optimized if performance is an issue * (instead of loading the file into memory, it could be loaded line by line). * @param inputFile the path of a sequence database in SPMF format * @param outputFile the output path for writing the database with timestamps * @throws IOException if error while reading/writing file */ public void convert(String inputFile, String outputFile) throws IOException { // we create an object fro writing the output file BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); SequenceDatabase database = new SequenceDatabase(); database.loadFile(inputFile); BufferedReader myInput = null; try { for(int i=0; i < database.getSequences().size(); i++) { Sequence sequence = database.getSequences().get(i); // for each itemset in this sequence for(int j = 0; j< sequence.getItemsets().size(); j++) { // write timestamp writer.write("<" + j + "> "); Integer[] itemset = sequence.get(j); for(Integer item : itemset) { writer.write(item + " "); } writer.write("-1 "); } writer.write(" -2"); // start a new line writer.newLine(); } // close the output file writer.close(); } catch (Exception e) { e.printStackTrace(); } finally { if (myInput != null) { myInput.close(); } } } }