package ca.pfv.spmf.algorithms.sequentialpatterns.goKrimp; import java.io.BufferedReader; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; /** * This file reads data with different formats for the GoKrimp algorithm. * <br/><br/> * * Copyright (c) 2014 Hoang Thanh Lam (TU Eindhoven and IBM Research) * Toon Calders (Université Libre de Bruxelles), Fabian Moerchen (Amazon.com inc) * and Dmitriy Fradkin (Siemens Corporate Research) * <br/><br/> * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * <br/><br/> * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * <br/><br/> * * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * <br/><br/> * * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. 
*
 * @see AlgoGoKrimp
 * @see Event
 * @see MyPattern
 * @see SignTest
 * @author Hoang Thanh Lam (TU Eindhoven and IBM Research)
 */
public class DataReader {

    /** Separator between the tokens of one line of a database file. */
    private static final String DELIMITER = " ";

    /**
     * Reads a database in which every non-blank line is one sequence of
     * space-separated integer event ids.
     * <p>
     * I/O failures are reported on {@code System.err} and do not propagate;
     * the returned object then holds the sequences read before the failure.
     *
     * @param databasename  path of the database file
     * @param labelfilename path of the label file (see {@link #readLabel(String)})
     * @return a new {@link AlgoGoKrimp} whose {@code labels} and {@code data}
     *         fields have been filled in
     * @throws NumberFormatException if a token is not a valid integer
     */
    AlgoGoKrimp readData(String databasename, String labelfilename) {
        AlgoGoKrimp gk = new AlgoGoKrimp();
        gk.labels = readLabel(labelfilename);
        gk.data = new ArrayList<>();
        try {
            loadSequences(gk, databasename, false);
            // One sequence is stored per line read, so this is the line count.
            System.err.println("data size:" + gk.data.size());
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
        return gk;
    }

    /**
     * Reads a database in the SPMF format.
     * <p>
     * Every non-blank line is one sequence. Tokens containing {@code '-'}
     * (presumably the {@code -1}/{@code -2} separators of the SPMF format)
     * are skipped, and every remaining id is decremented by one before it
     * is stored.
     * <p>
     * I/O failures are reported on {@code System.err} and do not propagate;
     * the returned object then holds the sequences read before the failure.
     *
     * @param databasename  path of the database file
     * @param labelfilename path of the label file (see {@link #readLabel(String)})
     * @return a new {@link AlgoGoKrimp} whose {@code labels} and {@code data}
     *         fields have been filled in
     * @throws NumberFormatException if a kept token is not a valid integer
     */
    public AlgoGoKrimp readData_SPMF(String databasename, String labelfilename) {
        AlgoGoKrimp gk = new AlgoGoKrimp();
        gk.labels = readLabel(labelfilename);
        gk.data = new ArrayList<>();
        try {
            loadSequences(gk, databasename, true);
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
        return gk;
    }

    /**
     * Reads the label file: the text of the k-th line (counting from 0)
     * becomes the label stored under key k.
     * <p>
     * A {@code null} name or a file that does not exist yields an empty map
     * without any message. Other I/O failures are reported on
     * {@code System.err} and the labels read so far are returned.
     *
     * @param dataname path of the label file
     * @return map from line index to label, never {@code null}
     */
    HashMap<Integer, String> readLabel(String dataname) {
        HashMap<Integer, String> labels = new HashMap<>();
        // Labels are optional: only try to read them when the file is there.
        if (dataname == null || !new File(dataname).exists()) {
            return labels;
        }
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(dataname)))) {
            String strLine;
            int k = 0;
            while ((strLine = br.readLine()) != null) {
                labels.put(k, strLine);
                k++;
            }
        } catch (IOException e) {
            System.err.println("Warning: " + e.getMessage());
        }
        return labels;
    }

    /**
     * Appends one sequence to {@code gk.data} for every non-blank line of the
     * given file. The reader is closed even when reading or parsing fails.
     */
    private static void loadSequences(AlgoGoKrimp gk, String databasename,
            boolean spmfFormat) throws IOException {
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(databasename)))) {
            String strLine;
            while ((strLine = br.readLine()) != null) {
                // A blank line holds no event id; skip it instead of failing
                // in Integer.parseInt("").
                if (strLine.trim().isEmpty()) {
                    continue;
                }
                gk.data.add(parseSequence(strLine, spmfFormat));
            }
        }
    }

    /**
     * Converts one line into a list of events. Each kept token becomes an
     * event whose {@code ts} is its zero-based position among the kept tokens
     * and whose {@code gap} is the difference to the previous {@code ts}
     * (0 for the first event, 1 afterwards).
     */
    private static ArrayList<Event> parseSequence(String strLine, boolean spmfFormat) {
        ArrayList<Event> sequence = new ArrayList<>();
        int ts = 0;
        int prev = 0;
        for (String token : strLine.split(DELIMITER)) {
            // Leading or repeated delimiters produce empty tokens.
            if (token.isEmpty()) {
                continue;
            }
            if (spmfFormat && token.contains("-")) {
                continue;
            }
            int id = Integer.parseInt(token);
            Event e = new Event();
            e.id = spmfFormat ? id - 1 : id;
            e.ts = ts;
            e.gap = ts - prev;
            prev = ts;
            sequence.add(e);
            ts++;
        }
        return sequence;
    }
}