package it.unito.geosummly.clustering.subspace; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN; import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SUBCLU; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.data.model.Model; import de.lmu.ifi.dbs.elki.data.model.SubspaceModel; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection; import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; import de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter; import de.lmu.ifi.dbs.elki.distance.distancefunction.strings.LevenshteinDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance; import de.lmu.ifi.dbs.elki.index.Index; import de.lmu.ifi.dbs.elki.result.ResultUtil; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; public class Main { private Double SUBCLU_esp = 0.01; private int SUBCLU_minpts = 20; private Double DBSCAN_esp = 0.0005; private int DBSCAN_minpts = 100; private Double GEOSUBCLU_esp = 0.1; private int GEOSUBCLU_minpts = 5; public static void main(String[] args) { Main main = new Main(); // SortedMap of doubles where key is the row of the dataset, // while values are the values of each cell expressed in doub //Database db = main.makeSimpleDatabase("subspace-simple-1.csv", 1000, new ListParameterization(), null); Database db = main.buildFromMatrix("data/sample.csv"); //Database db = main.buildFromMatrix("sample-subclu.csv"); Collection<Index> indexes = db.getIndexes(); for (Index i : indexes) { System.out.println(i.getLongName()); } Clustering<?> result = main.runGEOSUBCLU(db); //we do not really need Outliers, since the definition is given here http://elki.dbs.ifi.lmu.de/wiki/Tutorial/Outlier ArrayList<OutlierResult> ors = ResultUtil.filterResults(result, OutlierResult.class); System.out.print("outlier:"); for (OutlierResult o : ors) { Relation<Double> scores = o.getScores(); for (DBIDIter iter = scores.iterDBIDs(); iter.valid(); iter.advance()) { System.out.println(DBIDUtil.toString(iter) + " " + scores.get(iter)); } } System.out.println("\nclusters:"); ArrayList<Clustering<?>> cs = ResultUtil.filterResults(result, Clustering.class); int j = 0; HashMap<Integer, Integer> map = new HashMap<>(); for (Clustering<?> c : cs) { for (Cluster<?> cluster : c.getAllClusters()) { for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { System.out.print(DBIDUtil.toString(iter)+" "); ++j; map.put(Integer.parseInt(DBIDUtil.toString(iter)), 0); } System.out.println(); } //System.out.println(i); //System.out.println(map.size()); } System.out.println("--------------------------------------------------"); Integer i = 0; String[] labels = new String[ db.getRelation(TypeUtil.ANY).size() ]; for (Clustering<?> c : cs) for (Cluster<?> cluster : c.getAllClusters()) { if(i != 0) { for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { //System.out.println(DBIDUtil.asInteger(iter)); if (labels[ DBIDUtil.asInteger(iter) - 1 ] == null) labels[ DBIDUtil.asInteger(iter) - 1 ] = "Label".concat(i.toString()) ; } } ++i; } for (i = 0; i<labels.length; i++) { //System.out.println(new Integer(i+1).toString().concat(" ").concat(labels[i])); System.out.println( (labels[i]== null ) ? "0" : labels[i]); } List<Clustering<? extends Model>> clusterresults = ResultUtil.getClusteringResults(result); for (Clustering<?> c : clusterresults){ } } public Clustering<?> runSUBCLU (Database db) { ListParameterization params = new ListParameterization(); params.addParameter(SUBCLU.EPSILON_ID, SUBCLU_esp); params.addParameter(SUBCLU.MINPTS_ID, SUBCLU_minpts); // setup algorithm SUBCLU<DoubleVector> subclu = ClassGenericsUtil.parameterizeOrAbort(SUBCLU.class, params); // run SUBCLU on database Clustering<SubspaceModel<DoubleVector>> result = subclu.run(db); return result; } public Clustering<?> runGEOSUBCLU (Database db) { ListParameterization params = new ListParameterization(); //params.addParameter(GEOSUBCLU.EPSILON_ID, GEOSUBCLU_esp); //params.addParameter(GEOSUBCLU.MINPTS_ID, GEOSUBCLU_minpts); //params.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); // setup algorithm GEOSUBCLU<DoubleVector> geosubclu = ClassGenericsUtil.parameterizeOrAbort(GEOSUBCLU.class, params); // run GEOSUBCLU on database Clustering<SubspaceModel<DoubleVector>> result = geosubclu.run(db); return result; } private <T> Database buildFromMatrix (String file) { List<ArrayList<Double>> matrix = new ArrayList<ArrayList<Double>>(); try { BufferedReader br = new BufferedReader(new FileReader(file)); String line; br.readLine(); while ((line = br.readLine())!=null) { ArrayList<Double> temp = new ArrayList<>(); String[] tokens = line.split(","); for (int j=0; j < tokens.length; j++ ) { temp.add(Double.parseDouble(tokens[j])); } matrix.add(temp); } br.close(); } catch (IOException e) { e.printStackTrace(); } double[][] data = new double[matrix.size()][]; for (int i=0; i<matrix.size(); i++) { data[i] = new double[matrix.get(i).size()]; for(int j=0; j<matrix.get(i).size(); j++) { data[i][j] = (matrix.get(i)).get(j); } } List<Class<?>> filterlist = new ArrayList<>(); filterlist.add(FixedDBIDsFilter.class); Database db = new InMemoryDatabase(new ArrayAdapterDatabaseConnection(data), null); db.initialize(); Relation<?> rel = db.getRelation(TypeUtil.ANY); //System.out.println("size of the relations: " + rel.size()); return db; } // private <T> Database makeSimpleDatabase ( // String filename, // int expectedSize, // ListParameterization params, // Class<?>[] filters // ) // { // params.addParameter(FileBasedDatabaseConnection.INPUT_ID, filename); // // List<Class<?>> filterlist = new ArrayList<>(); // filterlist.add(FixedDBIDsFilter.class); // if(filters != null) { // for(Class<?> filter : filters) { // filterlist.add(filter); // } // } // params.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filterlist); // params.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); // // Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params); // // db.initialize(); // Relation<?> rel = db.getRelation(TypeUtil.ANY); // // System.out.println("size of the relations: " + rel.size()); // // return db; // } }