package ca.pfv.spmf.algorithms.sequentialpatterns.fournier2008_seqdim.kmeans_for_fournier08;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import ca.pfv.spmf.algorithms.sequentialpatterns.fournier2008_seqdim.AlgoFournierViger08;
import ca.pfv.spmf.algorithms.sequentialpatterns.fournier2008_seqdim.ItemValued;
/**
* This class represents a Cluster for the Kmeans version used
* by the Fournier-Viger-2008 algorithm.
*
* It differs from the general K-Means implementation in the package "clustering" in that
* it is designed to cluster valued items (items with a value) rather than vectors of
* double values. It also keeps the average, highest and lowest values of each cluster.
*
* @see AlgoKMeans_forFournier08
* @see AlgoKMeansWithSupport
* @see AlgoFournierViger08
* @author Philippe Fournier-Viger
*/
public class Cluster {

    // value of "higher" before any extreme has been computed
    private static final double INITIAL_HIGHER = 0;
    // value of "lower" before any extreme has been computed
    private static final double INITIAL_LOWER = Double.MAX_VALUE;

    // the list of items in this cluster
    private final List<ItemValued> items;
    // the average value of this cluster (only refreshed by recomputeClusterAverage())
    private double average;
    // the highest value of this cluster (only refreshed by computeHigherAndLower())
    private double higher = INITIAL_HIGHER;
    // the lowest value of this cluster (only refreshed by computeHigherAndLower())
    private double lower = INITIAL_LOWER;
    // the running sum of the values in this cluster (used
    // to calculate the mean without iterating over the items)
    private double sum = 0;
    // the list of sequence IDs
    private Set<Integer> sequenceIDs = null;

    /**
     * Constructor of an empty cluster with a specified average (for K-means)
     * @param average the average
     */
    public Cluster(double average){
        this.items = new ArrayList<ItemValued>();
        this.average = average;
    }

    /**
     * Constructor of a cluster with a list of items.
     * The list is copied; the average is computed from the copied items.
     * @param newItems a list of items
     */
    public Cluster(List<ItemValued> newItems){
        this.items = new ArrayList<ItemValued>(newItems);
        // the running sum must be initialized before the average is
        // derived from it, otherwise the average would be 0 / size
        this.sum = sumOfValues(this.items);
        // calculate the average
        recomputeClusterAverage();
    }

    /**
     * Constructor of a cluster by adding two lists of items.
     * Both lists are copied; the average is computed over all copied items.
     * @param newItems a first list of items.
     * @param newItems2 a second list of items.
     */
    public Cluster(List<ItemValued> newItems, List<ItemValued> newItems2){
        // add the first list
        this.items = new ArrayList<ItemValued>(newItems);
        // add the second list
        this.items.addAll(newItems2);
        // initialize the running sum over both lists so that the
        // average below is computed from the actual values
        this.sum = sumOfValues(this.items);
        // calculate the average
        recomputeClusterAverage();
    }

    /**
     * Constructor of a cluster with a single item
     * @param item the item
     */
    public Cluster(ItemValued item){
        // add the item
        this.items = new ArrayList<ItemValued>();
        this.items.add(item);
        // calculate the sum
        sum += item.getValue();
        // calculate average
        this.average = item.getValue();
    }

    /**
     * Add items from another cluster to this cluster.
     * The running sum is updated, but the stored average is not
     * refreshed until {@link #recomputeClusterAverage()} is called.
     * @param cluster2 the other cluster.
     */
    public void addItemsFromCluster(Cluster cluster2){
        appendAll(cluster2.getItems());
    }

    /**
     * Add an item to this cluster.
     * The running sum is updated, but the stored average is not
     * refreshed until {@link #recomputeClusterAverage()} is called.
     * @param item the item
     */
    public void addItem(ItemValued item) {
        // add the item
        items.add(item);
        // update the sum
        sum += item.getValue();
    }

    /**
     * Add a list of items to this cluster.
     * The running sum is updated, but the stored average is not
     * refreshed until {@link #recomputeClusterAverage()} is called.
     * @param newItems a list of items
     */
    public void addItems(List<ItemValued> newItems) {
        appendAll(newItems);
    }

    /**
     * Get the list of items in this cluster.
     * The returned list is the internal list of this cluster: items added to or
     * removed from it directly are not reflected in the running sum.
     * @return a list of items
     */
    public List<ItemValued> getItems() {
        return items;
    }

    /**
     * Get the number of items stored in this cluster.
     * @return an integer.
     */
    public int size(){
        return items.size();
    }

    /**
     * Get the average of this cluster, as of the last time it was computed.
     * @return the average
     */
    public double getaverage() {
        return average;
    }

    /**
     * Get a string representation of this cluster
     * @return a string
     */
    @Override
    public String toString(){
        // for each item, print it
        StringBuilder buffer = new StringBuilder("(");
        for(ItemValued item : items){
            buffer.append(item.getValue());
            buffer.append(" ");
        }
        // append the average, the minimum item and the maximum item
        // in the cluster
        buffer.append(") <");
        buffer.append(average);
        buffer.append(", min=");
        buffer.append(getLower());
        buffer.append(" max=");
        buffer.append(getHigher());
        buffer.append(">");
        return buffer.toString();
    }

    /**
     * Calculate the average of items in the cluster.
     * If the cluster is empty, the current average is left unchanged.
     */
    public void recomputeClusterAverage() {
        // if no item, don't do anything
        if(items.isEmpty()){
            return;
        }
        // if one item, then it is the average..
        if(items.size() == 1){
            average = items.get(0).getValue();
            return;
        }
        // otherwise, calculate the average as the running sum
        // divided by the number of items.
        average = sum / ((double) items.size());
    }

    /**
     * Compute the smallest and largest values of this cluster.
     * Both bounds are recomputed from the current items only, so negative
     * values and repeated calls are handled. If the cluster is empty, the
     * bounds are reset to their initial values (0 for the highest value and
     * Double.MAX_VALUE for the lowest value).
     */
    public void computeHigherAndLower(){
        if(items.isEmpty()){
            higher = INITIAL_HIGHER;
            lower = INITIAL_LOWER;
            return;
        }
        // start from neutral bounds rather than from the field values, so that
        // neither the initial 0 nor a previous computation can mask the real extremes
        double largest = Double.NEGATIVE_INFINITY;
        double smallest = Double.POSITIVE_INFINITY;
        // for each item
        for(ItemValued item : items){
            double value = item.getValue();
            // if the largest until now, remember it
            if(value > largest){
                largest = value;
            }
            // if the smallest until now, remember it
            if(value < smallest){
                smallest = value;
            }
        }
        higher = largest;
        lower = smallest;
    }

    /**
     * Check if this cluster contains a given item.
     * Items are compared by reference (==), not with equals().
     * @param item2 the given item
     * @return true if the item is contained, otherwise, false.
     */
    public boolean containsItem(ItemValued item2) {
        // for each item
        for(ItemValued item : items){
            // if it is the same object, return true
            if(item == item2){
                return true;
            }
        }
        // the item was not found, so return false
        return false;
    }

    /**
     * Get the largest value in this cluster, as of the last call
     * to {@link #computeHigherAndLower()}.
     * @return a double
     */
    public double getHigher() {
        return higher;
    }

    /**
     * Get the smallest value in this cluster, as of the last call
     * to {@link #computeHigherAndLower()}.
     * @return a double
     */
    public double getLower() {
        return lower;
    }

    /**
     * Get the item ID associated to this cluster (for use with
     * the Fournier-Viger 08 algorithm).
     * @return the item ID
     * @throws IndexOutOfBoundsException if this cluster contains no item
     */
    public int getItemId() {
        // all items are expected to store the same item ID, so we just take
        // the ID from the first one
        return items.get(0).getId();
    }

    /**
     * Get the set of sequence IDs corresponding to this cluster
     * @return a set of sequence IDs (null if it was never set).
     */
    public Set<Integer> getSequenceIDs() {
        return sequenceIDs;
    }

    /**
     * Set the set of sequence IDs corresponding to this cluster
     * @param sequenceIDs a set of sequence IDs.
     */
    public void setSequenceIDs(Set<Integer> sequenceIDs) {
        this.sequenceIDs = sequenceIDs;
    }

    /**
     * Append every item of a list to this cluster and update the running sum.
     * @param source the items to append
     */
    private void appendAll(List<ItemValued> source) {
        // when the source is this cluster's own list, iterate over a snapshot:
        // appending to a list while iterating over it would fail
        List<ItemValued> toAdd = source;
        if(source == items){
            toAdd = new ArrayList<ItemValued>(source);
        }
        for(ItemValued item : toAdd){
            // add it
            items.add(item);
            // update the sum
            sum += item.getValue();
        }
    }

    /**
     * Calculate the sum of the values of a list of items.
     * @param list the items
     * @return the sum of their values
     */
    private static double sumOfValues(List<ItemValued> list) {
        double total = 0;
        for(ItemValued item : list){
            total += item.getValue();
        }
        return total;
    }
}