/**
* Copyright (c) 2013 Oculus Info Inc.
* http://www.oculusinfo.com/
*
* Released under the MIT License.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package spimedb.cluster.unsupervised;
import spimedb.cluster.DataSet;
import spimedb.cluster.Instance;
import spimedb.cluster.feature.bagofwords.BagOfWordsFeature;
import spimedb.cluster.feature.bagofwords.centroid.BagOfWordsCentroid;
import spimedb.cluster.feature.bagofwords.distance.EditDistance;
import spimedb.cluster.feature.spatial.GeoSpatialFeature;
import spimedb.cluster.feature.spatial.centroid.FastGeoSpatialCentroid;
import spimedb.cluster.feature.spatial.distance.HaversineDistance;
import spimedb.cluster.unsupervised.cluster.Cluster;
import spimedb.cluster.unsupervised.cluster.ClusterResult;
import spimedb.cluster.unsupervised.cluster.threshold.ThresholdClusterer;
/**
 * Hand-run driver that clusters a small, hard-coded data set with a
 * {@link ThresholdClusterer} and prints every resulting cluster.
 *
 * <p>Each instance carries a bag-of-words feature named {@code "tokens"};
 * all but instance {@code "2"} also carry a geo-spatial feature named
 * {@code "opt1"}.
 */
public class TestBagOfWordsClustering {

    private static final String FEATURE_NAME1 = "tokens";
    private static final String SOFT_FEATURE_NAME = "opt1";

    /**
     * Builds the ten sample instances, registers both feature types with the
     * clusterer, runs the clustering and writes each cluster to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        DataSet samples = new DataSet();

        samples.add(located(tokenInstance("1", "jack", "black"), 39.76, -98.5));
        // Instance "2" is the only one added without the "opt1" feature.
        samples.add(tokenInstance("2", "jack", "black"));
        samples.add(located(tokenInstance("3", "jack"), 39.76, -98.5));
        samples.add(located(tokenInstance("4", "jack", "l.", "black"), 39.76, -98.5));
        samples.add(located(tokenInstance("5", "j.", "black"), 39.76, -98.5));
        samples.add(located(tokenInstance("6", "j", "black"), 39.76, -98.5));
        samples.add(located(tokenInstance("7", "black"), 39.76, -98.5));
        samples.add(located(tokenInstance("8", "jackie", "black"), 39.76, -98.5));
        // Instance "9" is the only one given different coordinates.
        samples.add(located(tokenInstance("9", "jack", "brown"), 10.0, 8.0));
        samples.add(located(tokenInstance("10", "jackie", "green"), 39.76, -98.5));

        ThresholdClusterer thresholdClusterer = new ThresholdClusterer();
        // NOTE(review): the 1.0 passed to each distance function is presumably
        // a per-feature weight -- confirm against the distance classes.
        thresholdClusterer.registerFeatureType(
                FEATURE_NAME1, BagOfWordsCentroid.class, new EditDistance(1.0));
        thresholdClusterer.registerFeatureType(
                SOFT_FEATURE_NAME, FastGeoSpatialCentroid.class, new HaversineDistance(1.0));
        thresholdClusterer.setThreshold(0.5);

        ClusterResult result = thresholdClusterer.doCluster(samples);
        for (Cluster cluster : result) {
            System.out.println(cluster);
        }
    }

    /**
     * Creates an instance holding a single {@code "tokens"} bag-of-words
     * feature in which every given word has a count of 1.
     *
     * @param id    identifier passed to the {@link Instance} constructor
     * @param words words to record, applied in the order given
     * @return the new instance with the bag-of-words feature attached
     */
    private static Instance tokenInstance(String id, String... words) {
        Instance instance = new Instance(id);
        BagOfWordsFeature bag = new BagOfWordsFeature(FEATURE_NAME1);
        for (String word : words) {
            bag.setCount(word, 1);
        }
        instance.addFeature(bag);
        return instance;
    }

    /**
     * Attaches an {@code "opt1"} geo-spatial feature with the given value to
     * an instance.
     *
     * @param instance instance to extend; modified in place
     * @param first    first coordinate (presumably latitude -- confirm
     *                 against {@link GeoSpatialFeature})
     * @param second   second coordinate (presumably longitude -- confirm
     *                 against {@link GeoSpatialFeature})
     * @return the same {@code instance}, to allow call chaining
     */
    private static Instance located(Instance instance, double first, double second) {
        GeoSpatialFeature position = new GeoSpatialFeature(SOFT_FEATURE_NAME);
        position.setValue(first, second);
        instance.addFeature(position);
        return instance;
    }
}