package org.plista.kornakapi.core.training; import java.util.List; import org.apache.mahout.clustering.streaming.cluster.StreamingKMeans; import org.apache.mahout.common.distance.ManhattanDistanceMeasure; import org.apache.mahout.math.Centroid; import org.apache.mahout.math.neighborhood.FastProjectionSearch; import org.apache.mahout.math.neighborhood.UpdatableSearcher; import org.plista.kornakapi.core.cluster.StreamingKMeansClassifierModel; import org.plista.kornakapi.core.storage.MySqlKMeansDataFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class StramingKMeansClusterer { MySqlKMeansDataFilter extractor; StreamingKMeansClassifierModel model; int clusters; long cutoff; StreamingKMeans clusterer; private static final Logger log = LoggerFactory.getLogger(StramingKMeansClusterer.class); public StramingKMeansClusterer(StreamingKMeansClassifierModel model,int clusters,long cutoff){ this.model = model; this.clusters = clusters; this.cutoff = cutoff; UpdatableSearcher searcher = new FastProjectionSearch(new ManhattanDistanceMeasure(), 10, 10); clusterer = new StreamingKMeans(searcher, clusters,cutoff); } /** * retrain model */ public void cluster(){ long start = System.currentTimeMillis(); List<Centroid> data = model.getData(); UpdatableSearcher centroids = clusterer.cluster(data); long estimateDuration = System.currentTimeMillis() - start; this.model.updateCentroids(centroids); if (log.isInfoEnabled()) { log.info("Model trained in {} ms, created [{}] Clusters", estimateDuration, centroids.size()); } } /** * just stream new available data-points into old coordinate system */ public void stream(){ long start = System.currentTimeMillis(); List<Centroid> data = model.getNewData(); UpdatableSearcher centroids = clusterer.cluster(data); long estimateDuration = System.currentTimeMillis() - start; this.model.updateCentroids(centroids); if (log.isInfoEnabled()) { log.info("Model trained in {} ms, created [{}] Clusters", estimateDuration, centroids.size()); } } }