package phoenix.kmeans; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class Kmeans<T extends IItem> { /** * 所有数据列表 */ private List<T> items ; /** * 数据类别 */ private Class<T> clazz ; /** * 中心点集合 */ private List<T> seedList; /** * 分类数 */ private int k = 1; public Kmeans(List<T> list, int k,Class<T> clazz) { this.items = list; this.k = k; this.clazz = clazz; } /** * 执行聚类运算 * @return */ public Result run() throws InstantiationException, IllegalAccessException { (clazz.newInstance()).prehandle(items); seedList = new ArrayList<T>(items.subList(0,k)); //默认选几个数据点当中心 @SuppressWarnings("unchecked") List<T>[] results = new ArrayList[k]; boolean centerChanged = true; while (centerChanged) { centerChanged = false; //清空结果数组 for (int i = 0; i < k; i++) { if(results[i]==null){ results[i]=new ArrayList<T>(); }else{ results[i].clear(); } } //运算每个数据点与种子的距离,投放到距离近的种子对应的结果集中 T tmp_item; int min_index=0; double min_dist=Double.MAX_VALUE,tmp_dist; for (int i = 0; i < items.size(); i++) { tmp_item = items.get(i); min_dist=Double.MAX_VALUE; for (int j = 0; j < seedList.size(); j++) { tmp_dist = seedList.get(j).distance(tmp_item); if(tmp_dist<min_dist){ min_index=j; min_dist=tmp_dist; } } results[min_index].add(tmp_item); } //找新的中心点,更换掉种子 for (int i = 0; i < k; i++) { if(results[i]==null || results[i].size()==0){ continue; } T t_new = findNewCenter(results[i]); if (!seedList.get(i).equals(t_new)){ centerChanged = true; seedList.set(i, t_new); } } System.out.println("=="); } return new Result(true, results, seedList); } /** * 得到新聚类中心对象 * @param ps * @return */ public T findNewCenter(List<T> ps) throws InstantiationException,IllegalAccessException { T t = clazz.newInstance(); int fieldnum = t.getDimensionNum(); double[] ds = new double[fieldnum]; double[] tmpd; for (T vo : ps) { tmpd = vo.getDatas(); for (int i = 0; i < fieldnum; i++) { ds[i] += tmpd[i]; } } for (int i = 0; i < fieldnum; i++) { ds[i] = ds[i] / ps.size(); } t.initPoint(ds); return t; } public void loadDataFile(String filepath,int dimensionNum,Class<T> clazz,String split) throws NumberFormatException, IOException, InstantiationException, IllegalAccessException{ List<T> list = new ArrayList<T>(); File file = new File(filepath); BufferedReader reader = new BufferedReader(new FileReader(file)); String tempString = null; T p = null ; while ((tempString = reader.readLine()) != null) { p = clazz.newInstance(); double[] s = new double[dimensionNum]; for(int i=0; i <dimensionNum ;i++){ s[i]=Double.parseDouble(tempString.split(split)[i]); } p.initPoint(s); list.add(p); } reader.close(); items = list ; } public class Result { /** * 处理结果 */ public boolean success = true ; /** * 数据分组后的结果 */ public List<T>[] classifyResults ; /** * 中心点集合 */ public List<T> cores ; public Result(boolean success,List<T>[] classifyResults,List<T> cores){ this.success = success; this.classifyResults = classifyResults; this.cores = cores; } } }