package com.alimama.quanjingmonitor.kmeans; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile.Reader; import org.apache.hadoop.io.Text; public class KmeansPublic { public static PathFilter FILTER=new org.apache.hadoop.fs.PathFilter() { public boolean accept(Path path) { String name = path.getName(); return !(name.endsWith(".crc") || name.startsWith(".") || name .startsWith("_")); } }; public static void configureWithClusterInfo(Configuration conf, Path clusterPathStr, Collection<Cluster> clusters) throws IOException { Path clusterPath = new Path(clusterPathStr, "*"); Collection<Path> result = new ArrayList<Path>(); // get all filtered file names in result list FileSystem fs = clusterPath.getFileSystem(conf); FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, FILTER)),FILTER); for (FileStatus match : matches) { result.add(fs.makeQualified(match.getPath())); } // iterate through the result path list for (Path path : result) { SequenceFile.Reader reader= new SequenceFile.Reader(fs, path, conf); Text key=new Text(); Cluster clu=new Cluster(); while (reader.next(key, clu)) { clusters.add(new Cluster(clu)); } reader.close(); } System.out.println("####clusters.size="+clusters.size()); } }