package mia.clustering.ch08; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.NamedVector; import org.apache.mahout.math.VectorWritable; public class ApplesToVectors { public static void main(String args[]) throws Exception { List<NamedVector> apples = new ArrayList<NamedVector>(); NamedVector apple; apple = new NamedVector( new DenseVector(new double[] {0.11, 510, 1}), "Small round green apple"); apples.add(apple); apple = new NamedVector( new DenseVector(new double[] {0.23, 650, 3}), "Large oval red apple"); apples.add(apple); apple = new NamedVector( new DenseVector(new double[] {0.09, 630, 1}), "Small elongated red apple"); apples.add(apple); apple = new NamedVector( new DenseVector(new double[] {0.25, 590, 3}), "Large round yellow apple"); apples.add(apple); apple = new NamedVector( new DenseVector(new double[] {0.18, 520, 2}), "Medium oval green apple"); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path("appledata/apples"); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class); VectorWritable vec = new VectorWritable(); for (NamedVector vector : apples) { vec.set(vector); writer.append(new Text(vector.getName()), vec); } writer.close(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("appledata/apples"), conf); Text key = new Text(); VectorWritable value = new VectorWritable(); while (reader.next(key, value)) { System.out.println(key.toString() + " " + value.get().asFormatString()); } reader.close(); } }