package skywriting.examples.skyhout.input; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.serializer.Serialization; import org.apache.hadoop.io.serializer.WritableSerialization; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.DenseVectorWritable; import org.apache.mahout.math.VectorWritable; import skywriting.examples.skyhout.common.SkywritingTaskFileSystem; import uk.co.mrry.mercator.task.JarTaskLoader; import uk.co.mrry.mercator.task.Task; public class VectorInputParserTask implements Task { @Override public void invoke(InputStream[] fis, OutputStream[] fos, String[] args) { try { Configuration conf = new Configuration(); conf.setClassLoader(JarTaskLoader.CLASSLOADER); conf.setClass("io.serializations", WritableSerialization.class, Serialization.class); new WritableSerialization(); SkywritingTaskFileSystem fs = new SkywritingTaskFileSystem(fis, fos, conf); SequenceFile.Writer[] writers = new SequenceFile.Writer[fos.length]; for (int i = 0; i < fos.length; ++i) { writers[i] = new SequenceFile.Writer(fs, conf, new Path("/in/" + i), Text.class, VectorWritable.class); } int currentVector = 0; for (int i = 0; i < fis.length; ++i) { BufferedReader lineReader = new BufferedReader(new InputStreamReader(fis[i])); String line; while ((line = lineReader.readLine()) != null) { String[] fields = line.split("\\s+"); DenseVector dv = new DenseVector(new double[fields.length]); VectorWritable vector = new VectorWritable(dv); for (int j = 0; j < fields.length; ++j) { dv.set(j, Double.parseDouble(fields[j])); } writers[currentVector % writers.length].append(new Text("c" + currentVector), vector); ++currentVector; } } for (int i = 0; i < writers.length; ++i) { writers[i].close(); } } catch (IOException ioe) { throw new RuntimeException(ioe); } } }