package skywriting.examples.terasort;

import skywriting.examples.grep.Text;
import uk.co.mrry.mercator.task.Task;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Task that merges several input streams and writes evenly spaced keys from
 * the merged sequence to its first output as partition boundaries.
 *
 * <p>Every fatal condition is reported on {@code System.err} and terminates
 * the JVM with exit status 2.
 */
public class SWTeraSampler implements Task {

    /** Exit status used for every fatal error reported by this task. */
    private static final int EXIT_FAILURE = 2;

    /**
     * Merges the first {@code nBucketers} inputs and emits {@code nPartitions - 1}
     * boundary keys to {@code outputs[0]}, one every
     * {@code nRecordsExpected / nPartitions} merged records.
     *
     * @param inputs  input streams; the first {@code args[0]} of them are read
     * @param outputs output streams; only {@code outputs[0]} is written, and it is
     *                closed once sampling finishes
     * @param args    {@code args[0]}: number of input streams to merge;
     *                {@code args[1]}: number of partitions;
     *                {@code args[2]}: number of records expected across all inputs
     */
    public void invoke(InputStream[] inputs, OutputStream[] outputs, String[] args) {
        int nBucketers = Integer.parseInt(args[0]);
        int nPartitions = Integer.parseInt(args[1]);
        int nRecordsExpected = Integer.parseInt(args[2]);

        // A partition count below one would divide by zero (or ask for a negative
        // number of boundaries) below, so reject it with a clear message.
        if (nPartitions < 1) {
            System.err.printf("Invalid partition count %d (must be at least 1)\n", nPartitions);
            System.exit(EXIT_FAILURE);
            return;
        }

        int boundariesNeeded = nPartitions - 1;
        int recordsBetweenBoundaries = nRecordsExpected / nPartitions;

        // With more partitions than expected records the interval rounds down to
        // zero, which would make the modulo in the sampling loop throw
        // ArithmeticException. Only relevant when at least one boundary is needed.
        if (boundariesNeeded > 0 && recordsBetweenBoundaries < 1) {
            System.err.printf(
                    "Cannot pick %d boundaries from %d expected records: sampling interval would be %d\n",
                    boundariesNeeded, nRecordsExpected, recordsBetweenBoundaries);
            System.exit(EXIT_FAILURE);
            return;
        }

        DataInputStream[] dis = new DataInputStream[nBucketers];
        for (int i = 0; i < nBucketers; i++) {
            dis[i] = new DataInputStream(new BufferedInputStream(inputs[i]));
        }
        DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(outputs[0]));

        // NOTE(review): presumably Merger.merge yields the records of all inputs in
        // sorted key order - confirm against Merger.
        TextPairIterator iter = null;
        try {
            iter = Merger.merge(dis);
        } catch (IOException e) {
            System.err.println("Exception during merge: " + e);
            System.exit(EXIT_FAILURE);
        }

        try {
            // Number of records actually consumed from the merged sequence; also
            // the 1-based position of the current record inside the loop.
            int recordsRead = 0;
            int boundariesEmitted = 0;
            while ((boundariesEmitted < boundariesNeeded) && iter.next()) {
                recordsRead++;
                if ((recordsRead % recordsBetweenBoundaries) == 0) {
                    Text key = iter.getKey();
                    key.write(dos);
                    boundariesEmitted++;
                }
            }
            dos.close();

            if (boundariesEmitted != boundariesNeeded) {
                System.err.printf("Emitted %d boundaries (expected %d), probably due to short input (got %d records, expected %d)\n",
                        boundariesEmitted, boundariesNeeded, recordsRead, nRecordsExpected);
                System.exit(EXIT_FAILURE);
            }
        } catch (IOException e) {
            System.err.println("Exception during iteration / writing: " + e);
            System.exit(EXIT_FAILURE);
        }
    }
}