package skywriting.examples.terasort;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import skywriting.examples.grep.Text;
import uk.co.mrry.mercator.task.Task;
/**
 * Task that reads key/value records, assigns each record to one of N
 * partitions, sorts every partition by key and writes each partition to its
 * own output stream.
 *
 * <p>Expected by {@link #invoke}:
 * <ul>
 *   <li>Two inputs: 0 - the partition descriptors written by SWTeraSampler;
 *       1 - a sequence of lines to be parsed and bucketed.</li>
 *   <li>N outputs, one per partition.</li>
 *   <li>One argument: 0 - the number of reducers (N).</li>
 * </ul>
 */
public class SWTeraBucketer implements Task {

    /** Exit status used for every I/O failure reported by this task. */
    private static final int EXIT_FAILURE = 2;

    /** One buffered record: the key used for partitioning/sorting and its value. */
    private static class TextPair {
        final Text key;
        final Text value;

        TextPair(Text key, Text value) {
            this.key = key;
            this.value = value;
        }
    }

    /**
     * Thread that serialises an array of records to one output stream and
     * closes the stream when done.
     */
    private static class StreamPusher extends Thread {
        private final TextPair[] pairs;
        private final OutputStream out;
        private final DataOutputStream dataOut;

        /**
         * First I/O failure hit while writing or closing, or {@code null} if
         * the thread completed cleanly. Intended to be read after
         * {@code join()} has returned.
         */
        public Exception e;

        public StreamPusher(TextPair[] pairs, OutputStream out) {
            this.pairs = pairs;
            this.out = out;
            this.dataOut = new DataOutputStream(new BufferedOutputStream(out));
        }

        @Override
        public void run() {
            IOException failure = null;
            try {
                for (TextPair pair : pairs) {
                    pair.key.write(this.dataOut);
                    pair.value.write(this.dataOut);
                }
                // Flush explicitly: some JDK releases ignore flush failures
                // inside FilterOutputStream.close(), which would silently
                // drop the tail of the buffered data.
                this.dataOut.flush();
            }
            catch (IOException ex) {
                failure = ex;
            }
            finally {
                // Always release the streams, even when a write failed.
                // Keep the first failure; later close errors are secondary.
                try {
                    this.dataOut.close();
                }
                catch (IOException ex) {
                    if (failure == null) {
                        failure = ex;
                    }
                }
                try {
                    this.out.close();
                }
                catch (IOException ex) {
                    if (failure == null) {
                        failure = ex;
                    }
                }
            }
            if (failure != null) {
                System.err.println("Error writing: " + failure);
                this.e = failure;
            }
        }
    }

    /** Adapts a {@code TextPair[]} to {@code IndexedSortable}, ordering by key. */
    private static class ISTextArray implements IndexedSortable {
        private final TextPair[] myArray;

        public ISTextArray(TextPair[] p) {
            myArray = p;
        }

        @Override
        public int compare(int i, int j) {
            return myArray[i].key.compareTo(myArray[j].key);
        }

        @Override
        public void swap(int i, int j) {
            TextPair temp = myArray[i];
            myArray[i] = myArray[j];
            myArray[j] = temp;
        }
    }

    /**
     * Buckets, sorts and writes the records.
     *
     * @param inputs  input 0 holds the partition boundaries (only read when
     *                there is more than one partition); input 1 holds the
     *                records to bucket
     * @param outputs one stream per partition; each one is closed by this call
     * @param args    {@code args[0]} is the number of partitions (reducers)
     * @throws IllegalArgumentException if the partition count is not a
     *         positive integer or fewer output streams than partitions were
     *         supplied
     */
    public void invoke(InputStream[] inputs, OutputStream[] outputs, String[] args) {
        int nPartitions = Integer.parseInt(args[0]);
        if (nPartitions < 1) {
            throw new IllegalArgumentException(
                    "Number of reducers must be at least 1, got " + nPartitions);
        }
        // Fail before doing any work rather than after reading and sorting everything.
        if (outputs.length < nPartitions) {
            throw new IllegalArgumentException(
                    "Expected " + nPartitions + " outputs, got " + outputs.length);
        }

        // With a single partition there are no boundaries and input 0 is never touched.
        TotalOrderPartitioner part = null;
        if (nPartitions > 1) {
            Text[] boundaries = readBoundaries(inputs[0], nPartitions - 1);
            part = new TotalOrderPartitioner();
            part.configure(boundaries);
        }

        List<List<TextPair>> buckets = bucketRecords(inputs[1], part, nPartitions);

        // Sort every bucket first, then start all writers so they run concurrently.
        StreamPusher[] pushThreads = new StreamPusher[nPartitions];
        for (int i = 0; i < nPartitions; i++) {
            List<TextPair> bucket = buckets.get(i);
            TextPair[] pairs = bucket.toArray(new TextPair[bucket.size()]);
            if (pairs.length > 0) {
                new QuickSort().sort(new ISTextArray(pairs), 0, pairs.length);
            }
            pushThreads[i] = new StreamPusher(pairs, outputs[i]);
        }
        for (StreamPusher pusher : pushThreads) {
            pusher.start();
        }
        awaitPushers(pushThreads);
    }

    /** Reads {@code count} split points from {@code in}; exits the JVM on I/O failure. */
    private static Text[] readBoundaries(InputStream in, int count) {
        DataInputStream dis = new DataInputStream(in);
        Text[] boundaries = new Text[count];
        for (int i = 0; i < count; i++) {
            try {
                Text splitValue = new Text();
                splitValue.readFields(dis);
                System.out.printf("Split %d is at %s\n", i, splitValue.toString());
                boundaries[i] = splitValue;
            }
            catch (IOException e) {
                System.err.println("Exception reading partition " + i + ": " + e);
                System.exit(EXIT_FAILURE);
            }
        }
        return boundaries;
    }

    /**
     * Reads every record from {@code in} and appends it to the bucket chosen
     * by {@code part}, or to bucket 0 when {@code part} is {@code null}.
     * Exits the JVM on I/O failure.
     */
    private static List<List<TextPair>> bucketRecords(InputStream in, TotalOrderPartitioner part,
            int nPartitions) {
        List<List<TextPair>> buckets = new ArrayList<List<TextPair>>(nPartitions);
        for (int i = 0; i < nPartitions; i++) {
            buckets.add(new ArrayList<TextPair>());
        }

        RecordReader<Text, Text> reader = null;
        try {
            reader = new TeraInputFormat.TeraRecordReader(in);
        }
        catch (IOException e) {
            System.err.println("Exception opening input: " + e);
            System.exit(EXIT_FAILURE);
        }

        try {
            // Fresh Text objects per record: the buffered pair keeps references to them.
            Text key = new Text();
            Text value = new Text();
            while (reader.next(key, value)) {
                int thisPart = (part != null) ? part.getPartition(key, value, nPartitions) : 0;
                buckets.get(thisPart).add(new TextPair(key, value));
                key = new Text();
                value = new Text();
            }
        }
        catch (IOException e) {
            System.err.println("Exception reading records: " + e);
            System.exit(EXIT_FAILURE);
        }
        return buckets;
    }

    /** Waits for every pusher and exits the JVM if joining or writing failed. */
    private static void awaitPushers(StreamPusher[] pushThreads) {
        for (StreamPusher pusher : pushThreads) {
            try {
                pusher.join();
            }
            catch (InterruptedException e) {
                // Report the interruption itself, not the pusher's (likely null) error field.
                System.err.println("Joining a pusher thread failed with exception " + e);
                System.exit(EXIT_FAILURE);
            }
            if (pusher.e != null) {
                System.err.println("A pusher thread failed with exception " + pusher.e);
                System.exit(EXIT_FAILURE);
            }
        }
    }
}