package com.linkedin.camus.workallocater;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import com.linkedin.camus.etl.kafka.mapred.EtlSplit;
/**
 * Default work allocator: sorts the pull requests by descending estimated
 * data size and then greedily assigns each request to the currently smallest
 * split (longest-processing-time-first style load balancing across map tasks).
 */
public class BaseAllocator extends WorkAllocator {
  protected Properties props;

  /**
   * Stores the job properties for later use by this allocator and subclasses.
   *
   * @param props job configuration properties
   */
  public void init(Properties props) {
    this.props = props;
  }

  /**
   * Sorts {@code requests} in place by descending estimated data size, so the
   * largest requests are placed first by the greedy allocation in
   * {@link #allocateWork(List, JobContext)}.
   *
   * @param requests the requests to sort; modified in place
   */
  protected void reverseSortRequests(List<CamusRequest> requests) {
    // Reverse sort by size
    Collections.sort(requests, new Comparator<CamusRequest>() {
      @Override
      public int compare(CamusRequest o1, CamusRequest o2) {
        // Evaluate each estimate exactly once (the original re-invoked
        // estimateDataSize() per branch); Long.compare replaces the
        // hand-rolled three-way comparison. Arguments are swapped to get
        // descending order.
        return Long.compare(o2.estimateDataSize(), o1.estimateDataSize());
      }
    });
  }

  /**
   * Creates {@code mapred.map.tasks} (default 30) empty splits and greedily
   * assigns each request, largest first, to the smallest split so far.
   *
   * @param requests the Kafka pull requests to distribute; sorted in place
   * @param context  job context supplying the Hadoop configuration
   * @return the populated splits, or an empty list when there are no requests
   * @throws IOException if querying split sizes fails
   */
  @Override
  public List<InputSplit> allocateWork(List<CamusRequest> requests, JobContext context) throws IOException {
    int numTasks = context.getConfiguration().getInt("mapred.map.tasks", 30);

    reverseSortRequests(requests);

    List<InputSplit> kafkaETLSplits = new ArrayList<InputSplit>();
    // The emptiness test is loop-invariant, so hoist it: create splits only
    // when there is work to assign (the original re-checked every iteration).
    // Note: this deliberately creates numTasks splits even when there are
    // fewer requests; surplus splits simply stay empty.
    if (!requests.isEmpty()) {
      for (int i = 0; i < numTasks; i++) {
        kafkaETLSplits.add(new EtlSplit());
      }
    }

    for (CamusRequest r : requests) {
      getSmallestMultiSplit(kafkaETLSplits).addRequest(r);
    }

    return kafkaETLSplits;
  }

  /**
   * Returns the split with the smallest total length; ties are broken by the
   * lower request count so that equal-size splits receive requests evenly.
   *
   * @param kafkaETLSplits non-empty list of {@link EtlSplit}s to scan
   * @return the smallest split in the list
   * @throws IOException if {@code getLength()} fails on a split
   */
  protected EtlSplit getSmallestMultiSplit(List<InputSplit> kafkaETLSplits) throws IOException {
    EtlSplit smallest = (EtlSplit) kafkaETLSplits.get(0);
    for (int i = 1; i < kafkaETLSplits.size(); i++) {
      EtlSplit candidate = (EtlSplit) kafkaETLSplits.get(i);
      // Prefer the strictly shorter split; on equal length, prefer the one
      // holding fewer requests (same criterion as the original, restated
      // candidate-first for readability).
      if (candidate.getLength() < smallest.getLength()
          || (candidate.getLength() == smallest.getLength()
              && candidate.getNumRequests() < smallest.getNumRequests())) {
        smallest = candidate;
      }
    }
    return smallest;
  }
}