package com.github.martinprillard.shavadoop.slave;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import com.github.martinprillard.shavadoop.slave.tasktracker.StateSlave;
import com.github.martinprillard.shavadoop.util.Constant;
import com.github.martinprillard.shavadoop.util.Pair;
import com.github.martinprillard.shavadoop.util.Util;
import org.apache.commons.io.FilenameUtils;
import com.github.martinprillard.shavadoop.network.FileTransfert;
import com.github.martinprillard.shavadoop.network.SSHManager;
import com.github.martinprillard.shavadoop.util.PropReader;
/**
*
* @author martin prillard
*
*/
/**
 * A worker node: executes either the split/mapping task or the
 * shuffling/reducing task assigned by the master, reports its state through a
 * {@link StateSlave} thread, and exchanges files with the master over SSH/SCP.
 *
 * Thread-safety: {@code finalMapsInMemory} and {@code sortedMaps} are
 * concurrent because worker threads write into them; the remaining fields are
 * only touched from the main task flow (plus the state flags read by the task
 * tracker thread).
 */
public class Slave {

    public final static String SPLIT_MAPPING_FUNCTION = "split_mapping_function";
    public final static String SHUFFLING_MAP_FUNCTION = "shuffling_map_function";

    private PropReader prop = new PropReader();
    // set to true only when launchTask completes without any recorded failure
    private boolean taskFinished = false;
    // which task to run: SPLIT_MAPPING_FUNCTION or SHUFFLING_MAP_FUNCTION
    private String functionName;
    private String hostMaster;
    // input file: the split to map, or the DSM (dictionary) file to shuffle
    private String fileToTreat;
    private SSHManager sm;
    // overall health flag: flipped to false by any failed sub-step
    private boolean state = true;
    private String msgError = "DEFAULT_MESSAGE";
    private int portMasterDictionary;
    private int portTaskTracker;
    // word -> total count, aggregated across all sorted maps; concurrent
    // because shuffling threads may touch it, final because never reassigned
    private final ConcurrentHashMap<String, Integer> finalMapsInMemory = new ConcurrentHashMap<String, Integer>();
    private String idWorker;
    private int nbWorker;

    /**
     * @param _nbWorker     total number of workers (decimal integer)
     * @param _idWorker     this worker's identifier
     * @param _hostMaster   hostname of the master node
     * @param _functionName task to run ({@link #SPLIT_MAPPING_FUNCTION} or
     *                      {@link #SHUFFLING_MAP_FUNCTION})
     * @param _fileToTreat  input file for the task
     * @throws NumberFormatException if a count or configured port is not numeric
     */
    public Slave(String _nbWorker, String _idWorker, String _hostMaster, String _functionName, String _fileToTreat) {
        nbWorker = Integer.parseInt(_nbWorker);
        idWorker = _idWorker;
        hostMaster = _hostMaster;
        functionName = _functionName;
        fileToTreat = _fileToTreat;
        portMasterDictionary = Integer.parseInt(prop.getPropValues(PropReader.PORT_MASTER_DICTIONARY));
        portTaskTracker = Integer.parseInt(prop.getPropValues(PropReader.PORT_TASK_TRACKER));
    }

    /**
     * Execute a worker's task: start the state-reporting thread, then run
     * either the mapping or the shuffling phase depending on
     * {@code functionName}. Marks the task finished only if no step failed.
     */
    public void launchTask() {
        // initialize the SSH manager
        sm = new SSHManager(hostMaster);
        sm.initialize();
        // launch thread slave state for the task tracker
        StateSlave sst = new StateSlave(this, hostMaster, portTaskTracker);
        sst.start();
        switch (functionName) {
        case SPLIT_MAPPING_FUNCTION:
            // launch map method
            splitMapping(nbWorker, hostMaster, fileToTreat);
            break;
        case SHUFFLING_MAP_FUNCTION:
            int threadMaxByWorker = Integer.parseInt(prop.getPropValues(PropReader.THREAD_MAX_BY_WORKER));
            int threadQueueMaxByWorker = Integer.parseInt(prop.getPropValues(PropReader.THREAD_QUEUE_MAX_BY_WORKER));
            // launch shuffling map threads, then fold their output in memory
            ConcurrentHashMap<String, CopyOnWriteArrayList<Integer>> sortedMaps = launchShufflingMapThread(threadMaxByWorker, threadQueueMaxByWorker);
            mappingSortedMapsInMemory(sortedMaps);
            // write the RM (reduced map) file
            String fileToAssemble = Constant.PATH_F_REDUCING + Constant.SEP_NAME_FILE + idWorker + Constant.SEP_NAME_FILE + sm.getHostFull();
            Util.writeFileFromMap(fileToAssemble, finalMapsInMemory);
            // ship the RM file back to the master
            ExecutorService esScpFile = Util.fixedThreadPoolWithQueueSize(threadMaxByWorker, threadQueueMaxByWorker);
            esScpFile.execute(new FileTransfert(sm, hostMaster, fileToAssemble, fileToAssemble, true, false));
            shutdownAndAwait(esScpFile, Integer.parseInt(prop.getPropValues(PropReader.THREAD_MAX_LIFETIME)));
            break;
        }
        // if no fail
        if (state) {
            taskFinished = true;
        }
    }

    /**
     * Shut down the executor and wait up to {@code timeoutMinutes} for its
     * tasks to finish. On interruption the interrupt flag is restored and the
     * slave is flagged as failed.
     *
     * @param es             executor to shut down (never null)
     * @param timeoutMinutes maximum wait, in minutes
     */
    private void shutdownAndAwait(ExecutorService es, long timeoutMinutes) {
        es.shutdown();
        try {
            es.awaitTermination(timeoutMinutes, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            // restore the interrupt status so callers up the stack can see it
            Thread.currentThread().interrupt();
            e.printStackTrace();
            msgError = e.getMessage();
            state = false;
        }
    }

    /**
     * Map method: cut the input file into chunks, map each chunk in parallel
     * into per-destination-worker unsorted maps, write one UM file per
     * destination worker, and send the resulting part dictionary to the master.
     *
     * @param nbWorker   number of destination workers to partition into
     * @param hostMaster hostname of the master node
     * @param fileToMap  path of the split file to map
     */
    private void splitMapping(int nbWorker, String hostMaster, String fileToMap) {
        try {
            int totalLine = Util.getFileNumberLine(fileToMap);
            // empty input: nothing to map, but the master still expects a
            // dictionary message (guards the modulo below against /0 too)
            if (totalLine == 0) {
                sendDictionaryElement(hostMaster, new HashMap<String, Pair>());
                return;
            }
            // one unsorted map per destination worker
            List<ConcurrentHashMap<String, AtomicInteger>> unsortedMaps = new ArrayList<ConcurrentHashMap<String, AtomicInteger>>();
            for (int i = 0; i < nbWorker; i++) {
                unsortedMaps.add(new ConcurrentHashMap<String, AtomicInteger>());
            }
            Map<String, Pair> partDictionary = new HashMap<String, Pair>();
            // number of chunks: at most one per line of the file
            int nbChunks = Math.min(Constant.THREAD_MAX_SPLIT_MAPPING, totalLine);
            int restLineByThread = totalLine % nbChunks;
            // number of lines handled by each chunk (remainder goes to the last one)
            int nbLineByThread = (totalLine - restLineByThread) / nbChunks;
            ExecutorService es = Executors.newCachedThreadPool();
            List<String> chunk = new ArrayList<String>();
            int nbChunksCreated = 0;
            try (BufferedReader read = new BufferedReader(new FileReader(fileToMap))) {
                String line;
                while ((line = read.readLine()) != null) {
                    chunk.add(line);
                    // dispatch a full chunk, or the (larger) final chunk
                    boolean fullChunk = chunk.size() == nbLineByThread && nbChunksCreated < nbChunks - 1;
                    boolean lastChunk = chunk.size() == nbLineByThread + restLineByThread && nbChunksCreated == nbChunks - 1;
                    if (fullChunk || lastChunk) {
                        es.execute(new SplitMappingThread(unsortedMaps, chunk, nbWorker));
                        ++nbChunksCreated;
                        chunk = new ArrayList<String>();
                    }
                }
            }
            // safety net: flush leftover lines if the file grew after counting
            if (!chunk.isEmpty()) {
                es.execute(new SplitMappingThread(unsortedMaps, chunk, nbWorker));
            }
            shutdownAndAwait(es, Constant.THREAD_MAX_LIFETIME);
            // write one UM file per non-empty destination-worker map
            int idNextWorker = 0;
            for (ConcurrentHashMap<String, AtomicInteger> e : unsortedMaps) {
                if (!e.isEmpty()) {
                    String fileToShuffle = Constant.PATH_F_MAPPING + Constant.SEP_NAME_FILE + idWorker + Constant.SEP_NAME_FILE + Constant.F_MAPPING_BY_WORKER + Constant.SEP_NAME_FILE + idNextWorker + Constant.SEP_NAME_FILE + sm.getHostFull();
                    Util.writeFileFromMapAtomic(fileToShuffle, e);
                    partDictionary.put(String.valueOf(idNextWorker), new Pair(sm.getHostFull(), fileToShuffle));
                }
                ++idNextWorker;
            }
            // send dictionary with UNIQUE key (word) and hostname to the master
            sendDictionaryElement(hostMaster, partDictionary);
        } catch (Exception e) {
            e.printStackTrace();
            msgError = e.getMessage();
            state = false;
        }
    }

    /**
     * Send to the master the id of each next worker and the names of the UM
     * files that worker must treat.
     *
     * @param hostMaster     hostname of the master node
     * @param partDictionary next-worker id -> (host, UM file) pairs
     * @throws UnknownHostException if the master host cannot be resolved
     * @throws IOException          if the connection or the write fails
     */
    private void sendDictionaryElement(String hostMaster, Map<String, Pair> partDictionary) throws UnknownHostException, IOException {
        // try-with-resources guarantees the socket closes even on failure
        try (Socket socket = new Socket(hostMaster, portMasterDictionary);
                ObjectOutputStream out = new ObjectOutputStream(socket.getOutputStream())) {
            out.writeObject(partDictionary);
            out.flush();
        }
    }

    /**
     * Launch the shuffling map process for each UM file listed in the DSM
     * file: fetch missing UM files from their hosts in bulk, then shuffle each
     * file in parallel into {@code sortedMaps}.
     *
     * @param threadMaxByWorker      max threads per executor
     * @param threadQueueMaxByWorker max queued tasks per executor
     * @return word -> list of per-file counts (empty on failure; {@code state}
     *         is then false)
     */
    private ConcurrentHashMap<String, CopyOnWriteArrayList<Integer>> launchShufflingMapThread(int threadMaxByWorker, int threadQueueMaxByWorker) {
        ConcurrentHashMap<String, CopyOnWriteArrayList<Integer>> sortedMaps = new ConcurrentHashMap<String, CopyOnWriteArrayList<Integer>>();
        HashMap<String, List<String>> filesByHost = new HashMap<String, List<String>>();
        try {
            // for each UM file in the DSM file, group UM files by owning host
            try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileToTreat)))) {
                String shufflingDictionaryLine;
                while ((shufflingDictionaryLine = br.readLine()) != null) {
                    String[] elements = shufflingDictionaryLine.split(Constant.SEP_CONTAINS_FILE);
                    String host = elements[0];
                    String fileToShuffling = elements[1];
                    filesByHost.computeIfAbsent(host, k -> new ArrayList<String>()).add(fileToShuffling);
                }
            }
            int lifetimeMinutes = Integer.parseInt(prop.getPropValues(PropReader.THREAD_MAX_LIFETIME));
            ExecutorService es = Util.fixedThreadPoolWithQueueSize(threadMaxByWorker, threadQueueMaxByWorker);
            for (Entry<String, List<String>> e : filesByHost.entrySet()) {
                // build the separator-joined list of files not yet present locally
                StringBuilder listFileToShuffling = new StringBuilder();
                for (String fileToShuffling : e.getValue()) {
                    String fileToShufflingDest = Constant.PATH_REPO_RES + FilenameUtils.getName(fileToShuffling);
                    if (!new File(fileToShufflingDest).exists()) {
                        if (listFileToShuffling.length() > 0) {
                            listFileToShuffling.append(Constant.SEP_SCP_FILES);
                        }
                        listFileToShuffling.append(fileToShufflingDest);
                    }
                }
                // launch bulk file transfert master/slave UM files -> slave
                if (listFileToShuffling.length() > 0) {
                    es.execute(new FileTransfert(sm, e.getKey(), listFileToShuffling.toString(), Constant.PATH_REPO_RES, false, true));
                }
            }
            shutdownAndAwait(es, lifetimeMinutes);
            // shuffle every UM file, now that they are all local
            es = Util.fixedThreadPoolWithQueueSize(threadMaxByWorker, threadQueueMaxByWorker);
            for (Entry<String, List<String>> e : filesByHost.entrySet()) {
                for (String fileToShuffling : e.getValue()) {
                    es.execute(new ShufflingMapThread(this, sortedMaps, fileToShuffling));
                }
            }
            shutdownAndAwait(es, lifetimeMinutes);
        } catch (IOException e) {
            System.out.println("No shuffling dictionary file : " + fileToTreat);
            msgError = e.getMessage();
            state = false;
        }
        return sortedMaps;
    }

    /**
     * Reduce method in-memory: sum each word's list of per-file counts from
     * {@code sortedMaps} into {@code finalMapsInMemory}.
     *
     * @param sortedMaps word -> list of counts produced by the shuffling threads
     */
    public void mappingSortedMapsInMemory(ConcurrentHashMap<String, CopyOnWriteArrayList<Integer>> sortedMaps) {
        try {
            // concat the localFinalMaps with the finalMapsInMemory
            for (Entry<String, CopyOnWriteArrayList<Integer>> e : sortedMaps.entrySet()) {
                int counterTotal = 0;
                for (Integer counter : e.getValue()) {
                    counterTotal += counter;
                }
                finalMapsInMemory.put(e.getKey(), counterTotal);
            }
        } catch (Exception e) {
            e.printStackTrace();
            msgError = e.getMessage();
            state = false;
        }
    }

    /** @return false if any sub-step of the task failed */
    public boolean isState() {
        return state;
    }

    public void setState(boolean state) {
        this.state = state;
    }

    /** @return true once the whole task completed without failure */
    public boolean isTaskFinished() {
        return taskFinished;
    }

    /** @return the last recorded error message, or the default placeholder */
    public String getMsgError() {
        return msgError;
    }

    public void setMsgError(String msgError) {
        this.msgError = msgError;
    }
}