package com.github.martinprillard.shavadoop.util; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.LineNumberReader; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; /** * * @author martin prillard * */ public class Util { /** * Write a file from map * * @param nameFile * @param content */ public static void writeFileFromMap(String nameFile, Map<String, Integer> content) { try { FileWriter fw = new FileWriter(nameFile); BufferedWriter bw = new BufferedWriter(fw); PrintWriter write = new PrintWriter(bw); for (Entry<String, Integer> entry : content.entrySet()) { write.println(entry.getKey() + Constant.SEP_CONTAINS_FILE + entry.getValue()); } write.close(); bw.close(); fw.close(); } catch (Exception e) { e.printStackTrace(); } } /** * Write a file from map * * @param nameFile * @param content */ public static void writeFileFromMapAtomic(String nameFile, Map<String, AtomicInteger> content) { try { FileWriter fw = new FileWriter(nameFile); BufferedWriter bw = new BufferedWriter(fw); PrintWriter write = new PrintWriter(bw); for (Entry<String, AtomicInteger> entry : content.entrySet()) { write.println(entry.getKey() + Constant.SEP_CONTAINS_FILE + entry.getValue().get()); } write.close(); bw.close(); fw.close(); } catch (Exception e) { e.printStackTrace(); } } /** * Write a file from String * * @param nameFile * @param content */ public static void writeFile(String nameFile, String content) { try { FileWriter fw = new FileWriter(nameFile); BufferedWriter bw = new BufferedWriter(fw); PrintWriter write = new PrintWriter(bw); write.print(content); write.close(); bw.close(); fw.close(); } catch (Exception e) { e.printStackTrace(); } } /** * Write a file from list of string * * @param nameFile * @param content */ public static void writeFile(String nameFile, List<String> content) { try { FileWriter fw = new FileWriter(nameFile); BufferedWriter bw = new BufferedWriter(fw); PrintWriter write = new PrintWriter(bw); for (String line : content) { write.println(line); } write.close(); bw.close(); fw.close(); } catch (Exception e) { e.printStackTrace(); } } /** * Write a file from list of pair * * @param nameFile * @param content */ public static void writeFileFromPair(String nameFile, List<Pair> content) { // if the file exist, we concat if (new File(nameFile).exists()) { try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(nameFile, true)))) { for (Pair p : content) { out.println(p.getVal1() + Constant.SEP_CONTAINS_FILE + p.getVal2()); } } catch (Exception e) { e.printStackTrace(); } // we create the file } else { try { FileWriter fw = new FileWriter(nameFile); BufferedWriter bw = new BufferedWriter(fw); PrintWriter write = new PrintWriter(bw); for (Pair p : content) { write.println(p.getVal1() + Constant.SEP_CONTAINS_FILE + p.getVal2()); } write.close(); bw.close(); fw.close(); } catch (Exception e) { System.out.println(e.toString()); } } } /** * Create a directory recursively * * @param file */ private static void createDirectory(File file) { // if the directory does not exist, create it File parent = new File(file.getParent()); if (parent != null && !parent.exists()) { createDirectory(parent); } try { file.mkdir(); } catch (Exception e) { e.printStackTrace(); } } /** * Clean the directory * * @param file */ private static void cleanDirectory(File file) { try { FileUtils.cleanDirectory(file); } catch (IOException e) { e.printStackTrace(); } } /** * Create and clean a directory * * @param pathRepoRes * @param clean */ public static void initializeResDirectory(String pathRepoRes, boolean clean) { Pattern paternRootPath = Pattern.compile(Constant.PATH_ROOT); Matcher matcherRootPath = paternRootPath.matcher(pathRepoRes); // clean directory if (!matcherRootPath.find()) { createDirectory(new File(pathRepoRes)); if (clean) { cleanDirectory(new File(pathRepoRes)); } if (Constant.MODE_DEBUG) System.out.println(pathRepoRes + " directory cleaned"); } else { if (Constant.MODE_DEBUG) System.out.println(pathRepoRes + " is the root path ! "); } } /** * Create a exector service with n threads running max and n threads max in queue * * @param nThreads * @param queueSize * @return executor service */ public static ExecutorService fixedThreadPoolWithQueueSize(int nThreads, int queueSize) { return new ThreadPoolExecutor(nThreads, nThreads, 5000L, TimeUnit.MILLISECONDS, new ArrayBlockingQueue<Runnable>(queueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); } /** * get the number of line of the file * * @param file * @return number of line for this file */ public static int getFileNumberLine(String file) { int nbLine = 0; FileReader fic; try { fic = new FileReader(new File(file)); LineNumberReader lnr = new LineNumberReader(fic); lnr.skip(Long.MAX_VALUE); nbLine = lnr.getLineNumber(); lnr.close(); } catch (Exception e) { e.printStackTrace(); } return nbLine; } /** * Split a file by line * * @param file * @param nbLineByHost * @param restLineByHost * @param nbWorkerMappers * @return list files splitted */ public static List<String> splitByLineFile(String file, int nbLineByHost, int restLineByHost, int nbWorkerMappers) { List<String> filesToMap = new ArrayList<String>(); try { String line = null; int nbFile = 0; // content of the file List<String> content = new ArrayList<String>(); FileReader fic = new FileReader(new File(file)); BufferedReader read = new BufferedReader(fic); while ((line = read.readLine()) != null) { // add line by line to the content file content.add(line); // write the complete file by block or if it's the end of the file if ((content.size() == nbLineByHost && nbFile < nbWorkerMappers - 1) || (content.size() == nbLineByHost + restLineByHost && nbFile == nbWorkerMappers - 1)) { // for each group of line, we write a new file ++nbFile; String fileToMap = Constant.PATH_F_SPLITING + nbFile; Util.writeFile(fileToMap, content); if (Constant.MODE_DEBUG) System.out.println("Input file splited in : " + fileToMap); // we save names of theses files in a list filesToMap.add(fileToMap); // reset content = new ArrayList<String>(); } } read.close(); fic.close(); } catch (IOException e) { e.printStackTrace(); } return filesToMap; } /** * Split large file by bloc * * @param file */ public static List<String> splitLargeFile(String file) { List<String> filesToMap = new ArrayList<String>(); File inputFile = new File(file); FileInputStream inputStream; FileOutputStream filePart; long fileSize = inputFile.length(); int nbFile = 0; int read = 0; int readLength = Constant.BLOC_SIZE_MIN; byte[] byteChunkPart; try { inputStream = new FileInputStream(inputFile); while (fileSize > 0) { if (Constant.BLOC_SIZE_MIN > fileSize) { readLength = (int) fileSize; } byteChunkPart = new byte[readLength]; read = inputStream.read(byteChunkPart, 0, readLength); fileSize -= read; assert (read == byteChunkPart.length); nbFile++; String fileToMap = Constant.PATH_F_SPLITING + nbFile; filePart = new FileOutputStream(new File(fileToMap)); filesToMap.add(fileToMap); filePart.write(byteChunkPart); filePart.flush(); filePart.close(); byteChunkPart = null; filePart = null; } inputStream.close(); } catch (IOException exception) { exception.printStackTrace(); } return filesToMap; } /** * FNVHash * * @param k * @return hashage */ public static long hash64(final String k) { long FNV_64_INIT = 0xcbf29ce484222325L; long FNV_64_PRIME = 0x100000001b3L; long rv = FNV_64_INIT; final int len = k.length(); for (int i = 0; i < len; i++) { rv ^= k.charAt(i); rv *= FNV_64_PRIME; } return rv; } /** * Simple hashage function * * @param k * @return hashage */ public static long simpleHash(final String k) { long hash = 7; for (int i = 0; i < k.length(); i++) { hash = hash * 31 + (k.charAt(i)); } return hash; } }