/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.regression.core.util;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;

/**
 * Util methods related to hadoop.
 */
public final class HadoopUtil {

    public static final String SOMETHING_RANDOM = "somethingRandom";
    private static final Logger LOGGER = Logger.getLogger(HadoopUtil.class);
    private static Pattern protocol = Pattern.compile(":[\\d]+/");

    private HadoopUtil() {
        throw new AssertionError("Instantiating utility class...");
    }

    /*
     * Removes 'hdfs(hftp)://server:port'
     */
    public static String cutProtocol(String path) {
        if (StringUtils.isNotEmpty(path)) {
            if (protocol.matcher(path).find()) {
                return '/' + protocol.split(path)[1];
            }
        }
        return path;
    }

    public static String joinPath(String basePath, String... restParts) {
        final String separator = "/";
        List<String> cleanParts = new ArrayList<>();
        String cleanBasePath = basePath.replaceFirst(separator + "$", "");
        cleanParts.add(cleanBasePath);
        for (String onePart : restParts) {
            final String cleanPart = onePart
                .replaceFirst("^" + separator, "")
                .replaceFirst(separator + "$", "");
            cleanParts.add(cleanPart);
        }
        return StringUtils.join(cleanParts, separator);
    }

    /**
     * Retrieves all file names contained in a given directory.
     * @param fs filesystem
     * @param location given directory
     * @return list of file names
     * @throws IOException
     */
    public static List<String> getAllFilesHDFS(FileSystem fs, Path location) throws IOException {
        List<String> files = new ArrayList<>();
        if (!fs.exists(location)) {
            return files;
        }
        FileStatus[] stats = fs.listStatus(location);
        for (FileStatus stat : stats) {
            if (!isDir(stat)) {
                files.add(stat.getPath().toString());
            }
        }
        return files;
    }
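    /*
     * Illustrative usage only (not part of the original class). Assuming "fs" is a
     * FileSystem obtained from the cluster under test, the path helpers above behave
     * roughly like:
     *
     *   HadoopUtil.cutProtocol("hdfs://namenode:8020/tmp/falcon");   // -> "/tmp/falcon"
     *   HadoopUtil.joinPath("/tmp/falcon/", "staging", "/working");  // -> "/tmp/falcon/staging/working"
     *   List<String> files = HadoopUtil.getAllFilesHDFS(fs, new Path("/tmp/falcon"));
     */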
    /**
     * Retrieves all directories within a given depth starting from a specific dir.
     * @param fs filesystem
     * @param location given dir
     * @param depth depth
     * @return all matching directories
     * @throws IOException
     */
    public static List<Path> getAllDirsRecursivelyHDFS(
        FileSystem fs, Path location, int depth) throws IOException {
        List<Path> returnList = new ArrayList<>();
        FileStatus[] stats = fs.listStatus(location);
        for (FileStatus stat : stats) {
            if (isDir(stat)) {
                returnList.add(stat.getPath());
                if (depth > 0) {
                    returnList.addAll(getAllDirsRecursivelyHDFS(fs, stat.getPath(), depth - 1));
                }
            }
        }
        return returnList;
    }

    /**
     * Recursively retrieves all data file names from a given location.
     * @param fs filesystem
     * @param location given location
     * @return list of all files
     * @throws IOException
     */
    public static List<Path> getAllFilesRecursivelyHDFS(
        FileSystem fs, Path location) throws IOException {
        List<Path> returnList = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> remoteIterator;
        try {
            remoteIterator = fs.listFiles(location, true);
        } catch (FileNotFoundException e) {
            LOGGER.info("Path '" + location + "' is not found on " + fs.getUri());
            return returnList;
        }
        while (remoteIterator.hasNext()) {
            Path path = remoteIterator.next().getPath();
            if (!path.toUri().toString().contains("_SUCCESS")) {
                returnList.add(path);
            }
        }
        return returnList;
    }

    /**
     * Checks whether a given location contains the availability flag file.
     * If availabilityFlag is empty, the default _SUCCESS file is looked for instead.
     * @param fs filesystem
     * @param location given location
     * @param availabilityFlag value of availability flag set in entity
     * @return true if the flag file is present in the given location
     * @throws IOException
     */
    public static boolean getSuccessFolder(
        FileSystem fs, Path location, String availabilityFlag) throws IOException {
        LOGGER.info("location : " + location);
        for (FileStatus stat : fs.listStatus(location)) {
            if (availabilityFlag.isEmpty()) {
                if (stat.getPath().getName().equals("_SUCCESS")) {
                    return true;
                }
            } else {
                if (stat.getPath().getName().equals(availabilityFlag)) {
                    return true;
                }
            }
        }
        return false;
    }

    @SuppressWarnings("deprecation")
    private static boolean isDir(FileStatus stat) {
        return stat.isDir();
    }

    /**
     * Copies file from local place to hdfs location.
     * @param fs target filesystem
     * @param dstHdfsDir destination
     * @param srcFileLocation source location
     * @throws IOException
     */
    public static void copyDataToFolder(final FileSystem fs, String dstHdfsDir,
                                        final String srcFileLocation) throws IOException {
        LOGGER.info(String.format("Copying local dir %s to hdfs location %s on %s",
            srcFileLocation, dstHdfsDir, fs.getUri()));
        fs.copyFromLocalFile(new Path(srcFileLocation), new Path(cutProtocol(dstHdfsDir)));
    }

    /**
     * Copies a whole directory to hdfs.
     * @param fs target filesystem
     * @param dstHdfsDir destination dir
     * @param localLocation source location
     * @throws IOException
     */
    public static void uploadDir(final FileSystem fs, final String dstHdfsDir,
                                 final String localLocation) throws IOException {
        LOGGER.info(String.format("Uploading local dir %s to hdfs location %s",
            localLocation, dstHdfsDir));
        HadoopUtil.deleteDirIfExists(dstHdfsDir, fs);
        HadoopUtil.copyDataToFolder(fs, dstHdfsDir, localLocation);
    }
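    /*
     * Illustrative usage only (not part of the original class). Checking whether an
     * output folder is ready; "fs" and the paths below are placeholders:
     *
     *   // with an empty availabilityFlag the default _SUCCESS marker is expected
     *   boolean ready = HadoopUtil.getSuccessFolder(fs, new Path("/data/out/2015-01-01"), "");
     *   List<Path> parts = HadoopUtil.getAllFilesRecursivelyHDFS(fs, new Path("/data/out"));
     */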
    /**
     * Writes given data to a hdfs location.
     * @param fs target filesystem
     * @param dstHdfsDir destination dir
     * @param data data to be written
     * @param overwrite whether to overwrite existing data
     * @throws IOException
     */
    public static void writeDataForHive(final FileSystem fs, final String dstHdfsDir,
                                        final CharSequence data, boolean overwrite) throws IOException {
        LOGGER.info(String.format("Writing data %s to hdfs location %s", data, dstHdfsDir));
        final File tempFile = File.createTempFile(UUID.randomUUID().toString().split("-")[0], ".dat");
        FileUtils.write(tempFile, data);
        if (overwrite) {
            HadoopUtil.deleteDirIfExists(dstHdfsDir, fs);
        }
        try {
            fs.mkdirs(new Path(dstHdfsDir));
        } catch (Exception e) {
            //ignore
        }
        fs.setPermission(new Path(dstHdfsDir), FsPermission.getDirDefault());
        HadoopUtil.copyDataToFolder(fs, dstHdfsDir, tempFile.getAbsolutePath());
        if (!tempFile.delete()) {
            LOGGER.warn("Deletion of " + tempFile + " failed.");
        }
    }

    /**
     * Lists names of given directory subfolders.
     * @param fs filesystem
     * @param baseDir given directory
     * @return list of subfolders
     * @throws IOException
     */
    public static List<String> getHDFSSubFoldersName(FileSystem fs,
                                                     String baseDir) throws IOException {
        List<String> returnList = new ArrayList<>();
        FileStatus[] stats = fs.listStatus(new Path(baseDir));
        for (FileStatus stat : stats) {
            if (isDir(stat)) {
                returnList.add(stat.getPath().getName());
            }
        }
        return returnList;
    }

    /**
     * Checks whether a file is present in a given directory.
     * @param fs filesystem
     * @param hdfsPath path to a given directory
     * @param fileToCheckFor file
     * @return whether the file is present or not
     * @throws IOException
     */
    public static boolean isFilePresentHDFS(FileSystem fs, String hdfsPath,
                                            String fileToCheckFor) throws IOException {
        LOGGER.info("getting file from folder: " + hdfsPath);
        List<String> fileNames = getAllFileNamesFromHDFS(fs, hdfsPath);
        for (String filePath : fileNames) {
            if (filePath.contains(fileToCheckFor)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Lists all file names for a given directory.
     * @param fs filesystem
     * @param hdfsPath path to a given directory
     * @return list of files which the given directory contains
     * @throws IOException
     */
    private static List<String> getAllFileNamesFromHDFS(
        FileSystem fs, String hdfsPath) throws IOException {
        List<String> returnList = new ArrayList<>();
        LOGGER.info("getting file from folder: " + hdfsPath);
        FileStatus[] stats = fs.listStatus(new Path(hdfsPath));
        for (FileStatus stat : stats) {
            String currentPath = stat.getPath().toUri().getPath(); // path without scheme/authority
            if (!isDir(stat)) {
                returnList.add(currentPath);
            }
        }
        return returnList;
    }

    /**
     * Removes directory with a given name and creates an empty one with the same name.
     * @param fs filesystem
     * @param path path to a directory
     * @throws IOException
     */
    public static void recreateDir(FileSystem fs, String path) throws IOException {
        deleteDirIfExists(path, fs);
        LOGGER.info("creating hdfs dir: " + path + " on " + fs.getConf().get("fs.default.name"));
        fs.mkdirs(new Path(path));
    }

    /**
     * Recreates dirs for a list of filesystems.
     * @param fileSystems list of filesystems
     * @param path path to a directory
     * @throws IOException
     */
    public static void recreateDir(List<FileSystem> fileSystems, String path) throws IOException {
        for (FileSystem fs : fileSystems) {
            recreateDir(fs, path);
        }
    }
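    /*
     * Illustrative usage only (not part of the original class). Preparing a small
     * Hive-style input directory; "fs" and the path below are placeholders:
     *
     *   HadoopUtil.recreateDir(fs, "/tmp/hive-input");
     *   HadoopUtil.writeDataForHive(fs, "/tmp/hive-input", "1,one\n2,two", true);
     */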
    /**
     * Removes given directory from a filesystem.
     * @param hdfsPath path to a given directory
     * @param fs filesystem
     * @throws IOException
     */
    public static void deleteDirIfExists(String hdfsPath, FileSystem fs) throws IOException {
        Path path = new Path(hdfsPath);
        if (fs.exists(path)) {
            LOGGER.info(String.format("Deleting HDFS path: %s on %s", path, fs.getUri()));
            fs.delete(path, true);
        } else {
            LOGGER.info(String.format(
                "Not deleting non-existing HDFS path: %s on %s", path, fs.getUri()));
        }
    }

    /**
     * Copies files from a source directory into target folders without a path prefix.
     * @param fs filesystem
     * @param inputPath source location
     * @param remoteLocations destination locations
     * @throws IOException
     */
    public static void flattenAndPutDataInFolder(FileSystem fs, String inputPath,
                                                 List<String> remoteLocations) throws IOException {
        flattenAndPutDataInFolder(fs, inputPath, "", remoteLocations);
    }

    /**
     * Copies files from a source directory to target directories on hdfs.
     * @param fs target filesystem
     * @param inputPath source location
     * @param remotePathPrefix prefix for target directories
     * @param remoteLocations target directories
     * @return list of exact locations where data was copied
     * @throws IOException
     */
    public static List<String> flattenAndPutDataInFolder(FileSystem fs, String inputPath,
                                                         String remotePathPrefix,
                                                         List<String> remoteLocations) throws IOException {
        if (StringUtils.isNotEmpty(remotePathPrefix)) {
            deleteDirIfExists(remotePathPrefix, fs);
        }
        LOGGER.info("Creating data in folders: \n" + remoteLocations);
        File input = new File(inputPath);
        File[] files = input.isDirectory() ? input.listFiles() : new File[]{input};
        List<Path> filePaths = new ArrayList<>();
        assert files != null;
        for (final File file : files) {
            if (!file.isDirectory()) {
                final Path filePath = new Path(file.getAbsolutePath());
                filePaths.add(filePath);
            }
        }
        if (!remotePathPrefix.endsWith("/") && !remoteLocations.get(0).startsWith("/")) {
            remotePathPrefix += "/";
        }
        List<String> locations = new ArrayList<>();
        for (String remoteDir : remoteLocations) {
            String remoteLocation = remotePathPrefix + remoteDir;
            remoteLocation = cutProtocol(remoteLocation);
            locations.add(remoteLocation);
            LOGGER.info(String.format("copying to: %s files: %s",
                fs.getUri() + remoteLocation, Arrays.toString(files)));
            if (!fs.exists(new Path(remoteLocation))) {
                fs.mkdirs(new Path(remoteLocation));
            }
            fs.copyFromLocalFile(false, true, filePaths.toArray(new Path[filePaths.size()]),
                new Path(remoteLocation));
        }
        return locations;
    }
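    /*
     * Illustrative usage only (not part of the original class). Flattening a local
     * test-data directory into several dated HDFS folders; the prefix and folder names
     * below are placeholders:
     *
     *   List<String> written = HadoopUtil.flattenAndPutDataInFolder(fs, OSUtil.NORMAL_INPUT,
     *       "/tmp/falcon-regression/input", Arrays.asList("2015/01/01/00", "2015/01/01/05"));
     */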
    /**
     * Copies data from local sources to remote directories.
     * @param fs target filesystem
     * @param folderPrefix prefix for remote directories
     * @param folderList remote directories
     * @param fileLocations sources
     * @throws IOException
     */
    public static void copyDataToFolders(FileSystem fs, final String folderPrefix,
                                         List<String> folderList,
                                         String... fileLocations) throws IOException {
        for (final String folder : folderList) {
            String folderSpace = folder.replaceAll("/", "_");
            File file = new File(OSUtil.NORMAL_INPUT + folderSpace + ".txt");
            FileUtils.writeStringToFile(file, "folder", true);
            fs.copyFromLocalFile(new Path(file.getAbsolutePath()), new Path(folderPrefix + folder));
            if (!file.delete()) {
                LOGGER.info("delete was not successful for file: " + file);
            }
            Path[] srcPaths = new Path[fileLocations.length];
            for (int i = 0; i < srcPaths.length; ++i) {
                srcPaths[i] = new Path(fileLocations[i]);
            }
            LOGGER.info(String.format("copying %s to %s%s on %s",
                Arrays.toString(srcPaths), folderPrefix, folder, fs.getUri()));
            fs.copyFromLocalFile(false, true, srcPaths, new Path(folderPrefix + folder));
        }
    }

    /**
     * Uploads data to remote directories with names within date ranges.
     * @param fs target filesystem
     * @param interval date range before and after the current date
     * @param minuteSkip time to skip within a range to get intermediate directories
     * @param folderPrefix prefix for remote directories
     * @throws IOException
     */
    public static void lateDataReplenish(FileSystem fs, int interval,
                                         int minuteSkip, String folderPrefix) throws IOException {
        List<String> folderData = TimeUtil.getMinuteDatesOnEitherSide(interval, minuteSkip);
        folderData.add(SOMETHING_RANDOM);
        flattenAndPutDataInFolder(fs, OSUtil.NORMAL_INPUT, folderPrefix, folderData);
    }

    /**
     * Creates a list of folders on a remote filesystem.
     * @param fs remote filesystem
     * @param folderPrefix prefix for remote directories
     * @param folderList list of folders
     * @throws IOException
     */
    public static void createFolders(FileSystem fs, final String folderPrefix,
                                     List<String> folderList) throws IOException {
        for (final String folder : folderList) {
            final String pathString = cutProtocol(folderPrefix + folder);
            LOGGER.info("Creating " + fs.getUri() + "/" + pathString);
            fs.mkdirs(new Path(pathString));
        }
    }

    /**
     * Creates folders in a remote location according to the current time and copies files there.
     * @param fs target filesystem
     * @param remoteLocation remote location
     * @param localLocation source
     * @throws IOException
     */
    public static void injectMoreData(FileSystem fs, final String remoteLocation,
                                      String localLocation) throws IOException {
        File[] files = new File(localLocation).listFiles();
        assert files != null;
        for (final File file : files) {
            if (!file.isDirectory()) {
                String path = remoteLocation + "/" + System.currentTimeMillis() / 1000 + "/";
                LOGGER.info("inserting data@ " + path);
                fs.copyFromLocalFile(new Path(file.getAbsolutePath()), new Path(path));
            }
        }
    }
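    /*
     * Illustrative usage only (not part of the original class). Creating hourly feed
     * folders and injecting some extra local data into one of them; the prefix and
     * folder names below are placeholders:
     *
     *   HadoopUtil.createFolders(fs, "/tmp/late-feed/", Arrays.asList("2015/01/01/00", "2015/01/01/01"));
     *   HadoopUtil.injectMoreData(fs, "/tmp/late-feed/2015/01/01/00", OSUtil.NORMAL_INPUT);
     */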
    /**
     * Uploads either the _SUCCESS or the dataFile4.txt file to remote directories with names
     * within date ranges.
     * @param fs target filesystem
     * @param interval date range before and after the current date
     * @param minuteSkip time to skip within a range to get intermediate directories
     * @param folderPrefix prefix for remote directories
     * @param fileToBePut what file to copy to remote locations
     * @throws IOException
     */
    public static void putFileInFolderHDFS(FileSystem fs, int interval, int minuteSkip,
                                           String folderPrefix, String fileToBePut) throws IOException {
        List<String> folderPaths = TimeUtil.getMinuteDatesOnEitherSide(interval, minuteSkip);
        LOGGER.info("folderData: " + folderPaths.toString());
        createFolders(fs, folderPrefix, folderPaths);
        if (fileToBePut.equals("_SUCCESS")) {
            copyDataToFolders(fs, folderPrefix, folderPaths,
                OSUtil.concat(OSUtil.NORMAL_INPUT, "_SUCCESS"));
        } else {
            copyDataToFolders(fs, folderPrefix, folderPaths,
                OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile4.txt"));
        }
    }

    /**
     * Uploads the dataFile4.txt file to remote directories with names within date ranges.
     * @param fs target filesystem
     * @param interval date range before and after the current date
     * @param minuteSkip time to skip within a range to get intermediate directories
     * @param folderPrefix prefix for remote directories
     * @param postFix postfix for remote locations
     * @throws IOException
     */
    public static void lateDataReplenishWithoutSuccess(FileSystem fs, int interval,
            int minuteSkip, String folderPrefix, String postFix) throws IOException {
        List<String> folderPaths = TimeUtil.getMinuteDatesOnEitherSide(interval, minuteSkip);
        LOGGER.info("folderData: " + folderPaths.toString());
        if (postFix != null) {
            for (int i = 0; i < folderPaths.size(); i++) {
                folderPaths.set(i, folderPaths.get(i) + postFix);
            }
        }
        createFolders(fs, folderPrefix, folderPaths);
        copyDataToFolders(fs, folderPrefix, folderPaths,
            OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile4.txt"));
    }

    /**
     * Uploads both the dataFile4.txt and the _SUCCESS files to remote directories with names
     * within date ranges.
     * @param fs target filesystem
     * @param interval date range before and after the current date
     * @param minuteSkip time to skip within a range to get intermediate directories
     * @param folderPrefix prefix for remote directories
     * @param postFix postfix for remote locations
     * @throws IOException
     */
    public static void lateDataReplenish(FileSystem fs, int interval, int minuteSkip,
                                         String folderPrefix, String postFix) throws IOException {
        List<String> folderPaths = TimeUtil.getMinuteDatesOnEitherSide(interval, minuteSkip);
        LOGGER.info("folderData: " + folderPaths.toString());
        if (postFix != null) {
            for (int i = 0; i < folderPaths.size(); i++) {
                folderPaths.set(i, folderPaths.get(i) + postFix);
            }
        }
        createFolders(fs, folderPrefix, folderPaths);
        copyDataToFolders(fs, folderPrefix, folderPaths,
            OSUtil.concat(OSUtil.NORMAL_INPUT, "_SUCCESS"),
            OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile4.txt"));
    }
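    /*
     * Illustrative usage only (not part of the original class). Replenishing late data
     * around the current time with no extra postfix; the interval/skip values and the
     * prefix below are placeholders:
     *
     *   HadoopUtil.lateDataReplenish(fs, 60, 5, "/tmp/late-feed/", null);
     */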
    /**
     * Creates empty folders in hdfs.
     * @param helper target
     * @param folderList list of folders
     * @throws IOException
     * @deprecated the method creates a filesystem object by itself; an existing FileSystem
     * object should be passed to such methods instead.
     */
    @Deprecated
    public static void createHDFSFolders(ColoHelper helper, List<String> folderList) throws IOException {
        LOGGER.info("creating folders.....");
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://" + helper.getFeedHelper().getHadoopURL());
        final FileSystem fs = FileSystem.get(conf);
        for (final String folder : folderList) {
            if (StringUtils.isNotEmpty(folder)) {
                fs.mkdirs(new Path(cutProtocol(folder)));
            }
        }
        LOGGER.info("created folders.....");
    }
}