/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tachyon.util;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.log4j.Logger;
import tachyon.Constants;
import tachyon.Pair;
import tachyon.PrefixList;
import tachyon.UnderFileSystem;
import tachyon.Version;
import tachyon.client.TachyonFS;
/**
* Utilities for loading files from an under filesystem (UFS) into Tachyon (TFS).
*/
public class UnderfsUtils {
  private static final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);

  /**
   * Build a new path relative to a given TFS root by retrieving the given path relative to
   * the ufsRootPath.
   *
   * @param tfsRootPath
   *          the destination point in TFS to load the under FS path onto
   * @param ufsRootPath
   *          the source path in the under FS to be loaded
   * @param path
   *          the path in the under FS to be loaded, path.startsWith(ufsRootPath) must be true
   * @return the new path relative to tfsRootPath.
   */
  private static String buildTFSPath(String tfsRootPath, String ufsRootPath, String path) {
    String filePath = path.substring(ufsRootPath.length());
    if (filePath.isEmpty()) {
      // Nothing is left after stripping the root, i.e. path is the root itself: use the
      // basename of ufsRootPath so the file keeps its name under tfsRootPath.
      filePath = path.substring(ufsRootPath.lastIndexOf(Constants.PATH_SEPARATOR) + 1);
    }
    return CommonUtils.concat(tfsRootPath, filePath);
  }

  /**
   * Load files under path "ufsAddrRootPath" (excluding excludePathPrefix relative to the path)
   * to the given tfs under a given destination path.
   *
   * @param tfsAddrRootPath
   *          the TFS address and path to load the src files, like "tachyon://host:port/dest".
   * @param ufsAddrRootPath
   *          the address and root path of the under FS, like "hdfs://host:port/src".
   * @param excludePaths
   *          paths to exclude from ufsRootPath, separated by ";", which will not be loaded in
   *          TFS.
   * @throws IOException
   *           if the host name in ufsAddrRootPath cannot be resolved, or when propagated from
   *           the TFS / under FS calls
   */
  public static void loadUnderFs(String tfsAddrRootPath, String ufsAddrRootPath,
      String excludePaths) throws IOException {
    Pair<String, String> tfsPair = UnderFileSystem.parse(tfsAddrRootPath);
    String tfsAddress = tfsPair.getFirst();
    String tfsRootPath = tfsPair.getSecond();

    TachyonFS tfs = TachyonFS.get(tfsAddress);
    PrefixList excludePathPrefix = new PrefixList(excludePaths, ";");

    loadUnderFs(tfs, tfsRootPath, ufsAddrRootPath, excludePathPrefix);
  }

  /**
   * Load files under path "ufsAddress/ufsRootPath" (excluding excludePathPrefix)
   * to the given tfs under the given tfsRootPath directory.
   *
   * The under FS tree is walked breadth-first: files are registered in TFS with the under FS
   * path as their checkpoint location, directories are created in TFS and their children are
   * queued for a later iteration.
   *
   * @param tfs
   *          the TFS handler created out of address like "tachyon://host:port"
   * @param tfsRootPath
   *          the destination point in TFS to load the under FS path onto
   * @param ufsAddrRootPath
   *          the address and root path of the under FS, like "hdfs://host:port/dir".
   * @param excludePathPrefix
   *          paths to exclude from ufsRootPath, which will not be registered in TFS.
   * @throws IOException
   *           if the host name in ufsAddrRootPath cannot be resolved, or when propagated from
   *           the TFS / under FS calls
   */
  public static void loadUnderFs(TachyonFS tfs, String tfsRootPath, String ufsAddrRootPath,
      PrefixList excludePathPrefix) throws IOException {
    LOG.info(tfs + tfsRootPath + " " + ufsAddrRootPath + " " + excludePathPrefix);

    // From here on only the host-resolved form of the under FS root is used.
    String resolvedUfsAddrRootPath = resolveUfsHostName(ufsAddrRootPath);

    Pair<String, String> ufsPair = UnderFileSystem.parse(resolvedUfsAddrRootPath);
    String ufsAddress = ufsPair.getFirst();
    String ufsRootPath = ufsPair.getSecond();

    if (!tfs.exist(tfsRootPath)) {
      // TODO Check the result of mkdir: log the creation on success and throw an IOException
      // ("Failed to create folder in Tachyon: " + tfsRootPath) on failure.
      tfs.mkdir(tfsRootPath);
    }

    // create the under FS handler (e.g. hdfs, local FS, s3 etc.)
    UnderFileSystem ufs = UnderFileSystem.get(ufsAddress);

    Queue<String> ufsPathQueue = new LinkedList<String>();
    // The root itself may be excluded, in which case nothing is loaded at all.
    if (excludePathPrefix.outList(ufsRootPath)) {
      ufsPathQueue.add(resolvedUfsAddrRootPath);
    }

    while (!ufsPathQueue.isEmpty()) {
      String ufsPath = ufsPathQueue.poll(); // this is the absolute path
      LOG.info("loading: " + ufsPath);
      if (ufs.isFile(ufsPath)) {
        loadFile(tfs, tfsRootPath, resolvedUfsAddrRootPath, ufsPath);
      } else { // ufsPath is a directory
        loadDirectory(tfs, ufs, tfsRootPath, resolvedUfsAddrRootPath, ufsPath,
            excludePathPrefix, ufsPathQueue);
      }
    }
  }

  /**
   * Resolve and replace the host name embedded in the given under FS address.
   *
   * @param ufsAddrRootPath
   *          the address and root path of the under FS as given by the caller
   * @return the address and root path with the host name replaced by its resolved form
   * @throws IOException
   *           wrapping the UnknownHostException when the host name cannot be resolved
   */
  private static String resolveUfsHostName(String ufsAddrRootPath) throws IOException {
    try {
      String resolved = NetworkUtils.replaceHostName(ufsAddrRootPath);
      if (!resolved.equalsIgnoreCase(ufsAddrRootPath)) {
        LOG.info("UnderFS hostname resolved: " + resolved);
      }
      return resolved;
    } catch (UnknownHostException e) {
      LOG.error("hostname cannot be resolved in given UFS path: " + ufsAddrRootPath);
      throw new IOException(e);
    }
  }

  /**
   * Register a single under FS file in TFS, unless a TFS file with the target path already
   * exists. A failed creation (file id -1) is logged and does not abort the load.
   *
   * @param tfs
   *          the TFS handler
   * @param tfsRootPath
   *          the destination point in TFS
   * @param ufsAddrRootPath
   *          the (host-resolved) address and root path of the under FS
   * @param ufsPath
   *          the absolute under FS path of the file to register
   * @throws IOException
   *           when propagated from the TFS calls
   */
  private static void loadFile(TachyonFS tfs, String tfsRootPath, String ufsAddrRootPath,
      String ufsPath) throws IOException {
    String tfsPath = buildTFSPath(tfsRootPath, ufsAddrRootPath, ufsPath);
    if (tfs.exist(tfsPath)) {
      LOG.info("File " + tfsPath + " already exists in Tachyon.");
      return;
    }

    int fileId = tfs.createFile(tfsPath, ufsPath);
    if (fileId == -1) {
      LOG.error("Failed to create tachyon file: " + tfsPath);
    } else {
      LOG.info("Create tachyon file " + tfsPath + " with file id " + fileId + " and "
          + "checkpoint location " + ufsPath);
    }
  }

  /**
   * Queue the non-excluded children of an under FS directory for loading and make sure the
   * matching directory exists in TFS.
   *
   * @param tfs
   *          the TFS handler
   * @param ufs
   *          the under FS handler
   * @param tfsRootPath
   *          the destination point in TFS
   * @param ufsAddrRootPath
   *          the (host-resolved) address and root path of the under FS
   * @param ufsPath
   *          the absolute under FS path of the directory
   * @param excludePathPrefix
   *          prefixes, relative to the under FS root, of the paths to skip
   * @param ufsPathQueue
   *          the queue of absolute under FS paths still to be loaded; children are appended
   * @throws IOException
   *           when propagated from the TFS / under FS calls
   */
  private static void loadDirectory(TachyonFS tfs, UnderFileSystem ufs, String tfsRootPath,
      String ufsAddrRootPath, String ufsPath, PrefixList excludePathPrefix,
      Queue<String> ufsPathQueue) throws IOException {
    String[] files = ufs.list(ufsPath); // ufs.list() returns relative path
    if (files != null) {
      for (String filePath : files) {
        LOG.info("Get: " + filePath);
        String aPath = CommonUtils.concat(ufsPath, filePath);
        // The exclude list is matched against the path relative to the under FS root,
        // without a leading separator.
        String checkPath = aPath.substring(ufsAddrRootPath.length());
        if (checkPath.startsWith(Constants.PATH_SEPARATOR)) {
          checkPath = checkPath.substring(Constants.PATH_SEPARATOR.length());
        }
        if (excludePathPrefix.inList(checkPath)) {
          LOG.info("excluded: " + checkPath);
        } else {
          ufsPathQueue.add(aPath);
        }
      }
    }

    // ufsPath is a directory, so only concat the tfsRoot with the relative path
    String tfsPath =
        CommonUtils.concat(tfsRootPath, ufsPath.substring(ufsAddrRootPath.length()));
    if (!tfs.exist(tfsPath)) {
      // TODO Check the result of mkdir and log whether the TFS folder was created
      // (with checkpoint location ufsPath) or the creation failed.
      tfs.mkdir(tfsPath);
    }
  }

  /**
   * Command line entry point. Expects the TFS destination path and the under FS source path,
   * optionally followed by a ";"-separated list of path prefixes to exclude. Exits with 0 on
   * success; on wrong argument count or any failure it prints the usage and exits with -1.
   *
   * @param args
   *          the command line arguments: TachyonPath, UnderfsPath and optionally the exclude
   *          prefixes
   */
  public static void main(String[] args) {
    if (!(args.length == 2 || args.length == 3)) {
      printUsage();
      System.exit(-1);
    }

    String exList = (args.length == 3) ? args[2] : "";

    try {
      loadUnderFs(args[0], args[1], exList);
    } catch (Exception e) {
      e.printStackTrace();
      printUsage();
      System.exit(-1);
    }
    System.exit(0);
  }

  /**
   * Print the command line usage of this tool, with examples, to standard output.
   */
  public static void printUsage() {
    String cmd =
        "java -cp target/tachyon-" + Version.VERSION + "-jar-with-dependencies.jar "
            + "tachyon.util.UnderfsUtils ";

    System.out.println("Usage: " + cmd + "<TachyonPath> <UnderfsPath> "
        + "[<Optional ExcludePathPrefix, separated by ;>]");
    System.out
        .println("Example: " + cmd + "tachyon://127.0.0.1:19998/a hdfs://localhost:9000/b c");
    System.out.println("Example: " + cmd + "tachyon://127.0.0.1:19998/a file:///b c");
    System.out.println("Example: " + cmd + "tachyon://127.0.0.1:19998/a /b c");
    System.out.print("In the TFS, all files under local FS /b will be registered under /a, ");
    System.out.println("except for those with prefix c");
  }
}