/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tachyon.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;
import org.apache.log4j.Logger;

import tachyon.Constants;
import tachyon.PrefixList;
import tachyon.client.TachyonFS;
import tachyon.client.TachyonFile;
import tachyon.client.WriteType;
import tachyon.conf.CommonConf;
import tachyon.thrift.ClientBlockInfo;
import tachyon.thrift.ClientDependencyInfo;
import tachyon.thrift.ClientFileInfo;
import tachyon.thrift.NetAddress;
import tachyon.util.CommonUtils;
import tachyon.util.UnderfsUtils;

/**
 * A Hadoop FileSystem interface implementation. Any program that works with Hadoop HDFS can work
 * with Tachyon transparently through this class, though it is not as efficient as using the
 * Tachyon API in the tachyon.client package directly. A usage sketch appears at the end of this
 * file.
 */
public class TFS extends FileSystem {
  public static final String FIRST_COM_PATH = "tachyon_dep/";
  public static final String RECOMPUTE_PATH = "tachyon_recompute/";

  public static String UNDERFS_ADDRESS;

  private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);

  private URI mUri = null;
  private Path mWorkingDir = new Path(Constants.PATH_SEPARATOR);
  private TachyonFS mTFS = null;
  private String mTachyonHeader = null;

  @Override
  public FSDataOutputStream append(Path cPath, int bufferSize, Progressable progress)
      throws IOException {
    LOG.info("append(" + cPath + ", " + bufferSize + ", " + progress + ")");

    String path = Utils.getPathWithoutScheme(cPath);
    fromHdfsToTachyon(path);
    int fileId = mTFS.getFileId(path);
    TachyonFile file = mTFS.getFile(fileId);

    if (file.length() > 0) {
      LOG.warn("Appending to a file that already has data; this may be an error.");
    }

    return new FSDataOutputStream(file.getOutStream(WriteType.CACHE_THROUGH), null);
  }

  @Override
  public FSDataOutputStream create(Path cPath, FsPermission permission, boolean overwrite,
      int bufferSize, short replication, long blockSize, Progressable progress)
      throws IOException {
    LOG.info("create(" + cPath + ", " + permission + ", " + overwrite + ", " + bufferSize + ", "
        + replication + ", " + blockSize + ", " + progress + ")");

    if (!CommonConf.get().ASYNC_ENABLED) {
      // Synchronous mode: overwrite any existing file and write through to the under filesystem.
      String path = Utils.getPathWithoutScheme(cPath);
      if (mTFS.exist(path)) {
        if (!mTFS.delete(path, false)) {
          throw new IOException("Failed to delete existing data " + cPath);
        }
      }
      int fileId = mTFS.createFile(path, blockSize);
      TachyonFile file = mTFS.getFile(fileId);
      file.setUFSConf(getConf());
      return new FSDataOutputStream(file.getOutStream(WriteType.CACHE_THROUGH), null);
    }

    if (cPath.toString().contains(FIRST_COM_PATH) && !cPath.toString().contains("SUCCESS")) {
      // Asynchronous first-computation output: parse the dependency id and the partition index
      // out of the path, then write to the corresponding child file of that dependency.
      String path = Utils.getPathWithoutScheme(cPath);
      mTFS.createFile(path, blockSize);
      path = path.substring(path.indexOf(FIRST_COM_PATH) + FIRST_COM_PATH.length());
      path = path.substring(0, path.indexOf(Constants.PATH_SEPARATOR));
      int depId = Integer.parseInt(path);
      LOG.info("create(" + cPath + ") : " + path + " " + depId);
      path = Utils.getPathWithoutScheme(cPath);
      path = path.substring(path.indexOf("part-") + 5);
      int index = Integer.parseInt(path);
      ClientDependencyInfo info = mTFS.getClientDependencyInfo(depId);
      int fileId = info.getChildren().get(index);
      LOG.info("create(" + cPath + ") : " + path + " " + index + " " + info + " " + fileId);
      TachyonFile file = mTFS.getFile(fileId);
      file.setUFSConf(getConf());
      // if (file.getBlockSizeByte() != blockSize) {
      //   throw new IOException("File already exists with a different block size: "
      //       + file.getBlockSizeByte() + " != " + blockSize);
      // }
      return new FSDataOutputStream(file.getOutStream(WriteType.ASYNC_THROUGH), null);
    }

    if (cPath.toString().contains(RECOMPUTE_PATH) && !cPath.toString().contains("SUCCESS")) {
      // Asynchronous recomputation output: same parsing as above, rooted at RECOMPUTE_PATH.
      String path = Utils.getPathWithoutScheme(cPath);
      mTFS.createFile(path, blockSize);
      path = path.substring(path.indexOf(RECOMPUTE_PATH) + RECOMPUTE_PATH.length());
      path = path.substring(0, path.indexOf(Constants.PATH_SEPARATOR));
      int depId = Integer.parseInt(path);
      LOG.info("create(" + cPath + ") : " + path + " " + depId);
      path = Utils.getPathWithoutScheme(cPath);
      path = path.substring(path.indexOf("part-") + 5);
      int index = Integer.parseInt(path);
      ClientDependencyInfo info = mTFS.getClientDependencyInfo(depId);
      int fileId = info.getChildren().get(index);
      LOG.info("create(" + cPath + ") : " + path + " " + index + " " + info + " " + fileId);
      TachyonFile file = mTFS.getFile(fileId);
      file.setUFSConf(getConf());
      // if (file.getBlockSizeByte() != blockSize) {
      //   throw new IOException("File already exists with a different block size: "
      //       + file.getBlockSizeByte() + " != " + blockSize);
      // }
      return new FSDataOutputStream(file.getOutStream(WriteType.ASYNC_THROUGH), null);
    } else {
      String path = Utils.getPathWithoutScheme(cPath);
      int fileId;
      WriteType type = WriteType.CACHE_THROUGH;
      if (mTFS.exist(path)) {
        fileId = mTFS.getFileId(path);
        type = WriteType.MUST_CACHE;
      } else {
        fileId = mTFS.createFile(path, blockSize);
      }
      TachyonFile file = mTFS.getFile(fileId);
      file.setUFSConf(getConf());
      // if (file.getBlockSizeByte() != blockSize) {
      //   throw new IOException("File already exists with a different block size: "
      //       + file.getBlockSizeByte() + " != " + blockSize);
      // }
      return new FSDataOutputStream(file.getOutStream(type), null);
    }
  }
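
  // Worked example for the dependency-path parsing in create() above (all values hypothetical):
  // for a path ending in "tachyon_dep/3/part-00007", the first substring pass isolates "3",
  // giving depId = 3, and the second pass isolates "00007" after "part-", giving index = 7.
  // The write then targets the 8th child file registered under dependency 3.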

  @Override
  @Deprecated
  public boolean delete(Path path) throws IOException {
    return delete(path, true);
  }

  @Override
  public boolean delete(Path path, boolean recursive) throws IOException {
    LOG.info("delete(" + path + ", " + recursive + ")");
    String tPath = Utils.getPathWithoutScheme(path);
    fromHdfsToTachyon(tPath);
    return mTFS.delete(tPath, recursive);
  }

  /**
   * If the path does not exist in Tachyon yet but does exist in the under filesystem, lazily
   * load its metadata into Tachyon.
   */
  private void fromHdfsToTachyon(String path) throws IOException {
    if (!mTFS.exist(path)) {
      Path hdfsPath = Utils.getHDFSPath(path);
      FileSystem fs = hdfsPath.getFileSystem(getConf());
      if (fs.exists(hdfsPath)) {
        String ufsAddrPath = CommonUtils.concat(UNDERFS_ADDRESS, path);
        // Set the path as the TFS root path.
        UnderfsUtils.loadUnderFs(mTFS, path, ufsAddrPath, new PrefixList(null));
      }
    }
  }

  @Override
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
      throws IOException {
    if (file == null) {
      return null;
    }

    String path = Utils.getPathWithoutScheme(file.getPath());
    fromHdfsToTachyon(path);
    int fileId = mTFS.getFileId(path);
    if (fileId == -1) {
      throw new FileNotFoundException("File does not exist: " + file.getPath());
    }

    List<BlockLocation> blockLocations = new ArrayList<BlockLocation>();
    List<ClientBlockInfo> blocks = mTFS.getFileBlocks(fileId);
    for (int k = 0; k < blocks.size(); k ++) {
      ClientBlockInfo info = blocks.get(k);
      long offset = info.getOffset();
      long end = offset + info.getLength();
      // Include the block if [offset, end) overlaps the requested range [start, start + len).
      // This also covers a block that fully contains the requested range, which the previous
      // two-sided endpoint check missed.
      if (offset < start + len && end > start) {
        ArrayList<String> names = new ArrayList<String>();
        ArrayList<String> hosts = new ArrayList<String>();
        for (NetAddress addr : info.getLocations()) {
          names.add(addr.mHost);
          hosts.add(addr.mHost);
        }
        blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names),
            CommonUtils.toStringArray(hosts), offset, info.getLength()));
      }
    }

    return blockLocations.toArray(new BlockLocation[blockLocations.size()]);
  }
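
  // Example of the overlap test in getFileBlockLocations() (hypothetical numbers): for a request
  // covering [0, 100MB) and a 64MB block at offset 64MB, the block spans [64MB, 128MB); since
  // 64MB < 100MB and 128MB > 0, the block overlaps the range and its locations are reported.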

  /**
   * Returns the status of a single file. If the file does not exist in Tachyon, query it from
   * HDFS.
   */
  @Override
  public FileStatus getFileStatus(Path path) throws IOException {
    String tPath = Utils.getPathWithoutScheme(path);
    Path hdfsPath = Utils.getHDFSPath(tPath);

    LOG.info("getFileStatus(" + path + "): HDFS Path: " + hdfsPath + " TPath: " + mTachyonHeader
        + tPath);
    fromHdfsToTachyon(tPath);
    TachyonFile file = mTFS.getFile(tPath);
    if (file == null) {
      LOG.info("File does not exist: " + path);
      throw new FileNotFoundException("File does not exist: " + path);
    }

    return new FileStatus(file.length(), file.isDirectory(), file.getDiskReplication(),
        file.getBlockSizeByte(), file.getCreationTimeMs(), file.getCreationTimeMs(), null, null,
        null, new Path(mTachyonHeader + tPath));
  }

  /**
   * Returns an object implementing the Tachyon-specific client API.
   *
   * @return null if initialize() has not been called yet.
   */
  public TachyonFS getTachyonFS() {
    return mTFS;
  }

  @Override
  public URI getUri() {
    return mUri;
  }

  @Override
  public Path getWorkingDirectory() {
    LOG.info("getWorkingDirectory: " + mWorkingDir);
    return mWorkingDir;
  }

  /**
   * Initializes the class and establishes a lazy connection to Tachyon through mTFS.
   */
  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    LOG.info("initialize(" + uri + ", " + conf + "). Connecting to Tachyon: " + uri.toString());
    Utils.addS3Credentials(conf);
    setConf(conf);
    mTachyonHeader = uri.getScheme() + "://" + uri.getHost() + ":" + uri.getPort();
    mTFS = TachyonFS.get(mTachyonHeader);
    mUri = URI.create(mTachyonHeader);
    UNDERFS_ADDRESS = mTFS.getUnderfsAddress();
    LOG.info(mTachyonHeader + " " + mUri + " " + UNDERFS_ADDRESS);
  }

  /**
   * Lists the entries of a path.
   */
  @Override
  public FileStatus[] listStatus(Path path) throws IOException {
    String tPath = Utils.getPathWithoutScheme(path);
    Path hdfsPath = Utils.getHDFSPath(tPath);
    LOG.info("listStatus(" + path + "): HDFS Path: " + hdfsPath);

    fromHdfsToTachyon(tPath);
    if (!mTFS.exist(tPath)) {
      throw new FileNotFoundException("File does not exist: " + path);
    }

    List<ClientFileInfo> files = mTFS.listStatus(tPath);
    FileStatus[] ret = new FileStatus[files.size()];
    for (int k = 0; k < files.size(); k ++) {
      ClientFileInfo info = files.get(k);
      // TODO Replace the hard-coded replication factor 3 with the actual number of disk
      // replications.
      ret[k] = new FileStatus(info.getLength(), info.isFolder, 3, info.getBlockSizeByte(),
          info.getCreationTimeMs(), info.getCreationTimeMs(), null, null, null,
          new Path(mTachyonHeader + info.getPath()));
    }
    return ret;
  }

  @Override
  public boolean mkdirs(Path cPath, FsPermission permission) throws IOException {
    LOG.info("mkdirs(" + cPath + ", " + permission + ")");
    return mTFS.mkdir(Utils.getPathWithoutScheme(cPath));
  }

  /**
   * Returns an FSDataInputStream for the given file.
   */
  @Override
  public FSDataInputStream open(Path cPath, int bufferSize) throws IOException {
    LOG.info("open(" + cPath + ", " + bufferSize + ")");

    String path = Utils.getPathWithoutScheme(cPath);
    fromHdfsToTachyon(path);
    int fileId = mTFS.getFileId(path);

    return new FSDataInputStream(new HdfsFileInputStream(mTFS, fileId, Utils.getHDFSPath(path),
        getConf(), bufferSize));
  }

  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    LOG.info("rename(" + src + ", " + dst + ")");
    String hSrc = Utils.getPathWithoutScheme(src);
    String hDst = Utils.getPathWithoutScheme(dst);
    fromHdfsToTachyon(hSrc);
    return mTFS.rename(hSrc, hDst);
  }

  @Override
  public void setWorkingDirectory(Path path) {
    LOG.info("setWorkingDirectory(" + path + ")");
    if (path.isAbsolute()) {
      mWorkingDir = path;
    } else {
      mWorkingDir = new Path(mWorkingDir, path);
    }
  }
}
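
// ---------------------------------------------------------------------------
// Usage sketch referenced in the class Javadoc (illustrative only): a minimal
// Hadoop client that reaches Tachyon through the standard FileSystem facade.
// The property key "fs.tachyon.impl" follows Hadoop's generic
// "fs.<scheme>.impl" convention; the host, port, and paths below are
// assumptions made for this sketch, not values defined in this file.
// ---------------------------------------------------------------------------
class TFSUsageExample {
  public static void main(String[] args) throws java.io.IOException {
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    // Map the "tachyon" URI scheme to this FileSystem implementation.
    conf.set("fs.tachyon.impl", "tachyon.hadoop.TFS");
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        java.net.URI.create("tachyon://localhost:19998/"), conf);
    // Ordinary HDFS-style calls are served by TFS transparently.
    for (org.apache.hadoop.fs.FileStatus status
        : fs.listStatus(new org.apache.hadoop.fs.Path("/"))) {
      System.out.println(status.getPath());
    }
    fs.close();
  }
}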