/**
* This file is part of muCommander, http://www.mucommander.com
* Copyright (C) 2002-2016 Maxence Bernard
*
* muCommander is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* muCommander is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.mucommander.commons.file.protocol.hadoop;
import com.mucommander.commons.file.*;
import com.mucommander.commons.file.filter.FilenameFilter;
import com.mucommander.commons.file.protocol.ProtocolFile;
import com.mucommander.commons.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * This abstract class provides access to the Hadoop virtual filesystem, which, like the muCommander file API, provides
 * unified access to a number of file protocols.
 *
 * <p>{@link ProtocolFile} is fully implemented by <code>HadoopFile</code>. All that is left for subclasses to do is
 * implement the abstract methods defined in this class.</p>
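 *
 * <p>A typical usage sketch, going through {@link FileFactory} as client code normally does; the host and path
 * below are hypothetical and exception handling is omitted for brevity:</p>
 * <pre>{@code
 * AbstractFile dir = FileFactory.getFile("hdfs://namenode/user/someone");
 * if (dir != null && dir.isDirectory()) {
 *     for (AbstractFile child : dir.ls())      // ls() may throw IOException
 *         System.out.println(child.getName() + ": " + child.getSize() + " bytes");
 * }
 * }</pre>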
*
* @see HDFSFile
* @see S3File
* @author Maxence Bernard
*/
public abstract class HadoopFile extends ProtocolFile {
private static final Logger LOGGER = LoggerFactory.getLogger(HadoopFile.class);
/** The Hadoop FileSystem object */
private FileSystem fs;
/** The Hadoop path locating this file on its filesystem */
private Path path;
/** Holds file attributes */
private HadoopFileAttributes fileAttributes;
/** Cached parent file instance, null if not created yet or if this file has no parent */
private AbstractFile parent;
/** Has the parent file been determined yet? */
private boolean parentValSet;
/** True if this file is currently being written */
private boolean isWriting;
/** Default Hadoop Configuration, whose values are fetched from XML configuration files. */
protected final static Configuration DEFAULT_CONFIGURATION = new Configuration();
protected HadoopFile(FileURL url) throws IOException {
this(url, null, null);
}
protected HadoopFile(FileURL url, FileSystem fs, FileStatus fileStatus) throws IOException {
super(url);
if(fs==null) {
try {
this.fs = getHadoopFileSystem(url);
}
catch(IOException e) {
throw e;
}
catch(Exception e) {
// FileSystem implementations throw IllegalArgumentException under various circumstances
throw new IOException(e.getMessage());
}
}
else {
this.fs = fs;
}
if(fileStatus==null) {
this.path = new Path(fileURL.getPath());
this.fileAttributes = new HadoopFileAttributes();
}
else {
this.fileAttributes = new HadoopFileAttributes(fileStatus);
this.path = fileStatus.getPath();
}
}
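/**
 * Returns an <code>OutputStream</code> to write this file's contents, either overwriting the file or appending to
 * it. As bytes are written, the locally cached attributes (size and date) are updated through a {@link ByteCounter},
 * and the <code>isWriting</code> flag stays raised until the stream is closed so that attribute refreshes do not
 * overwrite these values with stale server-side ones.
 *
 * @param append <code>true</code> to append to the existing file, <code>false</code> to overwrite it
 * @return an OutputStream to write this file's contents
 * @throws IOException if the stream could not be opened
 */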
private OutputStream getOutputStream(boolean append) throws IOException {
OutputStream out = new CounterOutputStream(
append?fs.append(path):fs.create(path, true),
new ByteCounter() {
@Override
public synchronized void add(long nbBytes) {
fileAttributes.addToSize(nbBytes);
fileAttributes.setDate(System.currentTimeMillis());
}
}
) {
@Override
public void close() throws IOException {
super.close();
isWriting = false;
}
};
// Update local attributes
fileAttributes.setExists(true);
fileAttributes.setDate(System.currentTimeMillis());
fileAttributes.setSize(0);
isWriting = true;
return out;
}
/////////////////////////////////
// AbstractFile implementation //
/////////////////////////////////
@Override
public AbstractFile getParent() {
if(!parentValSet) {
FileURL parentFileURL = this.fileURL.getParent();
if(parentFileURL!=null)
parent = FileFactory.getFile(parentFileURL);
parentValSet = true;
}
return parent;
}
@Override
public void setParent(AbstractFile parent) {
this.parent = parent;
this.parentValSet = true;
}
@Override
public Object getUnderlyingFileObject() {
return fileAttributes;
}
// File attributes manipulation
@Override
public boolean exists() {
return fileAttributes.exists();
}
@Override
public boolean isDirectory() {
return fileAttributes.isDirectory();
}
/**
 * Always returns <code>false</code>: Hadoop filesystems have no symlink support.
 *
 * @return <code>false</code>, always
*/
@Override
public boolean isSymlink() {
// No support for symlinks
return false;
}
@Override
public boolean isSystem() {
return false;
}
@Override
public long getDate() {
return fileAttributes.getDate();
}
@Override
public long getSize() {
return fileAttributes.getSize();
}
@Override
public PermissionBits getChangeablePermissions() {
return FilePermissions.FULL_PERMISSION_BITS;
}
@Override
public FilePermissions getPermissions() {
return fileAttributes.getPermissions();
}
@Override
public String getOwner() {
return fileAttributes.getOwner();
}
@Override
public boolean canGetOwner() {
return true;
}
@Override
public String getGroup() {
return fileAttributes.getGroup();
}
@Override
public boolean canGetGroup() {
return true;
}
// Supported file operations
@Override
public void mkdir() throws IOException {
if(exists() || !fs.mkdirs(path))
throw new IOException();
// Update local attributes
fileAttributes.setExists(true);
fileAttributes.setDirectory(true);
fileAttributes.setDate(System.currentTimeMillis());
fileAttributes.setSize(0);
}
@Override
public void delete() throws IOException {
if(!fs.delete(path, false))
throw new IOException();
// Update local attributes
fileAttributes.setExists(false);
fileAttributes.setDirectory(false);
fileAttributes.setSize(0);
}
@Override
public void renameTo(AbstractFile destFile) throws IOException {
// Throw an exception if the file cannot be renamed to the specified destination
checkRenamePrerequisites(destFile, false, false);
// Delete the destination if it already exists as FileSystem#rename would otherwise fail.
// Note: HadoopFile#delete() does not delete directories recursively (good).
if(destFile.exists())
destFile.delete();
if(!fs.rename(path, ((HadoopFile)destFile).path))
throw new IOException();
// Update destination file attributes by fetching them from the server
((HadoopFileAttributes)destFile.getUnderlyingFileObject()).fetchAttributes();
// Update this file's attributes locally
fileAttributes.setExists(false);
fileAttributes.setDirectory(false);
fileAttributes.setSize(0);
}
@Override
public void changeDate(long lastModified) throws IOException {
// Note: setTimes seems to fail on HDFS directories.
fs.setTimes(path, lastModified, lastModified);
// Update local attributes
fileAttributes.setDate(lastModified);
}
@Override
public void changePermission(PermissionAccess access, PermissionType permission, boolean enabled) throws IOException {
// Shift the permission bit into the triplet (user/group/other) selected by the access class, then set or clear it
changePermissions(ByteUtils.setBit(getPermissions().getIntValue(), (permission.toInt() << (access.toInt()*3)), enabled));
}
@Override
public InputStream getInputStream() throws IOException {
return fs.open(path);
}
@Override
public OutputStream getOutputStream() throws IOException {
return getOutputStream(false);
}
@Override
public RandomAccessInputStream getRandomAccessInputStream() throws IOException {
return new HadoopRandomAccessInputStream(fs.open(path), getSize());
}
@Override
public AbstractFile[] ls() throws IOException {
return ls(null);
}
// Unsupported file operations
@Override
@UnsupportedFileOperation
public OutputStream getAppendOutputStream() throws IOException {
// Currently not supported by any of the filesystems (S3, HDFS)
throw new UnsupportedFileOperationException(FileOperation.APPEND_FILE);
}
@Override
@UnsupportedFileOperation
public RandomAccessOutputStream getRandomAccessOutputStream() throws IOException, UnsupportedFileOperationException {
throw new UnsupportedFileOperationException(FileOperation.RANDOM_WRITE_FILE);
}
/**
* Always throws {@link UnsupportedFileOperationException} when called.
*
* @throws UnsupportedFileOperationException, always
*/
@Override
@UnsupportedFileOperation
public void copyRemotelyTo(AbstractFile destFile) throws UnsupportedFileOperationException {
// TODO: implement for S3
throw new UnsupportedFileOperationException(FileOperation.COPY_REMOTELY);
}
/**
* Always throws {@link UnsupportedFileOperationException} when called.
*
* @throws UnsupportedFileOperationException, always
*/
@Override
@UnsupportedFileOperation
public long getFreeSpace() throws UnsupportedFileOperationException {
throw new UnsupportedFileOperationException(FileOperation.GET_FREE_SPACE);
}
/**
* Always throws {@link UnsupportedFileOperationException} when called.
*
* @throws UnsupportedFileOperationException, always
*/
@Override
@UnsupportedFileOperation
public long getTotalSpace() throws UnsupportedFileOperationException {
throw new UnsupportedFileOperationException(FileOperation.GET_TOTAL_SPACE);
}
////////////////////////
// Overridden methods //
////////////////////////
@Override
public AbstractFile[] ls(FilenameFilter filter) throws IOException {
// Ensure that the file exists and is a directory: if it isn't, listStatus returns an empty array rather than
// throwing an exception
if(!exists() || !isDirectory())
throw new IOException();
FileStatus[] statuses = filter==null
?fs.listStatus(path)
:fs.listStatus(path, new HadoopFilenameFilter(filter));
int nbChildren = statuses==null?0:statuses.length;
AbstractFile[] children = new AbstractFile[nbChildren];
String parentPath = fileURL.getPath();
if(!parentPath.endsWith("/"))
parentPath += "/";
FileURL childURL;
FileStatus childStatus;
for(int i=0; i<nbChildren; i++) {
childStatus = statuses[i];
childURL = (FileURL)fileURL.clone();
childURL.setPath(parentPath + childStatus.getPath().getName());
children[i] = FileFactory.getFile(childURL, this, fs, childStatus);
}
return children;
}
@Override
public void changePermissions(int permissions) throws IOException, UnsupportedFileOperationException {
fs.setPermission(path, new FsPermission((short)permissions));
// Update local attributes
fileAttributes.setPermissions(new SimpleFilePermissions(permissions));
}
//////////////////////
// Abstract methods //
//////////////////////
/**
* Returns a Hadoop {@link FileSystem} instance for the specified realm.
*
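 * <p>A minimal sketch of a possible implementation; how the realm is turned into a Hadoop <code>URI</code> is
 * protocol-specific, so the conversion below is only an assumption:</p>
 * <pre>{@code
 * protected FileSystem getHadoopFileSystem(FileURL realm) throws IOException {
 *     // Build a URI from the realm's scheme and host and let Hadoop pick the matching FileSystem implementation
 *     return FileSystem.get(java.net.URI.create(realm.getScheme() + "://" + realm.getHost()), DEFAULT_CONFIGURATION);
 * }
 * }</pre>
 *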
* @param realm authentication realm
* @return a Hadoop {@link FileSystem} instance for the specified realm.
* @throws IOException if the FileSystem failed to be instantiated
*/
protected abstract FileSystem getHadoopFileSystem(FileURL realm) throws IOException;
/**
 * Sets default file attribute values for the file represented by the given URL. The attributes that need to be
 * set are those that are protocol-specific.
*
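 * <p>A minimal, hypothetical sketch; the actual defaults are up to each protocol:</p>
 * <pre>{@code
 * protected void setDefaultFileAttributes(FileURL url, HadoopFileAttributes atts) {
 *     // Fall back to muCommander's default file permissions when the server cannot provide any
 *     atts.setPermissions(FilePermissions.DEFAULT_FILE_PERMISSIONS);
 * }
 * }</pre>
 *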
* @param url URL of the file for which to set attributes
* @param atts the file attributes to set
*/
protected abstract void setDefaultFileAttributes(FileURL url, HadoopFileAttributes atts);
///////////////////
// Inner classes //
///////////////////
/**
* HadoopFileAttributes provides getters and setters for Hadoop file attributes. By extending
* <code>SyncedFileAttributes</code>, this class caches attributes for a certain amount of time
* after which fresh values are retrieved from the server.
*/
class HadoopFileAttributes extends SyncedFileAttributes {
/** Period of time during which attributes are cached before being refreshed from the server, in milliseconds */
private final static int TTL = 60000;
// this constructor is called by the HadoopFile constructor when no FileStatus is available yet
private HadoopFileAttributes() throws AuthException {
super(TTL, false); // no initial update
fetchAttributes(); // throws AuthException if no or bad credentials
updateExpirationDate(); // declare the attributes as 'fresh'
}
// this constructor is called by #ls()
private HadoopFileAttributes(FileStatus fileStatus) {
super(TTL, false); // no initial update
setAttributes(fileStatus);
setExists(true);
updateExpirationDate(); // declare the attributes as 'fresh'
}
private void fetchAttributes() throws AuthException {
// Do not update attributes while the file is being written, as they are not reflected immediately on the
// name node.
if(isWriting)
return;
try {
setAttributes(fs.getFileStatus(path));
setExists(true);
}
catch(IOException e) {
// File doesn't exist on the server
setExists(false);
setDefaultFileAttributes(getURL(), this);
// Rethrow AuthException
if(e instanceof AuthException)
throw (AuthException)e;
}
}
/**
 * Sets the file attributes using the values contained in the specified Hadoop {@link FileStatus} instance.
*
* @param fileStatus FileStatus instance that contains the file attributes values to use
*/
private void setAttributes(FileStatus fileStatus) {
setDirectory(fileStatus.isDir());
setDate(fileStatus.getModificationTime());
setSize(fileStatus.getLen());
setPermissions(new SimpleFilePermissions(
fileStatus.getPermission().toShort() & PermissionBits.FULL_PERMISSION_INT
));
setOwner(fileStatus.getOwner());
setGroup(fileStatus.getGroup());
}
/**
* Increments the size attribute's value by the given number of bytes.
*
* @param increment number of bytes to add to the current size attribute's value
*/
private void addToSize(long increment) {
setSize(getSize()+increment);
}
/////////////////////////////////////////
// SyncedFileAttributes implementation //
/////////////////////////////////////////
@Override
public void updateAttributes() {
try {
fetchAttributes();
}
catch(Exception e) { // AuthException
LOGGER.info("Failed to update attributes", e);
}
}
}
/**
* Turns a Hadoop {@link FSDataInputStream} into a {@link RandomAccessInputStream}.
*/
private static class HadoopRandomAccessInputStream extends RandomAccessInputStream {
private FSDataInputStream in;
private long length;
private HadoopRandomAccessInputStream(FSDataInputStream in, long length) {
this.in = in;
this.length = length;
}
public long getOffset() throws IOException {
return in.getPos();
}
public long getLength() throws IOException {
return length;
}
public void seek(long offset) throws IOException {
in.seek(offset);
}
@Override
public int read() throws IOException {
return in.read();
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
return in.read(b, off, len);
}
@Override
public void close() throws IOException {
    // Close the underlying Hadoop stream
    in.close();
}
}
/**
* Turns a {@link FilenameFilter} into a Hadoop {@link PathFilter}.
*/
private static class HadoopFilenameFilter implements PathFilter {
private FilenameFilter filenameFilter;
private HadoopFilenameFilter(FilenameFilter filenameFilter) {
this.filenameFilter = filenameFilter;
}
///////////////////////////////
// PathFilter implementation //
///////////////////////////////
public boolean accept(Path path) {
return filenameFilter.accept(path.getName());
}
}
}