/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.nativeio;
import java.io.File;
import java.io.FileDescriptor;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.util.InjectionEventCore;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* JNI wrappers for various native IO-related calls not available in Java.
* These functions should generally be used alongside a fallback to another
* more portable mechanism.
*/
public class NativeIO {
// Flags for open() call from bits/fcntl.h (values are written in octal).
// NOTE(review): presumably these match the Linux definitions only - confirm
// before relying on them on another platform.
public static final int O_RDONLY = 00;
public static final int O_WRONLY = 01;
public static final int O_RDWR = 02;
public static final int O_CREAT = 0100;
public static final int O_EXCL = 0200;
public static final int O_NOCTTY = 0400;
public static final int O_TRUNC = 01000;
public static final int O_APPEND = 02000;
public static final int O_NONBLOCK = 04000;
public static final int O_SYNC = 010000;
public static final int O_ASYNC = 020000;
// Aliases of the flags above.
public static final int O_FSYNC = O_SYNC;
public static final int O_NDELAY = O_NONBLOCK;
// Flags for posix_fadvise() from bits/fcntl.h
/* No further special treatment. */
public static final int POSIX_FADV_NORMAL = 0;
/* Expect random page references. */
public static final int POSIX_FADV_RANDOM = 1;
/* Expect sequential page references. */
public static final int POSIX_FADV_SEQUENTIAL = 2;
/* Will need these pages. */
public static final int POSIX_FADV_WILLNEED = 3;
/* Don't need these pages. */
public static final int POSIX_FADV_DONTNEED = 4;
/* Data will be accessed once. */
public static final int POSIX_FADV_NOREUSE = 5;
// Flags for clock_gettime from time.h
public static final int CLOCK_REALTIME = 0;
public static final int CLOCK_MONOTONIC = 1;
public static final int CLOCK_PROCESS_CPUTIME_ID = 2;
public static final int CLOCK_THREAD_CPUTIME_ID = 3;
public static final int CLOCK_MONOTONIC_RAW = 4;
public static final int CLOCK_REALTIME_COARSE = 5;
public static final int CLOCK_MONOTONIC_COARSE = 6;
// Flags for sync_file_range(); the values are distinct bits (1, 2, 4).
/* Wait upon writeout of all pages in the range before performing the
write. */
public static final int SYNC_FILE_RANGE_WAIT_BEFORE = 1;
/* Initiate writeout of all those dirty pages in the range which are
not presently under writeback. */
public static final int SYNC_FILE_RANGE_WRITE = 2;
/* Wait upon writeout of all pages in the range after performing the
write. */
public static final int SYNC_FILE_RANGE_WAIT_AFTER = 4;
private static final Log LOG = LogFactory.getLog(NativeIO.class);
// Set to true by the static initializer only after initNative() returned
// without throwing.
private static boolean nativeLoaded = false;
// Loaded from the configuration in the static initializer; not read anywhere
// else in this class (presumably consumed by the native code - confirm).
private static boolean workaroundNonThreadSafePasswdCalls = false;
// Each of the following flags starts out true and is cleared by the matching
// *IfPossible() method when the native call throws
// UnsupportedOperationException or UnsatisfiedLinkError, so that the call is
// not attempted again.
private static boolean fadvisePossible = true;
private static boolean ioprioPossible = true;
private static boolean syncFileRangePossible = true;
// Configuration key (and its default) for workaroundNonThreadSafePasswdCalls.
static final String WORKAROUND_NON_THREADSAFE_CALLS_KEY =
"hadoop.workaround.non.threadsafe.getpwuid";
static final boolean WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT = false;
// I/O priority classes, copied from ioprio.h.
// ioprioSetIfPossible(int, int) treats IOPRIO_CLASS_NONE as "ioprio disabled"
// and returns without calling into native code.
public static final int IOPRIO_CLASS_NONE = 0;
public static final int IOPRIO_CLASS_RT = 1;
public static final int IOPRIO_CLASS_BE = 2;
public static final int IOPRIO_CLASS_IDLE = 3;
static {
if (NativeCodeLoader.isNativeCodeLoaded()) {
try {
Configuration conf = new Configuration();
workaroundNonThreadSafePasswdCalls = conf.getBoolean(
WORKAROUND_NON_THREADSAFE_CALLS_KEY,
WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
initNative();
nativeLoaded = true;
} catch (Throwable t) {
// This can happen if the user has an older version of libhadoop.so
// installed - in this case we can continue without native IO
// after warning
LOG.error("Unable to initialize NativeIO libraries", t);
}
}
}
/**
* Returns the current value of the fadvisePossible flag. The flag starts out
* true and is cleared by posixFadviseIfPossible() when the native call throws
* UnsupportedOperationException or UnsatisfiedLinkError.
*
* @return false once posix_fadvise has been found to be unusable
*/
public static boolean isfadvisePossible() {
return fadvisePossible;
}
/**
* Returns the current value of the ioprioPossible flag. The flag starts out
* true and is cleared by the ioprio*IfPossible() methods when the native call
* throws UnsupportedOperationException or UnsatisfiedLinkError.
*
* @return false once the ioprio calls have been found to be unusable
*/
public static boolean isIoprioPossible() {
return ioprioPossible;
}
/**
 * Reports whether the JNI-based native IO extensions can be used.
 *
 * @return true only when the native code library is loaded and the native
 *         initialization of this class completed successfully
 */
public static boolean isAvailable() {
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    return false;
  }
  return nativeLoaded;
}
/**
* Wrapper around open(2).
*
* @param path path of the file to open
* @param flags presumably a bitwise OR of the O_* constants of this class -
* TODO confirm against the native implementation
* @param mode passed through to the native call; presumably the permission
* bits used when a file is created - TODO confirm
*/
public static native FileDescriptor open(String path, int flags, int mode) throws IOException;
/** Wrapper around fstat(2); the result is delivered as a {@link Stat}. */
public static native Stat fstat(FileDescriptor fd) throws IOException;
/** Wrapper around stat(2); the result is delivered as a {@link Stat}. */
public static native Stat stat(String path) throws IOException;
/** Wrapper around link(2). See link(File, File) for a variant with a non-native fallback. */
public static native void link(String src, String dst) throws IOException;
/** Wrapper around chmod(2) */
public static native void chmod(String path, int mode) throws IOException;
/** Wrapper around fsync(2) (Java does not support fsync on a directory) */
public static native void fsync(String path) throws IOException;
/**
* Wrapper around posix_fadvise(2). Callers that want the call to be skipped
* when it is unavailable should use posixFadviseIfPossible() instead.
*/
public static native void posix_fadvise(
FileDescriptor fd, long offset, long len, int flags) throws NativeIOException;
/**
* Wrapper around sync_file_range(2). Package-private; the public entry point
* is syncFileRangeIfPossible().
*/
static native void sync_file_range(
FileDescriptor fd, long offset, long nbytes, int flags) throws NativeIOException;
/** Initialize the JNI method ID and class ID cache. Called once from the static initializer. */
private static native void initNative();
/**
* Wrapper around ioprio_set, we always do this for the current thread so
* we omit 'which' and 'who'.
*/
static native void ioprio_set(int classOfService, int priority) throws IOException;
/**
* Wrapper around ioprio_set, we always do this for the current thread so
* we omit 'which' and 'who'. This differs from ioprio_set(class, priority)
* in that the argument is the combined value returned by ioprio_get, so it
* can be passed in directly to reset the priority of a thread.
*/
static native void ioprio_set(int ioprio_prio_value) throws IOException;
/**
* Wrapper around ioprio_get, we always do this for the current thread so
* we omit 'which' and 'who'.
*/
static native int ioprio_get() throws IOException;
/**
 * Stats a file through the native stat() wrapper, using the file's absolute
 * path.
 *
 * @param file the file to stat; must not be null
 * @return the Stat produced by the native call
 * @throws IllegalArgumentException if file is null
 * @throws IOException propagated from the native stat call
 */
public static Stat stat(File file) throws IOException {
  if (file != null) {
    final String absolutePath = file.getAbsolutePath();
    return stat(absolutePath);
  }
  throw new IllegalArgumentException("Null parameter passed");
}
/**
 * Creates a hard link from dst to src. The native link() wrapper is used when
 * native IO is available; otherwise the work is delegated to
 * HardLink.createHardLink().
 *
 * @param src the existing file; must not be null
 * @param dst the link to create; must not be null
 * @throws IllegalArgumentException if either argument is null
 * @throws IOException propagated from whichever mechanism performs the link
 */
public static void link(File src, File dst) throws IOException {
  final boolean bothPresent = (src != null) && (dst != null);
  if (!bothPresent) {
    throw new IllegalArgumentException("Null parameter passed");
  }
  if (!isAvailable()) {
    // No native support: fall back to the portable implementation.
    HardLink.createHardLink(src, dst);
    return;
  }
  final String srcPath = src.getAbsolutePath();
  final String dstPath = dst.getAbsolutePath();
  link(srcPath, dstPath);
}
/**
* Wrapper around native clock_gettime().
*
* @param which_clock clock to query; presumably one of the CLOCK_* constants
* of this class - TODO confirm against the native implementation
* @param tp TimeSpec handed to the native call; presumably filled in with the
* result, since the method itself returns nothing - TODO confirm
*/
public static native void clock_gettime(int which_clock,
TimeSpec tp) throws IOException;
/**
 * Validates the arguments of an ioprio_set(class, data) call.
 *
 * @param classOfService the I/O scheduling class; must be within 0-3
 * @param data the priority data within the class; must be within 0-7
 * @throws IllegalArgumentException if either argument is out of range; the
 *         message names the offending argument and its value
 */
public static void validateIoprioSet(int classOfService, int data) {
  if (classOfService < 0 || classOfService > 3) {
    throw new IllegalArgumentException("Invalid class of service : "
        + classOfService + " (0-3) supported");
  }
  if (data < 0 || data > 7) {
    // Report the priority data itself; the class of service is valid here.
    throw new IllegalArgumentException("Invalid priority data : "
        + data + " (0-7) supported");
  }
}
/**
 * Checks that the given value lies between POSIX_FADV_NORMAL and
 * POSIX_FADV_NOREUSE (both inclusive).
 *
 * @param advise the posix_fadvise advice value to check
 * @throws IllegalArgumentException if the value is outside that range
 */
public static void validatePosixFadvise(int advise) {
  final boolean withinRange = advise >= NativeIO.POSIX_FADV_NORMAL
      && advise <= NativeIO.POSIX_FADV_NOREUSE;
  if (withinRange) {
    return;
  }
  throw new IllegalArgumentException("Invalid posix fadvise : " + advise);
}
/**
 * Calls ioprio_get for the current thread when native IO is loaded and the
 * ioprio calls have not been marked unusable.
 *
 * @return the ioprio value on success; -1 when the call was skipped or turned
 *         out to be unsupported
 * @throws NativeIOException
 *           if there is an error with the syscall
 */
public static int ioprioGetIfPossible() throws IOException {
  final int notAvailable = -1;
  if (!nativeLoaded || !ioprioPossible) {
    return notAvailable;
  }
  try {
    return ioprio_get();
  } catch (UnsupportedOperationException unsupported) {
    // Not supported here: remember that so the call is not retried.
    LOG.warn("ioprioGetIfPossible() failed", unsupported);
    ioprioPossible = false;
  } catch (UnsatisfiedLinkError unlinked) {
    LOG.warn("ioprioGetIfPossible() failed", unlinked);
    ioprioPossible = false;
  } catch (NativeIOException syscallFailure) {
    LOG.warn("ioprioGetIfPossible() failed", syscallFailure);
    throw syscallFailure;
  }
  return notAvailable;
}
/**
 * Calls ioprio_set(ioprio_value) for the current thread when native IO is
 * loaded and the ioprio calls have not been marked unusable; otherwise does
 * nothing.
 *
 * @param ioprio_value the combined priority value handed to the native call
 * @throws NativeIOException
 *           if there is an error with the syscall
 */
public static void ioprioSetIfPossible(int ioprio_value) throws IOException {
  if (!nativeLoaded || !ioprioPossible) {
    return;
  }
  try {
    ioprio_set(ioprio_value);
  } catch (UnsupportedOperationException unsupported) {
    // Not supported here: remember that so the call is not retried.
    LOG.warn("ioprioSetIfPossible() failed", unsupported);
    ioprioPossible = false;
  } catch (UnsatisfiedLinkError unlinked) {
    LOG.warn("ioprioSetIfPossible() failed", unlinked);
    ioprioPossible = false;
  } catch (NativeIOException syscallFailure) {
    LOG.warn("ioprioSetIfPossible() failed", syscallFailure);
    throw syscallFailure;
  }
}
/**
 * Calls ioprio_set(class, data) for the current thread when native IO is
 * loaded and the ioprio calls have not been marked unusable. A class of
 * IOPRIO_CLASS_NONE means ioprio is disabled, in which case nothing is done.
 *
 * @param classOfService the I/O scheduling class
 * @param data the priority data handed to the native call
 * @throws NativeIOException
 *           if there is an error with the syscall
 */
public static void ioprioSetIfPossible(int classOfService, int data)
    throws IOException {
  if (!nativeLoaded || !ioprioPossible) {
    return;
  }
  if (classOfService == IOPRIO_CLASS_NONE) {
    // ioprio is disabled.
    return;
  }
  try {
    ioprio_set(classOfService, data);
  } catch (UnsupportedOperationException unsupported) {
    // Not supported here: remember that so the call is not retried.
    LOG.warn("ioprioSetIfPossible() failed", unsupported);
    ioprioPossible = false;
  } catch (UnsatisfiedLinkError unlinked) {
    LOG.warn("ioprioSetIfPossible() failed", unlinked);
    ioprioPossible = false;
  } catch (NativeIOException syscallFailure) {
    LOG.warn("ioprioSetIfPossible() failed", syscallFailure);
    throw syscallFailure;
  }
}
/**
 * Calls the native fsync on the given file/dir path. When native IO is not
 * loaded, only a warning is logged and no sync takes place.
 *
 * @param path the file or directory path to sync
 * @throws IOException propagated from the native fsync call
 */
public static void fsyncIfPossible(String path) throws IOException {
  if (!nativeLoaded) {
    LOG.warn("Cannot fsync : " + path +
        " since native libraries are not available");
    return;
  }
  fsync(path);
}
/**
 * Calls posix_fadvise on the given file descriptor; see the manpage of that
 * syscall for details. Does nothing when native IO is not loaded or the call
 * has been found to be unavailable on this system.
 *
 * @param fd the descriptor to advise on
 * @param offset start of the region
 * @param len length of the region
 * @param flags the advice value handed to the native call
 * @throws NativeIOException if there is an error with the syscall
 */
public static void posixFadviseIfPossible(
    FileDescriptor fd, long offset, long len, int flags)
    throws NativeIOException {
  if (!nativeLoaded || !fadvisePossible) {
    return;
  }
  try {
    posix_fadvise(fd, offset, len, flags);
    // The injection event is raised only after the native call returned.
    InjectionHandler.processEvent(
        InjectionEventCore.NATIVEIO_POSIX_FADVISE, flags);
  } catch (UnsupportedOperationException unsupported) {
    // Not supported here: remember that so the call is not retried.
    LOG.warn("posixFadviseIfPossible() failed", unsupported);
    fadvisePossible = false;
  } catch (UnsatisfiedLinkError unlinked) {
    LOG.warn("posixFadviseIfPossible() failed", unlinked);
    fadvisePossible = false;
  } catch (NativeIOException syscallFailure) {
    LOG.warn("posixFadviseIfPossible() failed", syscallFailure);
    throw syscallFailure;
  }
}
/**
 * Calls sync_file_range on the given file descriptor; see the manpage of that
 * syscall for details. The injection event is always raised first; the native
 * call itself is skipped when native IO is not loaded or the call has been
 * found to be unavailable on this system.
 *
 * @param fd the descriptor to sync
 * @param offset start of the range
 * @param nbytes length of the range
 * @param flags flags handed to the native call
 * @throws NativeIOException if there is an error with the syscall
 */
public static void syncFileRangeIfPossible(
    FileDescriptor fd, long offset, long nbytes, int flags)
    throws NativeIOException {
  InjectionHandler.processEvent(InjectionEventCore.NATIVEIO_SYNC_FILE_RANGE,
      flags);
  if (!nativeLoaded || !syncFileRangePossible) {
    return;
  }
  try {
    sync_file_range(fd, offset, nbytes, flags);
  } catch (UnsupportedOperationException unsupported) {
    // Not supported here: remember that so the call is not retried.
    LOG.warn("syncFileRangeIfPossible() failed", unsupported);
    syncFileRangePossible = false;
  } catch (UnsatisfiedLinkError unlinked) {
    LOG.warn("syncFileRangeIfPossible() failed", unlinked);
    syncFileRangePossible = false;
  } catch (NativeIOException syscallFailure) {
    final String details = "syncFileRangeIfPossible() failed: fd " + fd + " offset "
        + offset + " nbytes " + nbytes + " flags " + flags;
    LOG.warn(details, syscallFailure);
    throw syscallFailure;
  }
}
/**
 * Calls the native clock_gettime wrapper. Unlike the other *IfPossible
 * methods, this one fails when native IO is not loaded.
 *
 * @param which_clock the clock identifier handed to the native call
 * @param tp the TimeSpec handed to the native call
 * @throws IOException if native IO is not loaded, or propagated from the
 *         native call
 */
public static void clockGetTimeIfPossible(int which_clock,
    TimeSpec tp) throws IOException {
  if (!nativeLoaded) {
    throw new IOException("Native not loaded.");
  }
  clock_gettime(which_clock, tp);
}
/**
 * Immutable result type of the native stat and fstat calls.
 */
public static class Stat {
  // File-type constants (octal); S_IFMT is the file-type mask.
  public static final int S_IFMT = 0170000; /* type of file */
  public static final int S_IFIFO = 0010000; /* named pipe (fifo) */
  public static final int S_IFCHR = 0020000; /* character special */
  public static final int S_IFDIR = 0040000; /* directory */
  public static final int S_IFBLK = 0060000; /* block special */
  public static final int S_IFREG = 0100000; /* regular */
  public static final int S_IFLNK = 0120000; /* symbolic link */
  public static final int S_IFSOCK = 0140000; /* socket */
  public static final int S_IFWHT = 0160000; /* whiteout */

  // Special-bit and owner-permission constants (octal).
  public static final int S_ISUID = 0004000; /* set user id on execution */
  public static final int S_ISGID = 0002000; /* set group id on execution */
  public static final int S_ISVTX = 0001000; /* save swapped text even after use */
  public static final int S_IRUSR = 0000400; /* read permission, owner */
  public static final int S_IWUSR = 0000200; /* write permission, owner */
  public static final int S_IXUSR = 0000100; /* execute/search permission, owner */

  private final String owner;
  private final String group;
  private final int mode;
  // The number of hardlinks for this file.
  private final int hardlinks;
  // The inode number for this file.
  private final long inode;

  /** Stores the given values as-is; no validation is performed. */
  Stat(String owner, String group, int mode, int hardlinks, long inode) {
    this.inode = inode;
    this.hardlinks = hardlinks;
    this.mode = mode;
    this.group = group;
    this.owner = owner;
  }

  /** @return the owner name */
  public String getOwner() {
    return this.owner;
  }

  /** @return the group name */
  public String getGroup() {
    return this.group;
  }

  /** @return the raw mode value */
  public int getMode() {
    return this.mode;
  }

  /** @return the inode number */
  public long getInode() {
    return inode;
  }

  /** @return the number of hard links */
  public int getHardLinks() {
    return hardlinks;
  }

  /** Renders every field; the mode is printed in decimal. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("Stat(owner='");
    text.append(owner).append("', group='").append(group).append("'");
    text.append(", mode=").append(mode);
    text.append(", hardlinks=").append(hardlinks);
    text.append(", inode=").append(inode).append(")");
    return text.toString();
  }
}
/**
 * Result type of the clock_gettime call: a seconds value and a nanoseconds
 * value, both public, mutable and initially zero.
 */
public static class TimeSpec {
  public long tv_sec = 0;
  public long tv_nsec = 0;

  /** Renders both fields as {@code {tv_sec=S,tv_nsec=N}}. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("{tv_sec=").append(tv_sec);
    text.append(",").append("tv_nsec=").append(tv_nsec);
    text.append("}");
    return text.toString();
  }
}
}