package org.apache.blur.store.hdfs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.blur.metrics.MetricsConstants.HDFS;
import static org.apache.blur.metrics.MetricsConstants.ORG_APACHE_BLUR;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.Timer;
import java.util.TimerTask;
import java.util.TreeSet;
import java.util.WeakHashMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import org.apache.blur.BlurConfiguration;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.memory.MemoryLeakDetector;
import org.apache.blur.store.blockcache.LastModified;
import org.apache.blur.store.hdfs_v2.HdfsUtils;
import org.apache.blur.trace.Trace;
import org.apache.blur.trace.Tracer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.lucene.store.BufferedIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoLockFactory;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Counter;
import com.yammer.metrics.core.Histogram;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.MetricName;
public class HdfsDirectory extends Directory implements LastModified, HdfsSymlink {
private static final Log LOG = LogFactory.getLog(HdfsDirectory.class);
// Suffix of the marker file whose contents hold the real target path of a "symlink".
public static final String LNK = ".lnk";
// Suffix used for temporary files.
public static final String TMP = ".tmp";
private static final String UTF_8 = "UTF-8";
// URI scheme for real HDFS; enables asynchronous close (see constructor).
private static final String HDFS_SCHEMA = "hdfs";
/**
 * We keep the metrics separate per filesystem (keyed by filesystem URI).
 */
protected static Map<URI, MetricsGroup> _metricsGroupMap = new WeakHashMap<URI, MetricsGroup>();
// Shared daemon timer that periodically drains CLOSING_QUEUE (see static initializer).
private static final Timer TIMER;
// Output streams queued here are closed in the background to avoid slow HDFS close calls.
private static final BlockingQueue<Closeable> CLOSING_QUEUE = new LinkedBlockingQueue<Closeable>();
// Immutable snapshot of a file's last-modified time and length.
static class FStat {
// Snapshot taken from a live FileStatus.
FStat(FileStatus fileStatus) {
this(fileStatus.getModificationTime(), fileStatus.getLen());
}
FStat(long lastMod, long length) {
_lastMod = lastMod;
_length = length;
}
final long _lastMod;
final long _length;
}
// Start a single daemon timer that drains the async-close queue every 3 seconds;
// daemon=true so it never blocks JVM shutdown.
static {
TIMER = new Timer("HdfsDirectory-Timer", true);
TIMER.schedule(getClosingQueueTimerTask(), TimeUnit.SECONDS.toMillis(3), TimeUnit.SECONDS.toMillis(3));
}
// Fully qualified root path of this directory.
protected final Path _path;
protected final FileSystem _fileSystem;
// Shared per-filesystem metrics (see _metricsGroupMap).
protected final MetricsGroup _metricsGroup;
// Non-null only when _useCache is true.
protected final FStatusCache _fileStatusCache;
// name -> whether a ".lnk" marker exists for it (populated lazily in isSymlink).
protected final Map<String, Boolean> _symlinkMap = new ConcurrentHashMap<String, Boolean>();
// name -> resolved real path for symlinked files (see getRealFilePathFromSymlink).
protected final Map<String, Path> _symlinkPathMap = new ConcurrentHashMap<String, Path>();
// NOTE(review): the two copy-file maps are not referenced in this file; verify callers/subclasses.
protected final Map<String, Boolean> _copyFileMap = new ConcurrentHashMap<String, Boolean>();
protected final Map<String, Path> _copyFilePathMap = new ConcurrentHashMap<String, Path>();
// Always true today; the non-cached code paths remain as fallbacks.
protected final boolean _useCache = true;
// True only for the real "hdfs" scheme (set in the constructor).
protected final boolean _asyncClosing;
protected final SequentialReadControl _sequentialReadControl;
// When true, opened streams are registered with MemoryLeakDetector.
protected final boolean _resourceTracking;
/**
 * Caches per-file metadata ({@link FStat}) for the directory and persists it
 * to an on-disk "file_manifest" so listings and lengths do not require
 * repeated filesystem round trips. Every mutation rewrites the manifest under
 * the write lock; reads take the read lock so they see a consistent cache.
 */
static class FStatusCache {

  final Map<String, FStat> _cache = new ConcurrentHashMap<String, FStat>();
  final Path _path;
  final FileSystem _fileSystem;
  final Path _newManifest;
  final Path _manifest;
  final WriteLock _writeLock;
  final ReadLock _readLock;
  final Path _newManifestTmp;

  public FStatusCache(FileSystem fileSystem, Path path) {
    _fileSystem = fileSystem;
    _path = path;
    _newManifest = new Path(_path, "file_manifest.new");
    _newManifestTmp = new Path(_path, "file_manifest.tmp");
    _manifest = new Path(_path, "file_manifest");
    // Fair lock so manifest writers are not starved by a steady reader stream.
    ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);
    _writeLock = lock.writeLock();
    _readLock = lock.readLock();
  }

  /**
   * Adds all entries in bulk and rewrites the on-disk manifest once.
   */
  public void putAllFStat(Map<String, FStat> bulk) throws IOException {
    _writeLock.lock();
    try {
      _cache.putAll(bulk);
      syncFileCache();
    } finally {
      _writeLock.unlock();
    }
  }

  /**
   * Adds or replaces a single entry and rewrites the on-disk manifest.
   */
  public void putFStat(String name, FStat fStat) throws IOException {
    _writeLock.lock();
    try {
      _cache.put(name, fStat);
      syncFileCache();
    } finally {
      _writeLock.unlock();
    }
  }

  /**
   * Removes an entry and rewrites the on-disk manifest.
   */
  public void removeFStat(String name) throws IOException {
    _writeLock.lock();
    try {
      _cache.remove(name);
      syncFileCache();
    } finally {
      _writeLock.unlock();
    }
  }

  /**
   * Returns a snapshot of the cached file names.
   */
  public Set<String> getNames() {
    _readLock.lock();
    try {
      return new HashSet<String>(_cache.keySet());
    } finally {
      _readLock.unlock();
    }
  }

  public boolean containsFile(String name) {
    _readLock.lock();
    try {
      return _cache.containsKey(name);
    } finally {
      _readLock.unlock();
    }
  }

  /**
   * Returns the cached metadata for the file, or null when unknown.
   */
  public FStat getFStat(String name) {
    _readLock.lock();
    try {
      return _cache.get(name);
    } finally {
      _readLock.unlock();
    }
  }

  /**
   * Loads the cache from the newest manifest available on disk.
   *
   * @return false when no manifest exists and the cache cannot be loaded.
   */
  public boolean loadCacheFromManifest() throws IOException {
    // Check file_manifest.new first; if it doesn't exist check file_manifest;
    // if neither exists, the cache can't be loaded.
    if (_fileSystem.exists(_newManifest)) {
      loadCacheFromManifest(_newManifest);
      return true;
    } else if (_fileSystem.exists(_manifest)) {
      loadCacheFromManifest(_manifest);
      return true;
    } else {
      return false;
    }
  }

  private void syncFileCache() throws IOException {
    // Write to a tmp file, then rotate tmp -> new -> final, so a crash at any
    // point leaves at least one complete manifest on disk.
    FSDataOutputStream outputStream = _fileSystem.create(_newManifestTmp, true);
    try {
      writeFileCache(outputStream);
    } finally {
      // Close even when the write throws, otherwise the stream leaks.
      outputStream.close();
    }
    _fileSystem.delete(_newManifest, false);
    if (_fileSystem.rename(_newManifestTmp, _newManifest)) {
      _fileSystem.delete(_manifest, false);
      if (_fileSystem.rename(_newManifest, _manifest)) {
        LOG.debug("Manifest sync complete for [{0}]", _manifest);
      } else {
        throw new IOException("Could not rename [" + _newManifest + "] to [" + _manifest + "]");
      }
    } else {
      throw new IOException("Could not rename [" + _newManifestTmp + "] to [" + _newManifest + "]");
    }
  }

  // Manifest format: entry count, then per entry a length-prefixed UTF-8 name,
  // the lastMod long and the length long.
  private void writeFileCache(FSDataOutputStream outputStream) throws IOException {
    Set<Entry<String, FStat>> entrySet = _cache.entrySet();
    outputStream.writeInt(_cache.size());
    for (Entry<String, FStat> e : entrySet) {
      String name = e.getKey();
      FStat fstat = e.getValue();
      writeString(outputStream, name);
      outputStream.writeLong(fstat._lastMod);
      outputStream.writeLong(fstat._length);
    }
  }

  private void loadCacheFromManifest(Path manifest) throws IOException {
    FSDataInputStream inputStream = _fileSystem.open(manifest);
    try {
      int count = inputStream.readInt();
      for (int i = 0; i < count; i++) {
        String name = readString(inputStream);
        long lastMod = inputStream.readLong();
        long length = inputStream.readLong();
        FStat fstat = new FStat(lastMod, length);
        _cache.put(name, fstat);
      }
    } finally {
      // Close even when a read throws, otherwise the stream leaks.
      inputStream.close();
    }
  }

  // Reads a length-prefixed UTF-8 string.
  private String readString(FSDataInputStream inputStream) throws IOException {
    int length = inputStream.readInt();
    byte[] buf = new byte[length];
    inputStream.readFully(buf);
    return new String(buf, UTF_8);
  }

  // Writes a length-prefixed UTF-8 string.
  private void writeString(FSDataOutputStream outputStream, String s) throws IOException {
    byte[] bs = s.getBytes(UTF_8);
    outputStream.writeInt(bs.length);
    outputStream.write(bs);
  }
}
/**
 * Creates an HdfsDirectory with default sequential read control and no
 * resource tracking.
 */
public HdfsDirectory(Configuration configuration, Path path) throws IOException {
this(configuration, path, new SequentialReadControl(new BlurConfiguration()));
}
/**
 * Creates an HdfsDirectory with the given read control and no resource
 * tracking.
 */
public HdfsDirectory(Configuration configuration, Path path, SequentialReadControl sequentialReadControl)
throws IOException {
this(configuration, path, sequentialReadControl, false);
}
/**
 * Creates an HdfsDirectory rooted at the given path (created if missing).
 *
 * @param configuration hadoop configuration used to resolve the filesystem.
 * @param path directory root.
 * @param sequentialReadControl read-ahead tuning; defaults are used when null.
 * @param resourceTracking when true, opened streams are registered with
 *          MemoryLeakDetector.
 */
public HdfsDirectory(Configuration configuration, Path path, SequentialReadControl sequentialReadControl,
boolean resourceTracking) throws IOException {
_resourceTracking = resourceTracking;
if (sequentialReadControl == null) {
_sequentialReadControl = new SequentialReadControl(new BlurConfiguration());
} else {
_sequentialReadControl = sequentialReadControl;
}
_fileSystem = path.getFileSystem(configuration);
_path = _fileSystem.makeQualified(path);
// Async close is only used on real HDFS where close() can block in built-in
// sleeps; every other filesystem closes synchronously.
if (_path.toUri().getScheme().equals(HDFS_SCHEMA)) {
_asyncClosing = true;
} else {
_asyncClosing = false;
}
_fileSystem.mkdirs(path);
// This directory performs no Lucene locking of its own.
setLockFactory(NoLockFactory.getNoLockFactory());
// Metrics groups are shared per filesystem URI; create-on-first-use.
synchronized (_metricsGroupMap) {
URI uri = _fileSystem.getUri();
MetricsGroup metricsGroup = _metricsGroupMap.get(uri);
if (metricsGroup == null) {
String scope = uri.toString();
metricsGroup = createNewMetricsGroup(scope);
_metricsGroupMap.put(uri, metricsGroup);
}
_metricsGroup = metricsGroup;
}
// Prime the file status cache from the persisted manifest when one exists,
// otherwise from a full directory listing.
if (_useCache) {
_fileStatusCache = new FStatusCache(_fileSystem, _path);
if (!_fileStatusCache.loadCacheFromManifest()) {
FileStatus[] listStatus = _fileSystem.listStatus(_path);
addToCache(listStatus);
}
} else {
_fileStatusCache = null;
}
}
/**
 * Bulk-populates the file status cache from a raw directory listing,
 * resolving ".lnk" marker files to the real file they point at.
 *
 * @param listStatus raw listing of the directory root.
 */
private void addToCache(FileStatus[] listStatus) throws IOException {
  Map<String, FStat> bulk = new HashMap<String, FStat>();
  for (FileStatus fileStatus : listStatus) {
    if (!fileStatus.isDir()) {
      Path p = fileStatus.getPath();
      String name = p.getName();
      long lastMod;
      String resolvedName;
      if (name.endsWith(LNK)) {
        // Symlink marker: use the target file's modification time.
        resolvedName = getRealFileName(name);
        Path resolvedPath = getPath(resolvedName);
        FileStatus resolvedFileStatus = _fileSystem.getFileStatus(resolvedPath);
        lastMod = resolvedFileStatus.getModificationTime();
      } else {
        resolvedName = name;
        lastMod = fileStatus.getModificationTime();
      }
      // length() resolves the current length via the filesystem (handles DFS
      // files whose listed length may be stale).
      long length = length(resolvedName);
      bulk.put(resolvedName, new FStat(lastMod, length));
    }
  }
  _fileStatusCache.putAllFStat(bulk);
  // Log only after the cache and its manifest have actually been updated;
  // the original logged "complete" before performing the update.
  LOG.info("Bulk cache update for [{0}] complete", _path);
}
/**
 * Builds the periodic task that drains CLOSING_QUEUE, closing each queued
 * stream in the background. Errors are logged, never propagated, so the
 * shared timer thread survives.
 */
private static TimerTask getClosingQueueTimerTask() {
  return new TimerTask() {
    @Override
    public void run() {
      try {
        Closeable closeable;
        // Drain until the queue is momentarily empty, then wait for the next tick.
        while ((closeable = CLOSING_QUEUE.poll()) != null) {
          LOG.info("Closing [{0}] [{1}]", System.identityHashCode(closeable), closeable);
          org.apache.hadoop.io.IOUtils.cleanup(LOG, closeable);
        }
      } catch (Throwable t) {
        LOG.error("Unknown error.", t);
      }
    }
  };
}
/**
 * Strips a trailing ".lnk" suffix from a symlink marker name; any other name
 * is returned unchanged.
 */
public static String getRealFileName(String name) {
  if (!name.endsWith(LNK)) {
    return name;
  }
  return name.substring(0, name.length() - LNK.length());
}
/**
 * Registers one set of HDFS I/O metrics for the given scope (the filesystem
 * URI): latency histograms (in microseconds), throughput/seek meters, and
 * block-locality counters.
 */
protected MetricsGroup createNewMetricsGroup(String scope) {
  Histogram readRandomAccess = Metrics.newHistogram(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Read Random Latency in \u00B5s", scope));
  Histogram readStreamAccess = Metrics.newHistogram(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Read Stream Latency in \u00B5s", scope));
  Histogram writeAccess = Metrics.newHistogram(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Write Latency in \u00B5s", scope));
  Meter readRandomThroughput = Metrics.newMeter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Read Random Throughput", scope), "Read Random Bytes", TimeUnit.SECONDS);
  Meter readStreamThroughput = Metrics.newMeter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Read Stream Throughput", scope), "Read Stream Bytes", TimeUnit.SECONDS);
  Meter readStreamSeek = Metrics.newMeter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Read Stream Seeks", scope), "Read Stream Seeks", TimeUnit.SECONDS);
  Meter writeThroughput = Metrics.newMeter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Write Throughput", scope), "Write Bytes", TimeUnit.SECONDS);
  Counter totalHdfsBlock = Metrics.newCounter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Hdfs Blocks Total", scope));
  Counter localHdfsBlock = Metrics.newCounter(
      new MetricName(ORG_APACHE_BLUR, HDFS, "Hdfs Blocks Local", scope));
  return new MetricsGroup(readRandomAccess, readStreamAccess, writeAccess, readRandomThroughput,
      readStreamThroughput, readStreamSeek, writeThroughput, totalHdfsBlock, localHdfsBlock);
}
/**
 * Returns a short description containing this directory's root path.
 */
@Override
public String toString() {
  StringBuilder builder = new StringBuilder("HdfsDirectory path=[");
  builder.append(getPath()).append(']');
  return builder.toString();
}
/**
 * Creates a new file for writing, replacing any existing file of the same
 * name, and registers it in the file status cache immediately so it is
 * visible to fileExists/listAll before the first flush.
 */
@Override
public IndexOutput createOutput(final String name, IOContext context) throws IOException {
LOG.debug("createOutput [{0}] [{1}] [{2}]", name, context, getPath());
if (fileExists(name)) {
deleteFile(name);
}
if (_useCache) {
// Register with length 0 now; the real length is recorded on close.
_fileStatusCache.putFStat(name, new FStat(System.currentTimeMillis(), 0L));
}
final FSDataOutputStream outputStream = openForOutput(name);
trackObject(outputStream, "Outputstream", name, _path);
return new BufferedIndexOutput() {
@Override
public long length() throws IOException {
return outputStream.getPos();
}
@Override
protected void flushBuffer(byte[] b, int offset, int len) throws IOException {
long start = System.nanoTime();
outputStream.write(b, offset, len);
long end = System.nanoTime();
// Latency is recorded in microseconds.
_metricsGroup.writeAccess.update((end - start) / 1000);
_metricsGroup.writeThroughput.mark(len);
}
@Override
public void close() throws IOException {
super.close();
long length = outputStream.getPos();
if (_useCache) {
// Record the final length and modification time in the cache.
_fileStatusCache.putFStat(name, new FStat(System.currentTimeMillis(), length));
}
// This exists because HDFS is so slow to close files. There are
// built-in sleeps during the close call.
if (_asyncClosing && _useCache) {
// sync() makes the data durable before the actual close is handed to the
// background CLOSING_QUEUE drainer.
outputStream.sync();
CLOSING_QUEUE.add(outputStream);
} else {
outputStream.close();
}
}
@Override
public void seek(long pos) throws IOException {
throw new IOException("seeks not allowed on IndexOutputs.");
}
};
}
/**
 * Registers the given object with the memory-leak detector, but only when
 * resource tracking was enabled at construction time.
 */
protected <T> void trackObject(T t, String message, Object... args) {
  if (!_resourceTracking) {
    return;
  }
  MemoryLeakDetector.record(t, message, args);
}
/**
 * Creates (or overwrites) the backing file for the given name, tracing the
 * filesystem call.
 */
protected FSDataOutputStream openForOutput(String name) throws IOException {
  Path filePath = getPath(name);
  Tracer tracer = Trace.trace("filesystem - create", Trace.param("path", filePath));
  try {
    return _fileSystem.create(filePath);
  } finally {
    tracer.done();
  }
}
/**
 * Opens a file for reading. The length is resolved up front (from the cache
 * when enabled) and baked into the returned input.
 *
 * @throws FileNotFoundException when the file is unknown.
 */
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
LOG.debug("openInput [{0}] [{1}] [{2}]", name, context, getPath());
if (!fileExists(name)) {
throw new FileNotFoundException("File [" + name + "] not found.");
}
long fileLength = fileLength(name);
Path path = getPath(name);
// The handle may close its underlying streams asynchronously on real HDFS.
FSInputFileHandle fsInputFileHandle = new FSInputFileHandle(_fileSystem, path, fileLength, name, _resourceTracking,
_asyncClosing && _useCache);
// Each input gets its own clone of the sequential-read settings.
HdfsIndexInput input = new HdfsIndexInput(this, fsInputFileHandle, fileLength, _metricsGroup, name,
_sequentialReadControl.clone());
return input;
}
/**
 * Lists the logical file names in this directory. With the cache enabled
 * this is a pure in-memory snapshot; otherwise the filesystem is listed and
 * ".lnk" markers are mapped back to their real file names.
 */
@Override
public String[] listAll() throws IOException {
LOG.debug("listAll [{0}]", getPath());
if (_useCache) {
Set<String> names = _fileStatusCache.getNames();
return names.toArray(new String[names.size()]);
}
Tracer trace = Trace.trace("filesystem - list", Trace.param("path", getPath()));
try {
FileStatus[] files = _fileSystem.listStatus(getPath(), new PathFilter() {
@Override
public boolean accept(Path path) {
try {
// PathFilter.accept cannot throw IOException, so wrap it.
return _fileSystem.isFile(path);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
// TreeSet both sorts the names and de-duplicates a file that has
// a ".lnk" marker alongside its resolved name.
SortedSet<String> result = new TreeSet<String>();
for (int i = 0; i < files.length; i++) {
String name = files[i].getPath().getName();
if (name.endsWith(LNK)) {
result.add(getRealFileName(name));
} else {
result.add(name);
}
}
return result.toArray(new String[result.size()]);
} finally {
trace.done();
}
}
/**
 * Checks whether the named file exists — against the in-memory cache when
 * enabled, otherwise directly against the filesystem.
 */
@Override
public boolean fileExists(String name) throws IOException {
  LOG.debug("fileExists [{0}] [{1}]", name, getPath());
  return _useCache ? _fileStatusCache.containsFile(name) : exists(name);
}
/**
 * Direct filesystem existence check (bypasses the file status cache).
 */
protected boolean exists(String name) throws IOException {
  Path filePath = getPath(name);
  Tracer tracer = Trace.trace("filesystem - exists", Trace.param("path", filePath));
  try {
    return _fileSystem.exists(filePath);
  } finally {
    tracer.done();
  }
}
/**
 * Deletes the named file, evicting it from the file status cache first.
 *
 * @throws FileNotFoundException when the file is unknown.
 */
@Override
public void deleteFile(String name) throws IOException {
LOG.debug("deleteFile [{0}] [{1}]", name, getPath());
if (fileExists(name)) {
if (_useCache) {
// Evict before the filesystem delete so readers stop seeing the file
// immediately; if the delete below fails the cache may under-report.
_fileStatusCache.removeFStat(name);
}
delete(name);
} else {
throw new FileNotFoundException("File [" + name + "] not found");
}
}
/**
 * Removes the on-disk representation of the named file — the ".lnk" marker
 * for a symlinked file, the file itself otherwise — and evicts the symlink
 * caches.
 */
protected void delete(String name) throws IOException {
  Tracer trace = Trace.trace("filesystem - delete", Trace.param("path", getPath(name)));
  try {
    // Resolve the delete target BEFORE evicting the symlink caches:
    // getPathOrSymlinkForDelete() -> isSymlink() repopulates _symlinkMap, so
    // evicting first (as the original did) left a stale "is a symlink" entry
    // behind after the file was deleted.
    Path targetPath = getPathOrSymlinkForDelete(name);
    if (_useCache) {
      _symlinkMap.remove(name);
      _symlinkPathMap.remove(name);
    }
    _fileSystem.delete(targetPath, true);
  } finally {
    trace.done();
  }
}
/**
 * Returns the length of the named file, served from the cache when enabled.
 *
 * @throws FileNotFoundException when the file is unknown to the cache.
 */
@Override
public long fileLength(String name) throws IOException {
  LOG.debug("fileLength [{0}] [{1}]", name, getPath());
  if (!_useCache) {
    return length(name);
  }
  FStat fStat = _fileStatusCache.getFStat(name);
  if (fStat == null) {
    throw new FileNotFoundException(name);
  }
  return fStat._length;
}
/**
 * Reads the file length directly from the filesystem. For a
 * DistributedFileSystem the file is opened and HdfsUtils.getFileLength is
 * used instead of FileStatus — NOTE(review): presumably to get an accurate
 * length for files still being written; confirm against HdfsUtils.
 */
protected long length(String name) throws IOException {
Path path = getPath(name);
Tracer trace = Trace.trace("filesystem - length", Trace.param("path", path));
try {
if (_fileSystem instanceof DistributedFileSystem) {
FSDataInputStream in = _fileSystem.open(path);
try {
return HdfsUtils.getFileLength(_fileSystem, path, in);
} finally {
in.close();
}
} else {
return _fileSystem.getFileStatus(path).getLen();
}
} finally {
trace.done();
}
}
/**
 * No-op. NOTE(review): durability appears to be handled when each output is
 * closed (see createOutput's close), so nothing is flushed per-name here —
 * confirm this matches the expected Lucene Directory.sync contract.
 */
@Override
public void sync(Collection<String> names) throws IOException {
}
/**
 * Closing the directory only purges cancelled tasks from the shared static
 * timer. NOTE(review): the FileSystem is deliberately not closed here —
 * Hadoop typically shares/caches FileSystem instances across directories;
 * confirm.
 */
@Override
public void close() throws IOException {
TIMER.purge();
}
/**
 * Returns the fully qualified root path of this directory.
 */
public Path getPath() {
return _path;
}
/**
 * Resolves a logical file name to its on-disk path: symlinked files resolve
 * to their real target, everything else lives directly under the root.
 */
protected Path getPath(String name) throws IOException {
  return isSymlink(name) ? getRealFilePathFromSymlink(name) : new Path(_path, name);
}
/**
 * Picks the newest entry (largest in FileStatus ordering) from a copy-file
 * listing.
 *
 * @throws IOException when the listing is null or empty.
 */
protected Path getRealFilePathFromCopyFileList(FileStatus[] listStatus) throws IOException {
  if (listStatus == null || listStatus.length == 0) {
    throw new IOException("Copy file list empty.");
  }
  Arrays.sort(listStatus);
  int last = listStatus.length - 1;
  return listStatus[last].getPath();
}
/**
 * Returns the path that delete() should remove: the ".lnk" marker for a
 * symlinked file (never the shared target), the plain file otherwise.
 */
protected Path getPathOrSymlinkForDelete(String name) throws IOException {
  String fileName = isSymlink(name) ? name + LNK : name;
  return new Path(_path, fileName);
}
/**
 * Resolves a symlinked file name to the real path stored inside its ".lnk"
 * marker file, caching the result when the cache is enabled.
 */
public Path getRealFilePathFromSymlink(String name) throws IOException {
// Serve from the cache first to avoid a filesystem read per lookup.
if (_useCache) {
Path path = _symlinkPathMap.get(name);
if (path != null) {
return path;
}
}
Tracer trace = Trace.trace("filesystem - getRealFilePathFromSymlink", Trace.param("name", name));
try {
Path linkPath = new Path(_path, name + LNK);
Path path = readRealPathDataFromSymlinkPath(_fileSystem, linkPath);
if (_useCache) {
_symlinkPathMap.put(name, path);
}
return path;
} finally {
trace.done();
}
}
/**
 * Reads the real target path out of a ".lnk" marker file; the file's entire
 * contents are the UTF-8 encoded path string.
 */
public static Path readRealPathDataFromSymlinkPath(FileSystem fileSystem, Path linkPath) throws IOException,
    UnsupportedEncodingException {
  FileStatus fileStatus = fileSystem.getFileStatus(linkPath);
  FSDataInputStream inputStream = fileSystem.open(linkPath);
  try {
    byte[] buf = new byte[(int) fileStatus.getLen()];
    inputStream.readFully(buf);
    return new Path(new String(buf, UTF_8));
  } finally {
    // Close even when readFully throws, otherwise the stream leaks.
    inputStream.close();
  }
}
/**
 * Returns whether the named file is represented by a ".lnk" marker. Both
 * positive and negative answers are cached; entries are only evicted by
 * delete().
 */
protected boolean isSymlink(String name) throws IOException {
if (_useCache) {
Boolean b = _symlinkMap.get(name);
if (b != null) {
return b;
}
}
Tracer trace = Trace.trace("filesystem - isSymlink", Trace.param("name", name));
try {
boolean exists = _fileSystem.exists(new Path(_path, name + LNK));
if (_useCache) {
_symlinkMap.put(name, exists);
}
return exists;
} finally {
trace.done();
}
}
/**
 * Returns the last-modified time of the named file, served from the cache
 * when enabled.
 *
 * @throws FileNotFoundException when the file is unknown to the cache.
 */
public long getFileModified(String name) throws IOException {
  if (!_useCache) {
    return fileModified(name);
  }
  FStat fStat = _fileStatusCache.getFStat(name);
  if (fStat == null) {
    throw new FileNotFoundException("File [" + name + "] not found");
  }
  return fStat._lastMod;
}
/**
 * Reads the last-modified time directly from the filesystem and, as a side
 * effect, refreshes the cache entry with a fresh FStat.
 */
protected long fileModified(String name) throws IOException {
Path path = getPath(name);
Tracer trace = Trace.trace("filesystem - fileModified", Trace.param("path", path));
try {
FileStatus fileStatus = _fileSystem.getFileStatus(path);
if (_useCache) {
_fileStatusCache.putFStat(name, new FStat(fileStatus));
}
return fileStatus.getModificationTime();
} finally {
trace.done();
}
}
/**
 * Copies a file to another directory, preferring a cheap ".lnk" symlink when
 * the destination is also HDFS-backed; decorators are unwrapped first, and
 * a byte copy is the fallback.
 */
@Override
public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
if (to instanceof DirectoryDecorator) {
// Unwrap original directory
copy(((DirectoryDecorator) to).getOriginalDirectory(), src, dest, context);
return;
} else if (to instanceof HdfsSymlink) {
// Attempt to create a symlink and return.
if (createSymLink(((HdfsSymlink) to).getSymlinkDirectory(), src, dest)) {
return;
}
}
// if all else fails, just copy the file.
super.copy(to, src, dest, context);
}
/**
 * Creates a ".lnk" marker in the destination directory whose contents are the
 * source file's real path, and propagates the cached FStat to the destination
 * directory's cache.
 *
 * @return true — the symlink is always attempted and assumed to succeed.
 */
protected boolean createSymLink(HdfsDirectory to, String src, String dest) throws IOException {
  Path srcPath = getPath(src);
  Path destDir = to.getPath();
  LOG.info("Creating symlink with name [{0}] to [{1}]", dest, srcPath);
  FSDataOutputStream outputStream = _fileSystem.create(getSymPath(destDir, dest));
  try {
    outputStream.write(srcPath.toString().getBytes(UTF_8));
  } finally {
    // Close even when the write throws, otherwise the stream leaks.
    outputStream.close();
  }
  if (_useCache) {
    // Guard against a missing source entry: ConcurrentHashMap.put(dest, null)
    // would throw NullPointerException.
    FStat fStat = _fileStatusCache.getFStat(src);
    if (fStat != null) {
      to._fileStatusCache.putFStat(dest, fStat);
    }
  }
  return true;
}
/**
 * Builds the path of a ".lnk" marker file inside the destination directory.
 */
protected Path getSymPath(Path destDir, String destFilename) {
  String linkName = destFilename + LNK;
  return new Path(destDir, linkName);
}
/**
 * This directory is its own symlink target (see HdfsSymlink); no unwrapping
 * is needed.
 */
@Override
public HdfsDirectory getSymlinkDirectory() {
return this;
}
}