/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.alibaba.jstorm.hdfs.blobstore;

import backtype.storm.Config;
import com.alibaba.jstorm.blobstore.BlobStoreFile;
import com.alibaba.jstorm.utils.JStormUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Timer;
import java.util.TimerTask;

/**
 * HDFS blob store implementation.
 */
public class HdfsBlobStoreImpl {
    private static final Logger LOG = LoggerFactory.getLogger(HdfsBlobStoreImpl.class);

    private static final long FULL_CLEANUP_FREQ = 60 * 60 * 1000L;
    private static final int BUCKETS = 1024;
    private static final Timer timer = new Timer("HdfsBlobStore cleanup thread", true);
    private static final String BLOBSTORE_DATA = "data";

    /**
     * Iterates over all keys by walking the hashed bucket directories in
     * order, lazily listing one bucket at a time.
     */
    public class KeyInHashDirIterator implements Iterator<String> {
        private int currentBucket = 0;
        private Iterator<String> it = null;
        private String next = null;

        public KeyInHashDirIterator() throws IOException {
            primeNext();
        }

        private void primeNext() throws IOException {
            // Advance through bucket directories until a non-empty one is found.
            while (it == null && currentBucket < BUCKETS) {
                String name = String.valueOf(currentBucket);
                Path dir = new Path(_fullPath, name);
                try {
                    it = listKeys(dir);
                } catch (FileNotFoundException e) {
                    it = null;
                }
                if (it == null || !it.hasNext()) {
                    it = null;
                    currentBucket++;
                } else {
                    next = it.next();
                }
            }
        }

        @Override
        public boolean hasNext() {
            return next != null;
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            String current = next;
            next = null;
            if (it != null) {
                if (!it.hasNext()) {
                    // Current bucket is exhausted; move on to the next non-empty one.
                    it = null;
                    currentBucket++;
                    try {
                        primeNext();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                } else {
                    next = it.next();
                }
            }
            return current;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Delete Not Supported");
        }
    }

    private Path _fullPath;
    private FileSystem _fs;
    private TimerTask _cleanup = null;
    private Configuration _hadoopConf;
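    /*
     * On-disk layout sketch: keys are spread across BUCKETS hashed
     * subdirectories under the blob store root, and each key directory
     * holds a single "data" file (see getKeyDir() and BLOBSTORE_DATA):
     *
     *   <root>/<abs(key.hashCode()) % BUCKETS>/<key>/data
     *
     * KeyInHashDirIterator above walks buckets 0..BUCKETS-1 in order,
     * so a full listing never has to load all keys into memory at once.
     */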
    // blobstore directory is private!
    public static final FsPermission BLOBSTORE_DIR_PERMISSION = FsPermission.createImmutable((short) 0755); // rwxr-xr-x

    public HdfsBlobStoreImpl(Path path, Map<String, Object> conf) throws IOException {
        this(path, conf, new Configuration());
    }

    public HdfsBlobStoreImpl(Path path, Map<String, Object> conf, Configuration hconf) throws IOException {
        LOG.info("Blob store based in {}", path);
        _fullPath = path;
        _hadoopConf = hconf;

        String hdfsHostName = (String) conf.get(Config.BLOBSTORE_HDFS_HOSTNAME);
        Integer hdfsPort = JStormUtils.parseInt(conf.get(Config.BLOBSTORE_HDFS_PORT));
        String defaultFS = (String) conf.get(Config.BLOBSTORE_HDFS_DEFAULT_FS);
        if ((hdfsHostName == null || hdfsPort == null) && defaultFS == null) {
            throw new RuntimeException("Neither <blobstore.hdfs.hostname, blobstore.hdfs.port> nor "
                    + "blobstore.hdfs.defaultFS is set. You must specify an HDFS location!");
        }
        if (defaultFS == null) {
            defaultFS = String.format("hdfs://%s:%d", hdfsHostName, hdfsPort);
        }
        LOG.info("HDFS blob store, using defaultFS: {}", defaultFS);
        _hadoopConf.set("fs.defaultFS", defaultFS);

        // Copy any "blobstore.hdfs.*" entries into the Hadoop configuration, with the prefix stripped.
        String keyPrefix = "blobstore.hdfs.";
        for (Map.Entry<String, Object> confEntry : conf.entrySet()) {
            String key = confEntry.getKey();
            Object value = confEntry.getValue();
            if (key.startsWith(keyPrefix) && value != null) {
                key = key.substring(keyPrefix.length());
                LOG.info("adding \"{}={}\" to hadoop conf", key, value);
                _hadoopConf.set(key, value.toString());
            }
        }

        _fs = path.getFileSystem(_hadoopConf);
        if (!_fs.exists(_fullPath)) {
            FsPermission perms = new FsPermission(BLOBSTORE_DIR_PERMISSION);
            boolean success = false;
            try {
                success = _fs.mkdirs(_fullPath, perms);
            } catch (IOException e) {
                LOG.error("fs mkdir ", e);
            }
            if (!success) {
                throw new IOException("Error creating blobstore directory: " + _fullPath);
            }
        }

        Object shouldCleanup = conf.get(Config.BLOBSTORE_CLEANUP_ENABLE);
        if (JStormUtils.parseBoolean(shouldCleanup, false)) {
            LOG.debug("Starting hdfs blobstore cleaner");
            _cleanup = new TimerTask() {
                @Override
                public void run() {
                    try {
                        fullCleanup(FULL_CLEANUP_FREQ);
                    } catch (IOException e) {
                        LOG.error("Error trying to cleanup", e);
                    }
                }
            };
            timer.scheduleAtFixedRate(_cleanup, 0, FULL_CLEANUP_FREQ);
        }
    }

    /**
     * @return all keys that are available for reading.
     * @throws IOException on any error.
     */
    public Iterator<String> listKeys() throws IOException {
        return new KeyInHashDirIterator();
    }

    /**
     * Get an input stream for reading a part.
     *
     * @param key the key of the part to read.
     * @return the BlobStoreFile to read the data from.
     * @throws IOException on any error
     */
    public BlobStoreFile read(String key) throws IOException {
        return new HdfsBlobStoreFile(getKeyDir(key), BLOBSTORE_DATA, _hadoopConf);
    }

    /**
     * Get an object tied to writing the data.
     *
     * @param key the key of the part to write to.
     * @param create whether the file needs to be new or not.
     * @return an object that can be used both to write the data and to commit/cancel the operation.
     * @throws IOException on any error
     */
    public BlobStoreFile write(String key, boolean create) throws IOException {
        return new HdfsBlobStoreFile(getKeyDir(key), true, create, _hadoopConf);
    }
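    /*
     * A minimal usage sketch (hypothetical keys and paths). Per the javadoc
     * above, the returned BlobStoreFile is used both to write the data and
     * to commit or cancel the operation:
     *
     *   Map<String, Object> conf = new HashMap<String, Object>();
     *   conf.put(Config.BLOBSTORE_HDFS_DEFAULT_FS, "hdfs://namenode.example.com:9000");
     *   HdfsBlobStoreImpl store = new HdfsBlobStoreImpl(new Path("/jstorm/blobs"), conf);
     *
     *   BlobStoreFile out = store.write("topology-1-code", true); // create: must not already exist
     *   // ... stream bytes into "out", then commit (or cancel) ...
     *   BlobStoreFile in = store.read("topology-1-code");         // read it back later
     */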
    /**
     * Check if the key exists in the blob store.
     *
     * @param key the key to check for
     * @return true if it exists, else false.
     */
    public boolean exists(String key) {
        Path dir = getKeyDir(key);
        boolean res = false;
        try {
            // Refresh the cached FileSystem handle before checking.
            _fs = dir.getFileSystem(_hadoopConf);
            res = _fs.exists(dir);
        } catch (IOException e) {
            LOG.warn("Exception checking existence of: " + key, e);
        }
        return res;
    }

    /**
     * Delete a key from the blob store.
     *
     * @param key the key to delete
     * @throws IOException on any error
     */
    public void deleteKey(String key) throws IOException {
        Path keyDir = getKeyDir(key);
        HdfsBlobStoreFile pf = new HdfsBlobStoreFile(keyDir, BLOBSTORE_DATA, _hadoopConf);
        pf.delete();
        delete(keyDir);
    }

    protected Path getKeyDir(String key) {
        // Cast to long before abs() so Integer.MIN_VALUE cannot yield a negative bucket.
        String hash = String.valueOf(Math.abs((long) key.hashCode()) % BUCKETS);
        Path hashDir = new Path(_fullPath, hash);
        Path ret = new Path(hashDir, key);
        LOG.debug("{} Looking for {} in {}", new Object[]{_fullPath, key, hash});
        return ret;
    }

    public void fullCleanup(long age) throws IOException {
        long cleanUpIfBefore = System.currentTimeMillis() - age;
        Iterator<String> keys = new KeyInHashDirIterator();
        while (keys.hasNext()) {
            String key = keys.next();
            Path keyDir = getKeyDir(key);
            Iterator<BlobStoreFile> i = listBlobStoreFiles(keyDir);
            if (!i.hasNext()) {
                // The dir is empty, so try to delete it; this may fail, but that is OK.
                try {
                    _fs.delete(keyDir, true);
                } catch (Exception e) {
                    LOG.warn("Could not delete " + keyDir + " will try again later");
                }
            }
            while (i.hasNext()) {
                BlobStoreFile f = i.next();
                if (f.isTmp()) {
                    if (f.getModTime() <= cleanUpIfBefore) {
                        f.delete();
                    }
                }
            }
        }
    }

    protected Iterator<BlobStoreFile> listBlobStoreFiles(Path path) throws IOException {
        ArrayList<BlobStoreFile> ret = new ArrayList<BlobStoreFile>();
        FileStatus[] files = _fs.listStatus(new Path[]{path});
        if (files != null) {
            for (FileStatus sub : files) {
                try {
                    ret.add(new HdfsBlobStoreFile(sub.getPath().getParent(), sub.getPath().getName(), _hadoopConf));
                } catch (IllegalArgumentException e) {
                    // Ignored: the file did not match the expected naming scheme.
                    LOG.warn("Found an unexpected file in {} {}", path, sub.getPath().getName());
                }
            }
        }
        return ret.iterator();
    }

    protected Iterator<String> listKeys(Path path) throws IOException {
        ArrayList<String> ret = new ArrayList<String>();
        FileStatus[] files = _fs.listStatus(new Path[]{path});
        if (files != null) {
            for (FileStatus sub : files) {
                try {
                    ret.add(sub.getPath().getName());
                } catch (IllegalArgumentException e) {
                    // Ignored: the file did not match the expected naming scheme.
                    LOG.debug("Found an unexpected file in {} {}", path, sub.getPath().getName());
                }
            }
        }
        return ret.iterator();
    }

    protected int getBlobReplication(String key) throws IOException {
        Path path = getKeyDir(key);
        Path dest = new Path(path, BLOBSTORE_DATA);
        return _fs.getFileStatus(dest).getReplication();
    }

    protected int updateBlobReplication(String key, int replication) throws IOException {
        Path path = getKeyDir(key);
        Path dest = new Path(path, BLOBSTORE_DATA);
        _fs.setReplication(dest, (short) replication);
        return _fs.getFileStatus(dest).getReplication();
    }

    protected void delete(Path path) throws IOException {
        _fs.delete(path, true);
    }

    public void shutdown() {
        if (_cleanup != null) {
            _cleanup.cancel();
            _cleanup = null;
        }
    }
}
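/*
 * Maintenance sketch (hypothetical usage from within this package, since
 * several of the methods above are protected): enumerate every key and
 * raise the HDFS replication of its backing "data" file.
 *
 *   HdfsBlobStoreImpl store = ...;
 *   Iterator<String> keys = store.listKeys();
 *   while (keys.hasNext()) {
 *       String key = keys.next();
 *       if (store.exists(key)) {
 *           store.updateBlobReplication(key, 3); // protected; package/subclass access only
 *       }
 *   }
 */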