/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.blur.store.hdfs_v2;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Timer;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;

import org.apache.blur.kvs.BytesRef;
import org.apache.blur.kvs.HdfsKeyValueStore;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.memory.MemoryLeakDetector;
import org.apache.blur.store.blockcache.LastModified;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoLockFactory;
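/**
 * A Lucene {@link Directory} implementation that stores each index file as a
 * series of 4096-byte blocks inside an {@link HdfsKeyValueStore}. Block data is
 * kept under {@code <name>/<blockId>} keys, per-file metadata under
 * {@code <name>/length} and {@code <name>/lastmod}, and the list of live file
 * names under the {@code FILES} key, separated by {@code |}.
 * <p>
 * A minimal usage sketch is shown below. It is not part of the original source;
 * the configuration, timer name, and HDFS path are illustrative assumptions
 * only, and {@link IOException} handling is omitted for brevity.
 *
 * <pre>{@code
 * Configuration configuration = new Configuration();
 * Timer hdfsKeyValueTimer = new Timer("hdfs-kv-store", true);
 * // Illustrative path only; real deployments use their own layout.
 * Path path = new Path("/blur/tables/example/shard-00000000/fast");
 * FastHdfsKeyValueDirectory directory =
 *     new FastHdfsKeyValueDirectory(false, hdfsKeyValueTimer, configuration, path);
 * try {
 *   for (String file : directory.listAll()) {
 *     System.out.println(file + " " + directory.fileLength(file));
 *   }
 * } finally {
 *   directory.close();
 * }
 * }</pre>
 */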
public class FastHdfsKeyValueDirectory extends Directory implements LastModified {

  private static final String MISSING_METADATA_MESSAGE = "Missing meta data for file [{0}], setting length to '0'. "
      + "This can occur when a kv log file writes across blocks in hdfs.";
  private static final long GC_DELAY = TimeUnit.HOURS.toMillis(1);
  private static final Log LOG = LogFactory.getLog(FastHdfsKeyValueDirectory.class);
  private static final String LASTMOD = "/lastmod";
  private static final String LENGTH = "/length";
  private static final BytesRef FILES = new BytesRef("FILES");
  private static final String SEP = "|";

  private final Map<String, Long> _files = new ConcurrentHashMap<String, Long>();
  private final HdfsKeyValueStore _store;
  private final int _blockSize = 4096;
  private final Path _path;
  private final boolean _readOnly;

  private long _lastGc;

  public FastHdfsKeyValueDirectory(boolean readOnly, Timer hdfsKeyValueTimer, Configuration configuration, Path path)
      throws IOException {
    this(readOnly, hdfsKeyValueTimer, configuration, path, HdfsKeyValueStore.DEFAULT_MAX_AMOUNT_ALLOWED_PER_FILE,
        HdfsKeyValueStore.DEFAULT_MAX_OPEN_FOR_WRITING);
  }

  public FastHdfsKeyValueDirectory(boolean readOnly, Timer hdfsKeyValueTimer, Configuration configuration, Path path,
      long maxAmountAllowedPerFile, long maxOpenForWriting) throws IOException {
    _path = path;
    _readOnly = readOnly;
    _store = new HdfsKeyValueStore(readOnly, hdfsKeyValueTimer, configuration, path, maxAmountAllowedPerFile,
        maxOpenForWriting);
    MemoryLeakDetector.record(_store, "HdfsKeyValueStore", path.toString());
    // Rebuild the in-memory file map from the FILES key and each file's length entry.
    BytesRef value = new BytesRef();
    if (_store.get(FILES, value)) {
      String filesString = value.utf8ToString();
      // System.out.println("Open Files String [" + filesString + "]");
      if (!filesString.isEmpty()) {
        String[] files = filesString.split("\\" + SEP);
        for (String file : files) {
          if (file.isEmpty()) {
            throw new IOException("Empty file names should not occur [" + filesString + "]");
          }
          BytesRef key = new BytesRef(file + LENGTH);
          if (_store.get(key, value)) {
            _files.put(file, Long.parseLong(value.utf8ToString()));
          } else {
            LOG.warn(MISSING_METADATA_MESSAGE, file);
          }
        }
      }
    }
    setLockFactory(NoLockFactory.getNoLockFactory());
    if (!_readOnly) {
      writeFileNamesAndSync();
      gc();
    }
  }

  /**
   * Removes any keys in the store that belong to files no longer referenced in
   * the in-memory file map.
   */
  public void gc() throws IOException {
    LOG.info("Running GC over the hdfs kv directory [{0}].", _path);
    Iterable<Entry<BytesRef, BytesRef>> scan = _store.scan(null);
    List<BytesRef> toBeDeleted = new ArrayList<BytesRef>();
    for (Entry<BytesRef, BytesRef> e : scan) {
      BytesRef bytesRef = e.getKey();
      if (bytesRef.equals(FILES)) {
        continue;
      }
      String key = bytesRef.utf8ToString();
      int indexOf = key.indexOf('/');
      if (indexOf < 0) {
        LOG.error("Unknown key type in hdfs kv store [" + key + "]");
      } else {
        String filename = key.substring(0, indexOf);
        if (!_files.containsKey(filename)) {
          toBeDeleted.add(bytesRef);
        }
      }
    }
    for (BytesRef key : toBeDeleted) {
      _store.delete(key);
    }
    _lastGc = System.currentTimeMillis();
  }

  public void writeBlock(String name, long blockId, byte[] b, int offset, int length) throws IOException {
    _store.put(new BytesRef(name + "/" + blockId), new BytesRef(b, offset, length));
  }

  public void readBlock(String name, long blockId, BytesRef ref) throws IOException {
    if (!_store.get(new BytesRef(name + "/" + blockId), ref)) {
      throw new IOException("Block [" + name + "] [" + blockId + "] not found.");
    }
  }

  public synchronized void writeLength(String name, long length) throws IOException {
    _files.put(name, length);
    _store.put(new BytesRef(name + LENGTH), new BytesRef(Long.toString(length)));
    _store.put(new BytesRef(name + LASTMOD), new BytesRef(Long.toString(System.currentTimeMillis())));
    writeFilesNames();
  }

  private void writeFilesNames() throws IOException {
    StringBuilder builder = new StringBuilder();
    Set<String> fileNames = new TreeSet<String>(_files.keySet());
    for (String n : fileNames) {
      if (builder.length() != 0) {
        builder.append(SEP);
      }
      builder.append(n);
    }
    _store.put(FILES, new BytesRef(builder.toString()));
  }

  @Override
  public IndexInput openInput(String name, IOContext context) throws IOException {
    return new FastHdfsKeyValueIndexInput(name, fileLength(name), _blockSize, this);
  }

  @Override
  public IndexOutput createOutput(final String name, IOContext context) throws IOException {
    if (_readOnly) {
      throw new IOException("Directory is in read only mode.");
    }
    if (fileExists(name)) {
      deleteFile(name);
    }
    return new FastHdfsKeyValueIndexOutput(name, _blockSize, this);
  }

  @Override
  public String[] listAll() throws IOException {
    Set<String> fileNames = new HashSet<String>(_files.keySet());
    return fileNames.toArray(new String[fileNames.size()]);
  }

  @Override
  public boolean fileExists(String name) throws IOException {
    boolean containsKey = _files.containsKey(name);
    LOG.debug("FileExists [{0}] [{1}].", name, containsKey);
    return containsKey;
  }

  @Override
  public void deleteFile(String name) throws IOException {
    if (_readOnly) {
      throw new IOException("Directory is in read only mode.");
    }
    Long length = _files.remove(name);
    if (length != null) {
      LOG.debug("Removing file [{0}] with length [{1}].", name, length);
      long blocks = length / _blockSize;
      _store.delete(new BytesRef(name + LENGTH));
      _store.delete(new BytesRef(name + LASTMOD));
      // Delete every block key for the file, including the final partial block.
      for (long l = 0; l <= blocks; l++) {
        _store.delete(new BytesRef(name + "/" + l));
      }
      writeFileNamesAndSync();
    }
  }

  @Override
  public long fileLength(String name) throws IOException {
    if (fileExists(name)) {
      return _files.get(name);
    }
    throw new FileNotFoundException(name);
  }

  @Override
  public void sync(Collection<String> names) throws IOException {
    writeFileNamesAndSync();
    if (shouldPerformGC()) {
      gc();
    }
  }

  private void writeFileNamesAndSync() throws IOException {
    writeFilesNames();
    _store.sync();
  }

  private boolean shouldPerformGC() {
    return _lastGc + GC_DELAY < System.currentTimeMillis();
  }

  @Override
  public void close() throws IOException {
    _store.close();
  }

  @Override
  public long getFileModified(String name) throws IOException {
    BytesRef value = new BytesRef();
    if (_store.get(new BytesRef(name + LASTMOD), value)) {
      return Long.parseLong(value.utf8ToString());
    }
    throw new FileNotFoundException(name);
  }

}