/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.master.keyvalue; import alluxio.AlluxioURI; import alluxio.Constants; import alluxio.Server; import alluxio.clock.SystemClock; import alluxio.exception.AccessControlException; import alluxio.exception.AlluxioException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; import alluxio.master.AbstractMaster; import alluxio.master.file.FileSystemMaster; import alluxio.master.file.options.CreateDirectoryOptions; import alluxio.master.file.options.DeleteOptions; import alluxio.master.file.options.RenameOptions; import alluxio.master.journal.Journal; import alluxio.proto.journal.Journal.JournalEntry; import alluxio.proto.journal.KeyValue.CompletePartitionEntry; import alluxio.proto.journal.KeyValue.CompleteStoreEntry; import alluxio.proto.journal.KeyValue.CreateStoreEntry; import alluxio.proto.journal.KeyValue.DeleteStoreEntry; import alluxio.proto.journal.KeyValue.MergeStoreEntry; import alluxio.proto.journal.KeyValue.RenameStoreEntry; import alluxio.thrift.KeyValueMasterClientService; import alluxio.thrift.PartitionInfo; import alluxio.util.IdUtils; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.io.PathUtils; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; import org.apache.thrift.TProcessor; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import java.util.UUID; import javax.annotation.concurrent.ThreadSafe; /** * The key-value master stores key-value store information in Alluxio, including the partitions of * each key-value store. */ @ThreadSafe public final class KeyValueMaster extends AbstractMaster { private static final Set<Class<? extends Server>> DEPS = ImmutableSet.<Class<? extends Server>>of(FileSystemMaster.class); private final FileSystemMaster mFileSystemMaster; /** Map from file id of a complete store to the list of partitions in this store. */ private final Map<Long, List<PartitionInfo>> mCompleteStoreToPartitions; /** * Map from file id of an incomplete store (i.e., some one is still writing new partitions) to the * list of partitions in this store. */ private final Map<Long, List<PartitionInfo>> mIncompleteStoreToPartitions; /** * @param fileSystemMaster the file system master handle * @param journal a {@link Journal} to write journal entries to */ KeyValueMaster(FileSystemMaster fileSystemMaster, Journal journal) { super(journal, new SystemClock(), ExecutorServiceFactories .fixedThreadPoolExecutorServiceFactory(Constants.KEY_VALUE_MASTER_NAME, 2)); mFileSystemMaster = fileSystemMaster; mCompleteStoreToPartitions = new HashMap<>(); mIncompleteStoreToPartitions = new HashMap<>(); } @Override public Map<String, TProcessor> getServices() { Map<String, TProcessor> services = new HashMap<>(); services.put(Constants.KEY_VALUE_MASTER_CLIENT_SERVICE_NAME, new KeyValueMasterClientService.Processor<>(new KeyValueMasterClientServiceHandler(this))); return services; } @Override public String getName() { return Constants.KEY_VALUE_MASTER_NAME; } @Override public Set<Class<? extends Server>> getDependencies() { return DEPS; } @Override public synchronized void processJournalEntry(JournalEntry entry) throws IOException { try { if (entry.hasCreateStore()) { createStoreFromEntry(entry.getCreateStore()); } else if (entry.hasCompletePartition()) { completePartitionFromEntry(entry.getCompletePartition()); } else if (entry.hasCompleteStore()) { completeStoreFromEntry(entry.getCompleteStore()); } else if (entry.hasDeleteStore()) { deleteStoreFromEntry(entry.getDeleteStore()); } else if (entry.hasRenameStore()) { renameStoreFromEntry(entry.getRenameStore()); } else if (entry.hasMergeStore()) { mergeStoreFromEntry(entry.getMergeStore()); } else { throw new IOException(ExceptionMessage.UNEXPECTED_JOURNAL_ENTRY.getMessage(entry)); } } catch (AlluxioException e) { throw new RuntimeException(e); } } @Override public synchronized Iterator<JournalEntry> getJournalEntryIterator() { return Iterators.concat(getStoreIterator(mCompleteStoreToPartitions), getStoreIterator(mIncompleteStoreToPartitions)); } @Override public void start(Boolean isLeader) throws IOException { super.start(isLeader); } /** * Marks a partition complete and adds it to an incomplete key-value store. * * @param path URI of the key-value store * @param info information of this completed partition * @throws AccessControlException if permission checking fails * @throws FileDoesNotExistException if the key-value store URI does not exists * @throws InvalidPathException if the path is invalid */ public synchronized void completePartition(AlluxioURI path, PartitionInfo info) throws AccessControlException, FileDoesNotExistException, InvalidPathException { final long fileId = mFileSystemMaster.getFileId(path); if (fileId == IdUtils.INVALID_FILE_ID) { throw new FileDoesNotExistException( String.format("Failed to completePartition: path %s does not exist", path)); } completePartitionInternal(fileId, info); writeJournalEntry(newCompletePartitionEntry(fileId, info)); flushJournal(); } // Marks a partition complete, called when replaying journals private void completePartitionFromEntry(CompletePartitionEntry entry) throws FileDoesNotExistException { PartitionInfo info = new PartitionInfo(entry.getKeyStartBytes().asReadOnlyByteBuffer(), entry.getKeyLimitBytes().asReadOnlyByteBuffer(), entry.getBlockId(), entry.getKeyCount()); completePartitionInternal(entry.getStoreId(), info); } // Internal implementation to mark a partition complete private void completePartitionInternal(long fileId, PartitionInfo info) throws FileDoesNotExistException { if (!mIncompleteStoreToPartitions.containsKey(fileId)) { // TODO(binfan): throw a better exception throw new FileDoesNotExistException(String.format( "Failed to completeStore: KeyValueStore (fileId=%d) was not created before", fileId)); } // NOTE: deep copy the partition info object mIncompleteStoreToPartitions.get(fileId).add(new PartitionInfo(info)); } /** * Marks a key-value store complete. * * @param path URI of the key-value store * @throws FileDoesNotExistException if the key-value store URI does not exists * @throws InvalidPathException if the path is not valid * @throws AccessControlException if permission checking fails */ public synchronized void completeStore(AlluxioURI path) throws FileDoesNotExistException, InvalidPathException, AccessControlException { final long fileId = mFileSystemMaster.getFileId(path); if (fileId == IdUtils.INVALID_FILE_ID) { throw new FileDoesNotExistException( String.format("Failed to completeStore: path %s does not exist", path)); } completeStoreInternal(fileId); writeJournalEntry(newCompleteStoreEntry(fileId)); flushJournal(); } // Marks a store complete, called when replaying journals private void completeStoreFromEntry(CompleteStoreEntry entry) throws FileDoesNotExistException { completeStoreInternal(entry.getStoreId()); } // Internal implementation to mark a store complete private void completeStoreInternal(long fileId) throws FileDoesNotExistException { if (!mIncompleteStoreToPartitions.containsKey(fileId)) { // TODO(binfan): throw a better exception throw new FileDoesNotExistException(String.format( "Failed to completeStore: KeyValueStore (fileId=%d) was not created before", fileId)); } List<PartitionInfo> partitions = mIncompleteStoreToPartitions.remove(fileId); mCompleteStoreToPartitions.put(fileId, partitions); } /** * Creates a new key-value store. * * @param path URI of the key-value store * @throws FileAlreadyExistsException if a key-value store URI exists * @throws InvalidPathException if the given path is invalid * @throws AccessControlException if permission checking fails */ public synchronized void createStore(AlluxioURI path) throws FileAlreadyExistsException, InvalidPathException, AccessControlException { try { // Create this dir mFileSystemMaster .createDirectory(path, CreateDirectoryOptions.defaults().setRecursive(true)); } catch (IOException e) { // TODO(binfan): Investigate why {@link FileSystemMaster#createDirectory} throws IOException throw new InvalidPathException( String.format("Failed to createStore: can not create path %s", path), e); } catch (FileDoesNotExistException e) { // This should be impossible since we pass the recursive option into mkdir throw Throwables.propagate(e); } long fileId = mFileSystemMaster.getFileId(path); Preconditions.checkState(fileId != IdUtils.INVALID_FILE_ID); createStoreInternal(fileId); writeJournalEntry(newCreateStoreEntry(fileId)); flushJournal(); } // Creates a store, called when replaying journals private void createStoreFromEntry(CreateStoreEntry entry) throws FileAlreadyExistsException { createStoreInternal(entry.getStoreId()); } // Internal implementation to create a store private void createStoreInternal(long fileId) throws FileAlreadyExistsException { if (mIncompleteStoreToPartitions.containsKey(fileId)) { // TODO(binfan): throw a better exception throw new FileAlreadyExistsException(String .format("Failed to createStore: KeyValueStore (fileId=%d) is already created", fileId)); } mIncompleteStoreToPartitions.put(fileId, new ArrayList<PartitionInfo>()); } /** * Deletes a completed key-value store. * * @param uri {@link AlluxioURI} to the store * @throws InvalidPathException if the uri exists but is not a key-value store * @throws FileDoesNotExistException if the uri does not exist */ public synchronized void deleteStore(AlluxioURI uri) throws IOException, InvalidPathException, FileDoesNotExistException, AlluxioException { long fileId = getFileId(uri); checkIsCompletePartition(fileId, uri); mFileSystemMaster.delete(uri, DeleteOptions.defaults().setRecursive(true)); deleteStoreInternal(fileId); writeJournalEntry(newDeleteStoreEntry(fileId)); flushJournal(); } // Deletes a store, called when replaying journals. private void deleteStoreFromEntry(DeleteStoreEntry entry) { deleteStoreInternal(entry.getStoreId()); } // Internal implementation to deleteStore a key-value store. private void deleteStoreInternal(long fileId) { mCompleteStoreToPartitions.remove(fileId); } private long getFileId(AlluxioURI uri) throws AccessControlException, FileDoesNotExistException, InvalidPathException { long fileId = mFileSystemMaster.getFileId(uri); if (fileId == IdUtils.INVALID_FILE_ID) { throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri)); } return fileId; } void checkIsCompletePartition(long fileId, AlluxioURI uri) throws InvalidPathException { if (!mCompleteStoreToPartitions.containsKey(fileId)) { throw new InvalidPathException(ExceptionMessage.INVALID_KEY_VALUE_STORE_URI.getMessage(uri)); } } /** * Renames one completed key-value store. * * @param oldUri the old {@link AlluxioURI} to the store * @param newUri the {@link AlluxioURI} to the store */ public synchronized void renameStore(AlluxioURI oldUri, AlluxioURI newUri) throws IOException, AlluxioException { long oldFileId = getFileId(oldUri); checkIsCompletePartition(oldFileId, oldUri); try { mFileSystemMaster.rename(oldUri, newUri, RenameOptions.defaults()); } catch (FileAlreadyExistsException e) { throw new FileAlreadyExistsException( String.format("failed to rename store:the path %s has been used", newUri), e); } final long newFileId = mFileSystemMaster.getFileId(newUri); Preconditions.checkState(newFileId != IdUtils.INVALID_FILE_ID); renameStoreInternal(oldFileId, newFileId); writeJournalEntry(newRenameStoreEntry(oldFileId, newFileId)); flushJournal(); } private void renameStoreInternal(long oldFileId, long newFileId) { List<PartitionInfo> partitionsRenamed = mCompleteStoreToPartitions.remove(oldFileId); mCompleteStoreToPartitions.put(newFileId, partitionsRenamed); } // Rename one completed stores, called when replaying journals. private void renameStoreFromEntry(RenameStoreEntry entry) { renameStoreInternal(entry.getOldStoreId(), entry.getNewStoreId()); } /** * Merges one completed key-value store to another completed key-value store. * * @param fromUri the {@link AlluxioURI} to the store to be merged * @param toUri the {@link AlluxioURI} to the store to be merged to * @throws InvalidPathException if the uri exists but is not a key-value store * @throws FileDoesNotExistException if the uri does not exist */ public synchronized void mergeStore(AlluxioURI fromUri, AlluxioURI toUri) throws IOException, FileDoesNotExistException, InvalidPathException, AlluxioException { long fromFileId = getFileId(fromUri); long toFileId = getFileId(toUri); checkIsCompletePartition(fromFileId, fromUri); checkIsCompletePartition(toFileId, toUri); // Rename fromUri to "toUri/%s-%s" % (last component of fromUri, UUID). // NOTE: rename does not change the existing block IDs. mFileSystemMaster.rename(fromUri, new AlluxioURI(PathUtils.concatPath(toUri.toString(), String.format("%s-%s", fromUri.getName(), UUID.randomUUID().toString()))), RenameOptions.defaults()); mergeStoreInternal(fromFileId, toFileId); writeJournalEntry(newMergeStoreEntry(fromFileId, toFileId)); flushJournal(); } // Internal implementation to merge two completed stores. private void mergeStoreInternal(long fromFileId, long toFileId) { // Move partition infos to the new store. List<PartitionInfo> partitionsToBeMerged = mCompleteStoreToPartitions.remove(fromFileId); mCompleteStoreToPartitions.get(toFileId).addAll(partitionsToBeMerged); } // Merges two completed stores, called when replaying journals. private void mergeStoreFromEntry(MergeStoreEntry entry) { mergeStoreInternal(entry.getFromStoreId(), entry.getToStoreId()); } /** * Gets a list of partitions of a given key-value store. * * @param path URI of the key-value store * @return a list of partition information * @throws FileDoesNotExistException if the key-value store URI does not exists * @throws AccessControlException if permission checking fails * @throws InvalidPathException if the path is invalid */ public synchronized List<PartitionInfo> getPartitionInfo(AlluxioURI path) throws FileDoesNotExistException, AccessControlException, InvalidPathException { long fileId = getFileId(path); List<PartitionInfo> partitions = mCompleteStoreToPartitions.get(fileId); if (partitions == null) { return new ArrayList<>(); } return partitions; } private JournalEntry newCreateStoreEntry(long fileId) { CreateStoreEntry createStore = CreateStoreEntry.newBuilder().setStoreId(fileId).build(); return JournalEntry.newBuilder().setCreateStore(createStore).build(); } private JournalEntry newCompletePartitionEntry(long fileId, PartitionInfo info) { CompletePartitionEntry completePartition = CompletePartitionEntry.newBuilder().setStoreId(fileId).setBlockId(info.getBlockId()) .setKeyStart(new String(info.bufferForKeyStart().array())) .setKeyLimit(new String(info.bufferForKeyLimit().array())) .setKeyCount(info.getKeyCount()).build(); return JournalEntry.newBuilder().setCompletePartition(completePartition).build(); } private JournalEntry newCompleteStoreEntry(long fileId) { CompleteStoreEntry completeStore = CompleteStoreEntry.newBuilder().setStoreId(fileId).build(); return JournalEntry.newBuilder().setCompleteStore(completeStore).build(); } private JournalEntry newDeleteStoreEntry(long fileId) { DeleteStoreEntry deleteStore = DeleteStoreEntry.newBuilder().setStoreId(fileId).build(); return JournalEntry.newBuilder().setDeleteStore(deleteStore).build(); } private JournalEntry newRenameStoreEntry(long oldFileId, long newFileId) { RenameStoreEntry renameStore = RenameStoreEntry.newBuilder().setOldStoreId(oldFileId) .setNewStoreId(newFileId).build(); return JournalEntry.newBuilder().setRenameStore(renameStore).build(); } private JournalEntry newMergeStoreEntry(long fromFileId, long toFileId) { MergeStoreEntry mergeStore = MergeStoreEntry.newBuilder().setFromStoreId(fromFileId) .setToStoreId(toFileId).build(); return JournalEntry.newBuilder().setMergeStore(mergeStore).build(); } private Iterator<JournalEntry> getStoreIterator( Map<Long, List<PartitionInfo>> storeToPartitions) { final Iterator<Map.Entry<Long, List<PartitionInfo>>> it = storeToPartitions.entrySet().iterator(); return new Iterator<JournalEntry>() { // Initial state: mEntry == null, mInfoIterator == null // hasNext: mEntry == null, mInfoIterator == null, it.hasNext() // mEntry == null, mInfoIterator == null, => create // mEntry != null, mInfoIterator.hasNext() => partitions // mEntry != null, !mInfoIterator.hasNext() => complete private Map.Entry<Long, List<PartitionInfo>> mEntry; private Iterator<PartitionInfo> mInfoIterator; @Override public boolean hasNext() { if (mEntry == null && mInfoIterator == null && !it.hasNext()) { return false; } return true; } @Override public JournalEntry next() { if (!hasNext()) { throw new NoSuchElementException(); } if (mEntry == null) { Preconditions.checkState(mInfoIterator == null); mEntry = it.next(); mInfoIterator = mEntry.getValue().iterator(); return newCreateStoreEntry(mEntry.getKey()); } if (mInfoIterator.hasNext()) { return newCompletePartitionEntry(mEntry.getKey(), mInfoIterator.next()); } JournalEntry completeEntry = newCompleteStoreEntry(mEntry.getKey()); mEntry = null; mInfoIterator = null; return completeEntry; } @Override public void remove() { throw new UnsupportedOperationException("remove is not supported."); } }; } }