// Copyright 2014 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.util; import com.google.common.collect.ForwardingMap; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Hashtable; import java.util.LinkedHashMap; import java.util.Map; import java.util.logging.Logger; /** * A map that is backed by persistent storage. It uses two files on disk for * this: The first file contains all the entries and gets written when invoking * the {@link #save()} method. The second file contains a journal of all entries * that were added to or removed from the map since constructing the instance of * the map or the last invocation of {@link #save()} and gets written after each * update of the map although sub-classes are free to implement their own * journal update strategy. * * <p><b>Ceci n'est pas un Map</b>. Strictly speaking, the {@link Map} * interface doesn't permit the possibility of failure. This class uses * persistence; persistence means I/O, and I/O means the possibility of * failure. Therefore the semantics of this may deviate from the Map contract * in failure cases. In particular, updates are not guaranteed to succeed. * However, I/O failures are guaranteed to be reported upon the subsequent call * to a method that throws {@code IOException} such as {@link #save}. * * <p>To populate the map entries using the previously persisted entries call * {@link #load()} prior to invoking any other map operation. * <p> * Like {@link Hashtable} but unlike {@link HashMap}, this class does * <em>not</em> allow <tt>null</tt> to be used as a key or a value. * <p> * IO failures during reading or writing the map entries to disk may result in * {@link AssertionError} getting thrown from the failing method. * <p> * The implementation of the map is not synchronized. If access from multiple * threads is required it must be synchronized using an external object. * <p> * The constructor allows passing in a version number that gets written to the * files on disk and checked before reading from disk. Files with an * incompatible version number will be ignored. This allows the client code to * change the persistence format without polluting the file system name space. */ public abstract class PersistentMap<K, V> extends ForwardingMap<K, V> { private static final int MAGIC = 0x20071105; private static final int ENTRY_MAGIC = 0xfe; private static final int MIN_MAPFILE_SIZE = 16; private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8; private static final Logger LOG = Logger.getLogger(PersistentMap.class.getName()); private final int version; private final Path mapFile; private final Path journalFile; private final Map<K, V> journal; private DataOutputStream journalOut; /** * 'dirty' is true when the in-memory representation of the map is more recent * than the on-disk representation. */ private boolean dirty; /** * If non-null, contains the message from an {@code IOException} thrown by a * previously failed write. This error is deferred until the next call to a * method which is able to throw an exception. */ private String deferredIOFailure = null; /** * 'loaded' is true when the in-memory representation is at least as recent as * the on-disk representation. */ private boolean loaded; private final Map<K, V> delegate; /** * Creates a new PersistentMap instance using the specified backing map. * * @param version the version tag. Changing the version tag allows updating * the on disk format. The map will never read from a file that was * written using a different version tag. * @param map the backing map to use for this PersistentMap. * @param mapFile the file to save the map entries to. * @param journalFile the journal file to write entries between invocations of * {@link #save()}. */ public PersistentMap(int version, Map<K, V> map, Path mapFile, Path journalFile) { this.version = version; journal = new LinkedHashMap<>(); this.mapFile = mapFile; this.journalFile = journalFile; delegate = map; } @Override protected Map<K, V> delegate() { return delegate; } @Override public V put(K key, V value) { if (key == null) { throw new NullPointerException(); } if (value == null) { throw new NullPointerException(); } V previous = delegate().put(key, value); journal.put(key, value); markAsDirty(); return previous; } /** * Marks the map as dirty and potentially writes updated entries to the * journal. */ protected void markAsDirty() { dirty = true; if (updateJournal()) { writeJournal(); } } /** * Determines if the journal should be updated. The default implementation * always returns 'true', but subclasses are free to override this to * implement their own journal updating strategy. For example it is possible * to implement an update at most every five seconds using the following code: * * <pre> * private long nextUpdate; * protected boolean updateJournal() { * long time = System.currentTimeMillis(); * if (time > nextUpdate) { * nextUpdate = time + 5 * 1000; * return true; * } * return false; * } * </pre> */ protected boolean updateJournal() { return true; } @Override @SuppressWarnings("unchecked") public V remove(Object object) { V previous = delegate().remove(object); if (previous != null) { // we know that 'object' must be an instance of K, because the // remove call succeeded, i.e. 'object' was mapped to 'previous'. journal.put((K) object, null); // unchecked markAsDirty(); } return previous; } /** * Updates the persistent journal by writing all entries to the * {@link #journalOut} stream and clearing the in memory journal. */ private void writeJournal() { try { if (journalOut == null) { if (journalFile.exists()) { // The journal file was left around after the last save() because // keepJournal() was true. Append to it. journalOut = new DataOutputStream(new BufferedOutputStream(journalFile.getOutputStream(true))); } else { // Create new journal. journalOut = createMapFile(journalFile); } } writeEntries(journalOut, journal); journalOut.flush(); journal.clear(); } catch (IOException e) { this.deferredIOFailure = e.getMessage() + " during journal append"; } } protected void forceFlush() { if (dirty) { writeJournal(); } } /** * Load the previous written map entries from disk. * * @param failFast if true, throw IOException rather than silently ignoring. * @throws IOException */ public void load(boolean failFast) throws IOException { if (!loaded) { loadEntries(mapFile, failFast); if (journalFile.exists()) { try { loadEntries(journalFile, failFast); } catch (IOException e) { if (failFast) { throw e; } //Else: ignore any errors reading the journal file as it may contain //partial entries. } // Force the map to be dirty, so that we can save it to disk. dirty = true; save(/*fullSave=*/ true); } else { dirty = false; } loaded = true; } } /** * Load the previous written map entries from disk. * * @throws IOException */ public void load() throws IOException { load(/*throwOnLoadFailure=*/ false); } @Override public void clear() { super.clear(); markAsDirty(); try { save(); } catch (IOException e) { this.deferredIOFailure = e.getMessage() + " during map write"; } } /** * Saves all the entries of this map to disk and deletes the journal file. * * @throws IOException if there was an I/O error during this call, or any previous call since the * last save(). */ public long save() throws IOException { return save(false); } /** * Saves all the entries of this map to disk and deletes the journal file. * * @param fullSave if true, always write the full cache to disk, without the * journal. * @throws IOException if there was an I/O error during this call, or any * previous call since the last save(). */ private long save(boolean fullSave) throws IOException { /* Report a previously failing I/O operation. */ if (deferredIOFailure != null) { try { throw new IOException(deferredIOFailure); } finally { deferredIOFailure = null; } } if (dirty) { if (!fullSave && keepJournal()) { forceFlush(); journalOut.close(); journalOut = null; return journalSize() + cacheSize(); } else { dirty = false; Path mapTemp = mapFile.getRelative(FileSystemUtils.replaceExtension(mapFile.asFragment(), ".tmp")); try { saveEntries(delegate(), mapTemp); mapFile.delete(); mapTemp.renameTo(mapFile); } finally { mapTemp.delete(); } clearJournal(); journalFile.delete(); return cacheSize(); } } else { return cacheSize(); } } protected final long journalSize() throws IOException { return journalFile.exists() ? journalFile.getFileSize() : 0; } protected final long cacheSize() throws IOException { return mapFile.exists() ? mapFile.getFileSize() : 0; } /** * If true, keep the journal during the save(). The journal is flushed, but * the map file is not touched. This may be useful in cases where the journal * is much smaller than the map. */ protected boolean keepJournal() { return false; } private void clearJournal() throws IOException { journal.clear(); if (journalOut != null) { journalOut.close(); journalOut = null; } } private void loadEntries(Path mapFile, boolean failFast) throws IOException { if (!mapFile.exists()) { return; } long fileSize = mapFile.getFileSize(); if (fileSize < MIN_MAPFILE_SIZE) { if (failFast) { throw new IOException(mapFile + " is too short: Only " + fileSize + " bytes"); } else { return; } } else if (fileSize > MAX_ARRAY_SIZE) { if (failFast) { throw new IOException(mapFile + " is too long: " + fileSize + " bytes"); } else { return; } } // We read the whole file up front as a performance optimization; otherwise calling available() // on the stream over and over does a lot of syscalls. byte[] mapBytes; try (InputStream fileInput = mapFile.getInputStream()) { mapBytes = ByteStreams.toByteArray(new BufferedInputStream(fileInput)); } DataInputStream in = new DataInputStream(new ByteArrayInputStream(mapBytes)); try { if (in.readLong() != MAGIC) { // not a PersistentMap if (failFast) { throw new IOException("Unexpected format"); } return; } if (in.readLong() != version) { // PersistentMap version incompatible if (failFast) { throw new IOException("Unexpected format"); } return; } readEntries(in, failFast); } finally { in.close(); } LOG.info(String.format("Loaded cache '%s' [%s bytes]", mapFile, fileSize)); } /** * Saves the entries in the specified map into the specified file. * * @param map the map to be written into the file. * @param mapFile the file the map is written to. * @throws IOException */ private void saveEntries(Map<K, V> map, Path mapFile) throws IOException { try (DataOutputStream out = createMapFile(mapFile)) { writeEntries(out, map); } } /** * Creates the specified file and returns the DataOuputStream suitable for writing entries. * * @param mapFile the file the map is written to. * @return the DataOutputStream that was can be used for saving the map to the file. * @throws IOException */ private DataOutputStream createMapFile(Path mapFile) throws IOException { FileSystemUtils.createDirectoryAndParents(mapFile.getParentDirectory()); DataOutputStream out = new DataOutputStream(new BufferedOutputStream(mapFile.getOutputStream())); out.writeLong(MAGIC); out.writeLong(version); return out; } /** * Writes the Map entries to the specified DataOutputStream. * * @param out the DataOutputStream to write the Map entries to. * @param map the Map containing the entries to be written to the * DataOutputStream. * @throws IOException */ private void writeEntries(DataOutputStream out, Map<K, V> map) throws IOException { for (Map.Entry<K, V> entry : map.entrySet()) { out.writeByte(ENTRY_MAGIC); writeKey(entry.getKey(), out); V value = entry.getValue(); boolean isEntry = (value != null); out.writeBoolean(isEntry); if (isEntry) { writeValue(value, out); } } } /** * Reads the Map entries from the specified DataInputStream. * * @param failFast if true, throw IOException if entries are in an unexpected * format. * @param in the DataInputStream to read the Map entries from. * @throws IOException */ private void readEntries(DataInputStream in, boolean failFast) throws IOException { Map<K, V> map = delegate(); while (hasEntries(in, failFast)) { K key = readKey(in); boolean isEntry = in.readBoolean(); if (isEntry) { V value = readValue(in); map.put(key, value); } else { map.remove(key); } } } private boolean hasEntries(DataInputStream in, boolean failFast) throws IOException { if (in.available() <= 0) { return false; } else if (in.readUnsignedByte() != ENTRY_MAGIC) { if (failFast) { throw new IOException("Corrupted entry separator"); } else { return false; } } return true; } /** * Writes a key of this map into the specified DataOutputStream. * * @param key the key to write to the DataOutputStream. * @param out the DataOutputStream to write the entry to. * @throws IOException */ protected abstract void writeKey(K key, DataOutputStream out) throws IOException; /** * Writes a value of this map into the specified DataOutputStream. * * @param value the value to write to the DataOutputStream. * @param out the DataOutputStream to write the entry to. * @throws IOException */ protected abstract void writeValue(V value, DataOutputStream out) throws IOException; /** * Reads an entry of this map from the specified DataInputStream. * * @param in the DataOutputStream to read the entry from. * @return the entry that was read from the DataInputStream. * @throws IOException */ protected abstract K readKey(DataInputStream in) throws IOException; /** * Reads an entry of this map from the specified DataInputStream. * * @param in the DataOutputStream to read the entry from. * @return the entry that was read from the DataInputStream. * @throws IOException */ protected abstract V readValue(DataInputStream in) throws IOException; }