package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.util.Set; import java.util.Map.Entry; import java.io.IOException; import org.apache.lucene.store.Directory; /** * An {@link IndexDeletionPolicy} that wraps around any other * {@link IndexDeletionPolicy} and adds the ability to hold and later release * snapshots of an index. While a snapshot is held, the {@link IndexWriter} will * not remove any files associated with it even if the index is otherwise being * actively, arbitrarily changed. Because we wrap another arbitrary * {@link IndexDeletionPolicy}, this gives you the freedom to continue using * whatever {@link IndexDeletionPolicy} you would normally want to use with your * index. * * <p> * This class maintains all snapshots in-memory, and so the information is not * persisted and not protected against system failures. If persistency is * important, you can use {@link PersistentSnapshotDeletionPolicy} (or your own * extension) and when creating a new instance of this deletion policy, pass the * persistent snapshots information to * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)}. * * @lucene.experimental */ public class SnapshotDeletionPolicy implements IndexDeletionPolicy { /** Holds a Snapshot's information. */ private static class SnapshotInfo { String id; String segmentsFileName; IndexCommit commit; public SnapshotInfo(String id, String segmentsFileName, IndexCommit commit) { this.id = id; this.segmentsFileName = segmentsFileName; this.commit = commit; } @Override public String toString() { return id + " : " + segmentsFileName; } } protected class SnapshotCommitPoint extends IndexCommit { protected IndexCommit cp; protected SnapshotCommitPoint(IndexCommit cp) { this.cp = cp; } /** * Returns true if this segment can be deleted. The default implementation * returns false if this segment is currently held as snapshot. */ protected boolean shouldDelete(String segmentsFileName) { return !segmentsFileToIDs.containsKey(segmentsFileName); } @Override public void delete() { synchronized (SnapshotDeletionPolicy.this) { // Suppress the delete request if this commit point is // currently snapshotted. if (shouldDelete(getSegmentsFileName())) { cp.delete(); } } } @Override public Directory getDirectory() { return cp.getDirectory(); } @Override public Collection<String> getFileNames() throws IOException { return cp.getFileNames(); } @Override public long getGeneration() { return cp.getGeneration(); } @Override public String getSegmentsFileName() { return cp.getSegmentsFileName(); } @Override public Map<String, String> getUserData() throws IOException { return cp.getUserData(); } @Override public long getVersion() { return cp.getVersion(); } @Override public boolean isDeleted() { return cp.isDeleted(); } @Override public boolean isOptimized() { return cp.isOptimized(); } } /** Snapshots info */ private Map<String, SnapshotInfo> idToSnapshot = new HashMap<String, SnapshotInfo>(); // multiple IDs could point to the same commit point (segments file name) private Map<String, Set<String>> segmentsFileToIDs = new HashMap<String, Set<String>>(); private IndexDeletionPolicy primary; protected IndexCommit lastCommit; public SnapshotDeletionPolicy(IndexDeletionPolicy primary) { this.primary = primary; } /** * {@link SnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to * enable flexible snapshotting. * * @param primary * the {@link IndexDeletionPolicy} that is used on non-snapshotted * commits. Snapshotted commits, are not deleted until explicitly * released via {@link #release(String)} * @param snapshotsInfo * A mapping of snapshot ID to the segments filename that is being * snapshotted. The expected input would be the output of * {@link #getSnapshots()}. A null value signals that there are no * initial snapshots to maintain. */ public SnapshotDeletionPolicy(IndexDeletionPolicy primary, Map<String, String> snapshotsInfo) { this(primary); if (snapshotsInfo != null) { // Add the ID->segmentIDs here - the actual IndexCommits will be // reconciled on the call to onInit() for (Entry<String, String> e : snapshotsInfo.entrySet()) { registerSnapshotInfo(e.getKey(), e.getValue(), null); } } } /** * Checks if the given id is already used by another snapshot, and throws * {@link IllegalStateException} if it is. */ protected void checkSnapshotted(String id) { if (isSnapshotted(id)) { throw new IllegalStateException("Snapshot ID " + id + " is already used - must be unique"); } } /** Registers the given snapshot information. */ protected void registerSnapshotInfo(String id, String segment, IndexCommit commit) { idToSnapshot.put(id, new SnapshotInfo(id, segment, commit)); Set<String> ids = segmentsFileToIDs.get(segment); if (ids == null) { ids = new HashSet<String>(); segmentsFileToIDs.put(segment, ids); } ids.add(id); } protected List<IndexCommit> wrapCommits(List<? extends IndexCommit> commits) { List<IndexCommit> wrappedCommits = new ArrayList<IndexCommit>(commits.size()); for (IndexCommit ic : commits) { wrappedCommits.add(new SnapshotCommitPoint(ic)); } return wrappedCommits; } /** * Get a snapshotted IndexCommit by ID. The IndexCommit can then be used to * open an IndexReader on a specific commit point, or rollback the index by * opening an IndexWriter with the IndexCommit specified in its * {@link IndexWriterConfig}. * * @param id * a unique identifier of the commit that was snapshotted. * @throws IllegalStateException * if no snapshot exists by the specified ID. * @return The {@link IndexCommit} for this particular snapshot. */ public synchronized IndexCommit getSnapshot(String id) { SnapshotInfo snapshotInfo = idToSnapshot.get(id); if (snapshotInfo == null) { throw new IllegalStateException("No snapshot exists by ID: " + id); } return snapshotInfo.commit; } /** * Get all the snapshots in a map of snapshot IDs to the segments they * 'cover.' This can be passed to * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)} in order to * initialize snapshots at construction. */ public synchronized Map<String, String> getSnapshots() { Map<String, String> snapshots = new HashMap<String, String>(); for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) { snapshots.put(e.getKey(), e.getValue().segmentsFileName); } return snapshots; } /** * Returns true if the given ID is already used by a snapshot. You can call * this method before {@link #snapshot(String)} if you are not sure whether * the ID is already used or not. */ public boolean isSnapshotted(String id) { return idToSnapshot.containsKey(id); } public synchronized void onCommit(List<? extends IndexCommit> commits) throws IOException { primary.onCommit(wrapCommits(commits)); lastCommit = commits.get(commits.size() - 1); } public synchronized void onInit(List<? extends IndexCommit> commits) throws IOException { primary.onInit(wrapCommits(commits)); lastCommit = commits.get(commits.size() - 1); /* * Assign snapshotted IndexCommits to their correct snapshot IDs as * specified in the constructor. */ for (IndexCommit commit : commits) { Set<String> ids = segmentsFileToIDs.get(commit.getSegmentsFileName()); if (ids != null) { for (String id : ids) { idToSnapshot.get(id).commit = commit; } } } /* * Second, see if there are any instances where a snapshot ID was specified * in the constructor but an IndexCommit doesn't exist. In this case, the ID * should be removed. * * Note: This code is protective for extreme cases where IDs point to * non-existent segments. As the constructor should have received its * information via a call to getSnapshots(), the data should be well-formed. */ // Find lost snapshots ArrayList<String> idsToRemove = null; for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) { if (e.getValue().commit == null) { if (idsToRemove == null) { idsToRemove = new ArrayList<String>(); } idsToRemove.add(e.getKey()); } } // Finally, remove those 'lost' snapshots. if (idsToRemove != null) { for (String id : idsToRemove) { SnapshotInfo info = idToSnapshot.remove(id); segmentsFileToIDs.remove(info.segmentsFileName); } } } /** * Release a snapshotted commit by ID. * * @param id * a unique identifier of the commit that is un-snapshotted. * @throws IllegalStateException * if no snapshot exists by this ID. */ public synchronized void release(String id) throws IOException { SnapshotInfo info = idToSnapshot.remove(id); if (info == null) { throw new IllegalStateException("Snapshot doesn't exist: " + id); } Set<String> ids = segmentsFileToIDs.get(info.segmentsFileName); if (ids != null) { ids.remove(id); if (ids.size() == 0) { segmentsFileToIDs.remove(info.segmentsFileName); } } } /** * Snapshots the last commit. Once a commit is 'snapshotted,' it is protected * from deletion (as long as this {@link IndexDeletionPolicy} is used). The * commit can be removed by calling {@link #release(String)} using the same ID * parameter followed by a call to {@link IndexWriter#deleteUnusedFiles()}. * <p> * <b>NOTE:</b> ID must be unique in the system. If the same ID is used twice, * an {@link IllegalStateException} is thrown. * <p> * <b>NOTE:</b> while the snapshot is held, the files it references will not * be deleted, which will consume additional disk space in your index. If you * take a snapshot at a particularly bad time (say just before you call * optimize()) then in the worst case this could consume an extra 1X of your * total index size, until you release the snapshot. * * @param id * a unique identifier of the commit that is being snapshotted. * @throws IllegalStateException * if either there is no 'last commit' to snapshot, or if the * parameter 'ID' refers to an already snapshotted commit. * @return the {@link IndexCommit} that was snapshotted. */ public synchronized IndexCommit snapshot(String id) throws IOException { if (lastCommit == null) { // no commit exists. Really shouldn't happen, but might be if SDP is // accessed before onInit or onCommit were called. throw new IllegalStateException("No index commit to snapshot"); } // Can't use the same snapshot ID twice... checkSnapshotted(id); registerSnapshotInfo(id, lastCommit.getSegmentsFileName(), lastCommit); return lastCommit; } }