/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result.
 * Because the new pack is constructed by enumerating the indexes of the
 * source packs, it is quicker than doing a full repack of the repository;
 * however, the result is not nearly as space efficient, because new delta
 * compression is disabled.
 * <p>
 * This method is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without
 * expending a lot of time repacking the entire repository.
 */
public class DfsPackCompactor {
	private final DfsRepository repo;
	private final List<DfsPackFile> srcPacks;
	private final List<ObjectIdSet> exclude;
	private final List<DfsPackDescription> newPacks;
	private final List<PackStatistics> newStats;

	private int autoAddSize;
	private RevWalk rw;
	private RevFlag added;
	private RevFlag isBase;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<DfsPackFile>();
		exclude = new ArrayList<ObjectIdSet>(4);
		newPacks = new ArrayList<DfsPackDescription>(1);
		newStats = new ArrayList<PackStatistics>(1);
	}

	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack will order objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}

	/**
	 * Automatically select packs to be included, and add them.
	 * <p>
	 * Packs are selected based on size; smaller packs are included while
	 * bigger ones are omitted.
	 *
	 * @return {@code this}
	 * @throws IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getFileSize(PACK) < autoAddSize)
				add(pack);
			else
				exclude(pack);
		}
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param set
	 *            objects to not include.
	 * @return {@code this}.
	 */
	public DfsPackCompactor exclude(ObjectIdSet set) {
		exclude.add(set);
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param pack
	 *            objects to not include.
	 * @return {@code this}.
	 * @throws IOException
	 *             pack index cannot be loaded.
	 */
	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
		final PackIndex idx;
		try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
			idx = pack.getPackIndex(ctx);
		}
		return exclude(idx);
	}

	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates on as packing may take a
	 *            while, depending on the size of the repository.
	 * @throws IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;

		DfsObjDatabase objdb = repo.getObjectDatabase();
		try (DfsReader ctx = (DfsReader) objdb.newReader()) {
			PackConfig pc = new PackConfig(repo);
			pc.setIndexVersion(2);
			pc.setDeltaCompress(false);
			pc.setReuseDeltas(true);
			pc.setReuseObjects(true);

			PackWriter pw = new PackWriter(pc, ctx);
			try {
				pw.setDeltaBaseAsOffset(true);
				pw.setReuseDeltaCommits(false);

				addObjectsToPack(pw, ctx, pm);
				if (pw.getObjectCount() == 0) {
					List<DfsPackDescription> remove = toPrune();
					if (remove.size() > 0)
						objdb.commitPack(
								Collections.<DfsPackDescription> emptyList(),
								remove);
					return;
				}

				boolean rollback = true;
				DfsPackDescription pack = objdb.newPack(COMPACT);
				try {
					writePack(objdb, pack, pw, pm);
					writeIndex(objdb, pack, pw);

					PackStatistics stats = pw.getStatistics();
					pw.close();
					pw = null;

					pack.setPackStats(stats);
					objdb.commitPack(Collections.singletonList(pack), toPrune());
					newPacks.add(pack);
					newStats.add(stats);
					rollback = false;
				} finally {
					if (rollback)
						objdb.rollbackPack(Collections.singletonList(pack));
				}
			} finally {
				if (pw != null)
					pw.close();
			}
		} finally {
			rw = null;
		}
	}

	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPacks;
	}

	/** @return statistics corresponding to the {@link #getNewPacks()}. */
	public List<PackStatistics> getNewPackStatistics() {
		return newStats;
	}

	private List<DfsPackDescription> toPrune() {
		int cnt = srcPacks.size();
		List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
		for (DfsPackFile pack : srcPacks)
			all.add(pack.getPackDescription());
		return all;
	}

	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering; this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(srcPacks, new Comparator<DfsPackFile>() {
			public int compare(DfsPackFile a, DfsPackFile b) {
				return a.getPackDescription().compareTo(b.getPackDescription());
			}
		});

		rw = new RevWalk(ctx);
		added = rw.newFlag("ADDED"); //$NON-NLS-1$
		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
		List<RevObject> baseObjects = new BlockList<RevObject>();

		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = toInclude(src, ctx);
			if (want.isEmpty())
				continue;

			PackReverseIndex rev = src.getReverseIdx(ctx);
			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (obj.has(added))
					continue;

				pm.update(1);
				pw.addObject(obj);
				obj.add(added);

				src.representation(rep, id.offset, ctx, rev);
				if (rep.getFormat() != PACK_DELTA)
					continue;

				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
				if (!base.has(added) && !base.has(isBase)) {
					baseObjects.add(base);
					base.add(isBase);
				}
			}
		}
		for (RevObject obj : baseObjects) {
			if (!obj.has(added)) {
				pm.update(1);
				pw.addObject(obj);
				obj.add(added);
			}
		}
		pm.endTask();
	}

	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
			throws IOException {
		PackIndex srcIdx = src.getPackIndex(ctx);
		List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>(
				(int) srcIdx.getObjectCount());
		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
			ObjectId id = ent.toObjectId();
			RevObject obj = rw.lookupOrNull(id);
			if (obj != null && (obj.has(added) || obj.has(isBase)))
				continue;
			for (ObjectIdSet e : exclude)
				if (e.contains(id))
					continue SCAN;
			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
		}
		Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
			public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
				return Long.signum(a.offset - b.offset);
			}
		});
		return want;
	}

	private static void writePack(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, PACK);
		try {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
		} finally {
			out.close();
		}
	}

	private static void writeIndex(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, INDEX);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setIndexVersion(pw.getIndexVersion());
		} finally {
			out.close();
		}
	}

	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}
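/*
 * Illustrative usage sketch (not part of the original class): it shows how the
 * compactor above would typically be driven, selecting small packs
 * automatically and then compacting them into a single COMPACT pack. The class
 * and method names below are assumptions chosen for the example; only the
 * DfsPackCompactor calls themselves come from the API defined in this file.
 */
class DfsPackCompactorExample {
	static List<DfsPackDescription> compactSmallPacks(DfsRepository repository)
			throws IOException {
		DfsPackCompactor compactor = new DfsPackCompactor(repository);
		compactor.autoAdd(); // packs under the 5 MiB threshold are included
		compactor.compact(NullProgressMonitor.INSTANCE);
		// Descriptions of any packs written by this compaction.
		return compactor.getNewPacks();
	}
}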