/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.tserver.tablet; import java.io.IOException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicLong; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.data.impl.KeyExtent; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.file.FileSKVWriter; import org.apache.accumulo.core.iterators.IteratorUtil; import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator; import org.apache.accumulo.core.iterators.system.DeletingIterator; import org.apache.accumulo.core.iterators.system.MultiIterator; import org.apache.accumulo.core.iterators.system.TimeSettingIterator; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.trace.Span; import org.apache.accumulo.core.trace.Trace; import org.apache.accumulo.core.util.LocalityGroupUtil; import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError; import org.apache.accumulo.core.util.ratelimit.RateLimiter; import org.apache.accumulo.server.AccumuloServerContext; import org.apache.accumulo.server.fs.FileRef; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.accumulo.server.problems.ProblemReport; import org.apache.accumulo.server.problems.ProblemReportingIterator; import org.apache.accumulo.server.problems.ProblemReports; import org.apache.accumulo.server.problems.ProblemType; import org.apache.accumulo.tserver.InMemoryMap; import org.apache.accumulo.tserver.MinorCompactionReason; import org.apache.accumulo.tserver.TabletIteratorEnvironment; import org.apache.accumulo.tserver.compaction.MajorCompactionReason; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class Compactor implements Callable<CompactionStats> { private static final Logger log = LoggerFactory.getLogger(Compactor.class); private static final AtomicLong nextCompactorID = new AtomicLong(0); public static class CompactionCanceledException extends Exception { private static final long serialVersionUID = 1L; } public interface CompactionEnv { boolean isCompactionEnabled(); IteratorScope getIteratorScope(); RateLimiter getReadLimiter(); RateLimiter getWriteLimiter(); } private final Map<FileRef,DataFileValue> filesToCompact; private final InMemoryMap imm; private final FileRef outputFile; private final boolean propogateDeletes; private final AccumuloConfiguration acuTableConf; private final CompactionEnv env; private final VolumeManager fs; protected final KeyExtent extent; private final List<IteratorSetting> iterators; // things to report private String currentLocalityGroup = ""; private final long startTime; private int reason; private final AtomicLong entriesRead = new AtomicLong(0); private final AtomicLong entriesWritten = new AtomicLong(0); private final DateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS"); // a unique id to identify a compactor private final long compactorID = nextCompactorID.getAndIncrement(); protected volatile Thread thread; private final AccumuloServerContext context; public long getCompactorID() { return compactorID; } private synchronized void setLocalityGroup(String name) { this.currentLocalityGroup = name; } public synchronized String getCurrentLocalityGroup() { return currentLocalityGroup; } private void clearStats() { entriesRead.set(0); entriesWritten.set(0); } protected static final Set<Compactor> runningCompactions = Collections.synchronizedSet(new HashSet<Compactor>()); public static List<CompactionInfo> getRunningCompactions() { ArrayList<CompactionInfo> compactions = new ArrayList<>(); synchronized (runningCompactions) { for (Compactor compactor : runningCompactions) { compactions.add(new CompactionInfo(compactor)); } } return compactions; } public Compactor(AccumuloServerContext context, Tablet tablet, Map<FileRef,DataFileValue> files, InMemoryMap imm, FileRef outputFile, boolean propogateDeletes, CompactionEnv env, List<IteratorSetting> iterators, int reason, AccumuloConfiguration tableConfiguation) { this.context = context; this.extent = tablet.getExtent(); this.fs = tablet.getTabletServer().getFileSystem(); this.acuTableConf = tableConfiguation; this.filesToCompact = files; this.imm = imm; this.outputFile = outputFile; this.propogateDeletes = propogateDeletes; this.env = env; this.iterators = iterators; this.reason = reason; startTime = System.currentTimeMillis(); } public VolumeManager getFileSystem() { return fs; } KeyExtent getExtent() { return extent; } String getOutputFile() { return outputFile.toString(); } MajorCompactionReason getMajorCompactionReason() { return MajorCompactionReason.values()[reason]; } @Override public CompactionStats call() throws IOException, CompactionCanceledException { FileSKVWriter mfw = null; CompactionStats majCStats = new CompactionStats(); boolean remove = runningCompactions.add(this); clearStats(); final Path outputFilePath = outputFile.path(); final String outputFilePathName = outputFilePath.toString(); String oldThreadName = Thread.currentThread().getName(); String newThreadName = "MajC compacting " + extent.toString() + " started " + dateFormatter.format(new Date()) + " file: " + outputFile; Thread.currentThread().setName(newThreadName); thread = Thread.currentThread(); try { FileOperations fileFactory = FileOperations.getInstance(); FileSystem ns = this.fs.getVolumeByPath(outputFilePath).getFileSystem(); mfw = fileFactory.newWriterBuilder().forFile(outputFilePathName, ns, ns.getConf()).withTableConfiguration(acuTableConf) .withRateLimiter(env.getWriteLimiter()).build(); Map<String,Set<ByteSequence>> lGroups; try { lGroups = LocalityGroupUtil.getLocalityGroups(acuTableConf); } catch (LocalityGroupConfigurationError e) { throw new IOException(e); } long t1 = System.currentTimeMillis(); HashSet<ByteSequence> allColumnFamilies = new HashSet<>(); if (mfw.supportsLocalityGroups()) { for (Entry<String,Set<ByteSequence>> entry : lGroups.entrySet()) { setLocalityGroup(entry.getKey()); compactLocalityGroup(entry.getKey(), entry.getValue(), true, mfw, majCStats); allColumnFamilies.addAll(entry.getValue()); } } setLocalityGroup(""); compactLocalityGroup(null, allColumnFamilies, false, mfw, majCStats); long t2 = System.currentTimeMillis(); FileSKVWriter mfwTmp = mfw; mfw = null; // set this to null so we do not try to close it again in finally if the close fails try { mfwTmp.close(); // if the close fails it will cause the compaction to fail } catch (IOException ex) { if (!fs.deleteRecursively(outputFile.path())) { if (fs.exists(outputFile.path())) { log.error("Unable to delete " + outputFile); } } throw ex; } log.debug(String.format("Compaction %s %,d read | %,d written | %,6d entries/sec | %,6.3f secs | %,12d bytes | %9.3f byte/sec", extent, majCStats.getEntriesRead(), majCStats.getEntriesWritten(), (int) (majCStats.getEntriesRead() / ((t2 - t1) / 1000.0)), (t2 - t1) / 1000.0, mfwTmp.getLength(), mfwTmp.getLength() / ((t2 - t1) / 1000.0))); majCStats.setFileSize(mfwTmp.getLength()); return majCStats; } catch (IOException e) { log.error("{}", e.getMessage(), e); throw e; } catch (RuntimeException e) { log.error("{}", e.getMessage(), e); throw e; } finally { Thread.currentThread().setName(oldThreadName); if (remove) { thread = null; runningCompactions.remove(this); } try { if (mfw != null) { // compaction must not have finished successfully, so close its output file try { mfw.close(); } finally { if (!fs.deleteRecursively(outputFile.path())) if (fs.exists(outputFile.path())) log.error("Unable to delete " + outputFile); } } } catch (IOException e) { log.warn("{}", e.getMessage(), e); } catch (RuntimeException exception) { log.warn("{}", exception.getMessage(), exception); } } } private List<SortedKeyValueIterator<Key,Value>> openMapDataFiles(String lgName, ArrayList<FileSKVIterator> readers) throws IOException { List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<>(filesToCompact.size()); for (FileRef mapFile : filesToCompact.keySet()) { try { FileOperations fileFactory = FileOperations.getInstance(); FileSystem fs = this.fs.getVolumeByPath(mapFile.path()).getFileSystem(); FileSKVIterator reader; reader = fileFactory.newReaderBuilder().forFile(mapFile.path().toString(), fs, fs.getConf()).withTableConfiguration(acuTableConf) .withRateLimiter(env.getReadLimiter()).build(); readers.add(reader); SortedKeyValueIterator<Key,Value> iter = new ProblemReportingIterator(context, extent.getTableId(), mapFile.path().toString(), false, reader); if (filesToCompact.get(mapFile).isTimeSet()) { iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime()); } iters.add(iter); } catch (Throwable e) { ProblemReports.getInstance(context).report(new ProblemReport(extent.getTableId(), ProblemType.FILE_READ, mapFile.path().toString(), e)); log.warn("Some problem opening map file {} {}", mapFile, e.getMessage(), e); // failed to open some map file... close the ones that were opened for (FileSKVIterator reader : readers) { try { reader.close(); } catch (Throwable e2) { log.warn("Failed to close map file", e2); } } readers.clear(); if (e instanceof IOException) throw (IOException) e; throw new IOException("Failed to open map data files", e); } } return iters; } private void compactLocalityGroup(String lgName, Set<ByteSequence> columnFamilies, boolean inclusive, FileSKVWriter mfw, CompactionStats majCStats) throws IOException, CompactionCanceledException { ArrayList<FileSKVIterator> readers = new ArrayList<>(filesToCompact.size()); Span span = Trace.start("compact"); try { long entriesCompacted = 0; List<SortedKeyValueIterator<Key,Value>> iters = openMapDataFiles(lgName, readers); if (imm != null) { iters.add(imm.compactionIterator()); } CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()), entriesRead); DeletingIterator delIter = new DeletingIterator(citr, propogateDeletes); ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter); // if(env.getIteratorScope() ) TabletIteratorEnvironment iterEnv; if (env.getIteratorScope() == IteratorScope.majc) iterEnv = new TabletIteratorEnvironment(IteratorScope.majc, !propogateDeletes, acuTableConf); else if (env.getIteratorScope() == IteratorScope.minc) iterEnv = new TabletIteratorEnvironment(IteratorScope.minc, acuTableConf); else throw new IllegalArgumentException(); SortedKeyValueIterator<Key,Value> itr = iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(env.getIteratorScope(), cfsi, extent, acuTableConf, iterators, iterEnv)); itr.seek(extent.toDataRange(), columnFamilies, inclusive); if (!inclusive) { mfw.startDefaultLocalityGroup(); } else { mfw.startNewLocalityGroup(lgName, columnFamilies); } Span write = Trace.start("write"); try { while (itr.hasTop() && env.isCompactionEnabled()) { mfw.append(itr.getTopKey(), itr.getTopValue()); itr.next(); entriesCompacted++; if (entriesCompacted % 1024 == 0) { // Periodically update stats, do not want to do this too often since its volatile entriesWritten.addAndGet(1024); } } if (itr.hasTop() && !env.isCompactionEnabled()) { // cancel major compaction operation try { try { mfw.close(); } catch (IOException e) { log.error("{}", e.getMessage(), e); } fs.deleteRecursively(outputFile.path()); } catch (Exception e) { log.warn("Failed to delete Canceled compaction output file " + outputFile, e); } throw new CompactionCanceledException(); } } finally { CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted); majCStats.add(lgMajcStats); write.stop(); } } finally { // close sequence files opened for (FileSKVIterator reader : readers) { try { reader.close(); } catch (Throwable e) { log.warn("Failed to close map file", e); } } span.stop(); } } Collection<FileRef> getFilesToCompact() { return filesToCompact.keySet(); } boolean hasIMM() { return imm != null; } boolean willPropogateDeletes() { return propogateDeletes; } long getEntriesRead() { return entriesRead.get(); } long getEntriesWritten() { return entriesWritten.get(); } long getStartTime() { return startTime; } Iterable<IteratorSetting> getIterators() { return this.iterators; } MinorCompactionReason getMinCReason() { return MinorCompactionReason.values()[reason]; } }