/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.gc.replication; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Set; import org.apache.accumulo.core.client.BatchScanner; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.MutationsRejectedException; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.file.rfile.RFile; import org.apache.accumulo.core.master.thrift.MasterClientService; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.MetadataSchema.ReplicationSection; import org.apache.accumulo.core.replication.ReplicationTable; import org.apache.accumulo.core.rpc.ThriftUtil; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.tabletserver.thrift.TabletClientService; import org.apache.accumulo.core.trace.Span; import org.apache.accumulo.core.trace.Trace; import org.apache.accumulo.core.trace.thrift.TInfo; import org.apache.accumulo.server.AccumuloServerContext; import org.apache.accumulo.server.log.WalStateManager; import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; import org.apache.accumulo.server.log.WalStateManager.WalState; import org.apache.accumulo.server.replication.StatusUtil; import org.apache.accumulo.server.replication.proto.Replication.Status; import org.apache.accumulo.server.zookeeper.ZooReaderWriter; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Stopwatch; import com.google.common.net.HostAndPort; import com.google.protobuf.InvalidProtocolBufferException; /** * It's impossible to know when all references to a WAL have been removed from the metadata table as the references are potentially spread across the entire * tablets row-space. * <p> * This tool scans the metadata table to collect a set of WALs that are still referenced. Then, each {@link Status} record from the metadata and replication * tables that point to that WAL can be "closed", by writing a new Status to the same key with the closed member true. */ public class CloseWriteAheadLogReferences implements Runnable { private static final Logger log = LoggerFactory.getLogger(CloseWriteAheadLogReferences.class); private static final String RFILE_SUFFIX = "." + RFile.EXTENSION; private final AccumuloServerContext context; public CloseWriteAheadLogReferences(AccumuloServerContext context) { this.context = context; } @Override public void run() { // As long as we depend on a newer Guava than Hadoop uses, we have to make sure we're compatible with // what the version they bundle uses. Stopwatch sw = new Stopwatch(); Connector conn; try { conn = context.getConnector(); } catch (Exception e) { log.error("Could not create connector", e); throw new RuntimeException(e); } if (!ReplicationTable.isOnline(conn)) { log.debug("Replication table isn't online, not attempting to clean up wals"); return; } Span findWalsSpan = Trace.start("findReferencedWals"); HashSet<String> closed = null; try { sw.start(); closed = getClosedLogs(conn); } finally { sw.stop(); findWalsSpan.stop(); } log.info("Found " + closed.size() + " WALs referenced in metadata in " + sw.toString()); sw.reset(); Span updateReplicationSpan = Trace.start("updateReplicationTable"); long recordsClosed = 0; try { sw.start(); recordsClosed = updateReplicationEntries(conn, closed); } finally { sw.stop(); updateReplicationSpan.stop(); } log.info("Closed " + recordsClosed + " WAL replication references in replication table in " + sw.toString()); } /** * Construct the set of referenced WALs from zookeeper * * @param conn * Connector * @return The Set of WALs that are referenced in the metadata table */ protected HashSet<String> getClosedLogs(Connector conn) { WalStateManager wals = new WalStateManager(conn.getInstance(), ZooReaderWriter.getInstance()); HashSet<String> result = new HashSet<>(); try { for (Entry<Path,WalState> entry : wals.getAllState().entrySet()) { if (entry.getValue() == WalState.UNREFERENCED || entry.getValue() == WalState.CLOSED) { Path path = entry.getKey(); log.debug("Found closed WAL " + path.toString()); result.add(path.toString()); } } } catch (WalMarkerException e) { throw new RuntimeException(e); } return result; } /** * Given the set of WALs which have references in the metadata table, close any status messages with reference that WAL. * * @param conn * Connector * @param closedWals * {@link Set} of paths to WALs that marked as closed or unreferenced in zookeeper */ protected long updateReplicationEntries(Connector conn, Set<String> closedWals) { BatchScanner bs = null; BatchWriter bw = null; long recordsClosed = 0; try { bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig()); bs = conn.createBatchScanner(MetadataTable.NAME, Authorizations.EMPTY, 4); bs.setRanges(Collections.singleton(Range.prefix(ReplicationSection.getRowPrefix()))); bs.fetchColumnFamily(ReplicationSection.COLF); Text replFileText = new Text(); for (Entry<Key,Value> entry : bs) { Status status; try { status = Status.parseFrom(entry.getValue().get()); } catch (InvalidProtocolBufferException e) { log.error("Could not parse Status protobuf for {}", entry.getKey(), e); continue; } // Ignore things that aren't completely replicated as we can't delete those anyways MetadataSchema.ReplicationSection.getFile(entry.getKey(), replFileText); String replFile = replFileText.toString(); boolean isClosed = closedWals.contains(replFile); // We only want to clean up WALs (which is everything but rfiles) and only when // metadata doesn't have a reference to the given WAL if (!status.getClosed() && !replFile.endsWith(RFILE_SUFFIX) && isClosed) { try { closeWal(bw, entry.getKey()); recordsClosed++; } catch (MutationsRejectedException e) { log.error("Failed to submit delete mutation for " + entry.getKey()); continue; } } } } catch (TableNotFoundException e) { log.error("Replication table was deleted", e); } finally { if (null != bs) { bs.close(); } if (null != bw) { try { bw.close(); } catch (MutationsRejectedException e) { log.error("Failed to write delete mutations for replication table", e); } } } return recordsClosed; } /** * Write a closed {@link Status} mutation for the given {@link Key} using the provided {@link BatchWriter} * * @param bw * BatchWriter * @param k * Key to create close mutation from */ protected void closeWal(BatchWriter bw, Key k) throws MutationsRejectedException { log.debug("Closing unreferenced WAL ({}) in metadata table", k.toStringNoTruncate()); Mutation m = new Mutation(k.getRow()); m.put(k.getColumnFamily(), k.getColumnQualifier(), StatusUtil.fileClosedValue()); bw.addMutation(m); } private HostAndPort getMasterAddress() { try { List<String> locations = context.getInstance().getMasterLocations(); if (locations.size() == 0) return null; return HostAndPort.fromString(locations.get(0)); } catch (Exception e) { log.warn("Failed to obtain master host " + e); } return null; } private MasterClientService.Client getMasterConnection() { final HostAndPort address = getMasterAddress(); try { if (address == null) { log.warn("Could not fetch Master address"); return null; } return ThriftUtil.getClient(new MasterClientService.Client.Factory(), address, context); } catch (Exception e) { log.warn("Issue with masterConnection (" + address + ") " + e, e); } return null; } /** * Get the active tabletservers as seen by the master. * * @return The active tabletservers, null if they can't be computed. */ protected List<String> getActiveTservers(TInfo tinfo) { MasterClientService.Client client = null; List<String> tservers = null; try { client = getMasterConnection(); // Could do this through InstanceOperations, but that would set a bunch of new Watchers via ZK on every tserver // node. The master is already tracking all of this info, so hopefully this is less overall work. if (null != client) { tservers = client.getActiveTservers(tinfo, context.rpcCreds()); } } catch (TException e) { // If we can't fetch the tabletservers, we can't fetch any active WALs log.warn("Failed to fetch active tabletservers from the master", e); return null; } finally { ThriftUtil.returnClient(client); } return tservers; } protected List<String> getActiveWalsForServer(TInfo tinfo, HostAndPort server) { TabletClientService.Client tserverClient = null; try { tserverClient = ThriftUtil.getClient(new TabletClientService.Client.Factory(), server, context); return tserverClient.getActiveLogs(tinfo, context.rpcCreds()); } catch (TTransportException e) { log.warn("Failed to fetch active write-ahead logs from " + server, e); return null; } catch (TException e) { log.warn("Failed to fetch active write-ahead logs from " + server, e); return null; } finally { ThriftUtil.returnClient(tserverClient); } } }