/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.core.iterators.user; import static java.nio.charset.StandardCharsets.UTF_8; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Map; import org.apache.accumulo.core.data.ArrayByteSequence; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; /** * An iterator for deleting whole rows. * * After setting this iterator up for your table, to delete a row insert a row with empty column family, empty column qualifier, empty column visibility, and a * value of DEL_ROW. Do not use empty columns for anything else when using this iterator. * * When using this iterator the locality group containing the row deletes will always be read. The locality group containing the empty column family will * contain row deletes. Always reading this locality group can have an impact on performance. * * For example assume there are two locality groups, one containing large images and one containing small metadata about the images. If row deletes are in the * same locality group as the images, then this will significantly slow down scans and major compactions that are only reading the metadata locality group. * Therefore, you would want to put the empty column family in the locality group that contains the metadata. Another option is to put the empty column in its * own locality group. Which is best depends on your data. * */ public class RowDeletingIterator implements SortedKeyValueIterator<Key,Value> { public static final Value DELETE_ROW_VALUE = new Value("DEL_ROW".getBytes(UTF_8)); private SortedKeyValueIterator<Key,Value> source; private boolean propogateDeletes; private ByteSequence currentRow; private boolean currentRowDeleted; private long deleteTS; private boolean dropEmptyColFams; private static final ByteSequence EMPTY = new ArrayByteSequence(new byte[] {}); private RowDeletingIterator(SortedKeyValueIterator<Key,Value> source, boolean propogateDeletes2) { this.source = source; this.propogateDeletes = propogateDeletes2; } public RowDeletingIterator() {} @Override public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) { return new RowDeletingIterator(source.deepCopy(env), propogateDeletes); } @Override public Key getTopKey() { return source.getTopKey(); } @Override public Value getTopValue() { return source.getTopValue(); } @Override public boolean hasTop() { return source.hasTop(); } @Override public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException { this.source = source; this.propogateDeletes = (env.getIteratorScope() == IteratorScope.majc && !env.isFullMajorCompaction()) || env.getIteratorScope() == IteratorScope.minc; } @Override public void next() throws IOException { source.next(); consumeDeleted(); consumeEmptyColFams(); } private void consumeEmptyColFams() throws IOException { while (dropEmptyColFams && source.hasTop() && source.getTopKey().getColumnFamilyData().length() == 0) { source.next(); consumeDeleted(); } } private boolean isDeleteMarker(Key key, Value val) { return key.getColumnFamilyData().length() == 0 && key.getColumnQualifierData().length() == 0 && key.getColumnVisibilityData().length() == 0 && val.equals(DELETE_ROW_VALUE); } private void consumeDeleted() throws IOException { // this method tries to do as little work as possible when nothing is deleted while (source.hasTop()) { if (currentRowDeleted) { while (source.hasTop() && currentRow.equals(source.getTopKey().getRowData()) && source.getTopKey().getTimestamp() <= deleteTS) { source.next(); } if (source.hasTop() && !currentRow.equals(source.getTopKey().getRowData())) { currentRowDeleted = false; } } if (!currentRowDeleted && source.hasTop() && isDeleteMarker(source.getTopKey(), source.getTopValue())) { currentRow = source.getTopKey().getRowData(); currentRowDeleted = true; deleteTS = source.getTopKey().getTimestamp(); if (propogateDeletes) break; } else { break; } } } @Override public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException { if (inclusive && !columnFamilies.contains(EMPTY)) { columnFamilies = new HashSet<>(columnFamilies); columnFamilies.add(EMPTY); dropEmptyColFams = true; } else if (!inclusive && columnFamilies.contains(EMPTY)) { columnFamilies = new HashSet<>(columnFamilies); columnFamilies.remove(EMPTY); dropEmptyColFams = true; } else { dropEmptyColFams = false; } currentRowDeleted = false; if (range.getStartKey() != null) { // seek to beginning of row Range newRange = new Range(new Key(range.getStartKey().getRow()), true, range.getEndKey(), range.isEndKeyInclusive()); source.seek(newRange, columnFamilies, inclusive); consumeDeleted(); consumeEmptyColFams(); if (source.hasTop() && range.beforeStartKey(source.getTopKey())) { source.seek(range, columnFamilies, inclusive); consumeDeleted(); consumeEmptyColFams(); } } else { source.seek(range, columnFamilies, inclusive); consumeDeleted(); consumeEmptyColFams(); } } }