// LILY SPECIFIC NOTES: // // This class has been copied from HBase and adjusted to deal with metadata and field flags. // Rather then writing an entirely new filter specific for Lily field values, we've kept it // as close to the original as possible. In the future it might make sense to do something // completely different. // // The parts changed for Lily are marked with "Lily change" // /** * This class was copied from the HBase source code and modified for use * with Lily by NGDATA nv, 2013. * * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.lilyproject.repository.impl.hbase; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import com.google.common.base.Preconditions; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterBase; import org.apache.hadoop.hbase.filter.ParseFilter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.filter.SubstringComparator; import org.apache.hadoop.hbase.filter.ValueFilter; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; import org.apache.hadoop.hbase.io.HbaseObjectWritable; import org.apache.hadoop.hbase.util.Bytes; import org.lilyproject.repository.impl.FieldFlags; /** * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp} * operator (equal, greater, not equal, etc), and either a byte [] value or * a WritableByteArrayComparable. * <p> * If we have a byte [] value then we just do a lexicographic compare. For * example, if passed value is 'b' and cell has 'a' and the compare operator * is LESS, then we will filter out this cell (return true). If this is not * sufficient (eg you want to deserialize a long and then compare it to a fixed * long value), then you can pass in your own comparator instead. * <p> * You must also specify a family and qualifier. Only the value of this column * will be tested. When using this filter on a {@link Scan} with specified * inputs, the column to be tested should also be added as input (otherwise * the filter will regard the column as missing). * <p> * To prevent the entire row from being emitted if the column is not found * on a row, use {@link #setFilterIfMissing}. * Otherwise, if the column is found, the entire row will be emitted only if * the value passes. If the value fails, the row will be filtered out. * <p> * In order to test values of previous versions (timestamps), set * {@link #setLatestVersionOnly} to false. The default is true, meaning that * only the latest version's value is tested and all previous versions are ignored. * <p> * To filter based on the value of all scanned columns, use {@link ValueFilter}. */ public class LilyFieldSingleColumnValueFilter extends FilterBase { static final Log LOG = LogFactory.getLog(LilyFieldSingleColumnValueFilter.class); protected byte [] columnFamily; protected byte [] columnQualifier; private CompareFilter.CompareOp compareOp; private WritableByteArrayComparable comparator; private boolean foundColumn = false; private boolean matchedColumn = false; private boolean filterIfMissing = false; private boolean latestVersionOnly = true; /** * Writable constructor, do not use. */ public LilyFieldSingleColumnValueFilter() { } /** * Constructor for binary compare of the value of a single column. If the * column is found and the condition passes, all columns of the row will be * emitted. If the condition fails, the row will not be emitted. * <p> * Use the filterIfColumnMissing flag to set whether the rest of the columns * in a row will be emitted if the specified column to check is not found in * the row. * * @param family name of column family * @param qualifier name of column qualifier * @param compareOp operator * @param value value to compare column values against */ public LilyFieldSingleColumnValueFilter(final byte [] family, final byte [] qualifier, final CompareFilter.CompareOp compareOp, final byte[] value) { this(family, qualifier, compareOp, new BinaryComparator(value)); } /** * Constructor for binary compare of the value of a single column. If the * column is found and the condition passes, all columns of the row will be * emitted. If the condition fails, the row will not be emitted. * <p> * Use the filterIfColumnMissing flag to set whether the rest of the columns * in a row will be emitted if the specified column to check is not found in * the row. * * @param family name of column family * @param qualifier name of column qualifier * @param compareOp operator * @param comparator Comparator to use. */ public LilyFieldSingleColumnValueFilter(final byte [] family, final byte [] qualifier, final CompareFilter.CompareOp compareOp, final WritableByteArrayComparable comparator) { this.columnFamily = family; this.columnQualifier = qualifier; this.compareOp = compareOp; this.comparator = comparator; } /** * @return operator */ public CompareFilter.CompareOp getOperator() { return compareOp; } /** * @return the comparator */ public WritableByteArrayComparable getComparator() { return comparator; } /** * @return the family */ public byte[] getFamily() { return columnFamily; } /** * @return the qualifier */ public byte[] getQualifier() { return columnQualifier; } public ReturnCode filterKeyValue(KeyValue keyValue) { // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue())); if (this.matchedColumn) { // We already found and matched the single column, all keys now pass return ReturnCode.INCLUDE; } else if (this.latestVersionOnly && this.foundColumn) { // We found but did not match the single column, skip to next row return ReturnCode.NEXT_ROW; } if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) { return ReturnCode.INCLUDE; } foundColumn = true; if (filterColumnValue(keyValue.getBuffer(), keyValue.getValueOffset(), keyValue.getValueLength())) { return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE; } this.matchedColumn = true; return ReturnCode.INCLUDE; } private boolean filterColumnValue(final byte [] data, final int offset, final int length) { // Begin Lily change if (!FieldFlags.exists(data[offset])) { // a field with deleted marker is the same as a missing field return filterIfMissing; } // Find out if there's metadata appended to the field and if so adjust length so that the metadata is not // part of the comparison. int metadataEncodingVersion = FieldFlags.getFieldMetadataVersion(data[offset]); int lilyFieldValueOffset; int lilyFieldValueLength; if (metadataEncodingVersion == 0) { // there is no metadata lilyFieldValueOffset = offset + 1; lilyFieldValueLength = length - 1; } else if (metadataEncodingVersion == 1) { int metadataSize = Bytes.toInt(data, offset + length - Bytes.SIZEOF_INT, Bytes.SIZEOF_INT); lilyFieldValueOffset = offset + 1; // +1 to skip the field flags lilyFieldValueLength = length - 1 - metadataSize - Bytes.SIZEOF_INT; } else { throw new RuntimeException("Unsupported field metadata encoding version: " + metadataEncodingVersion); } int compareResult = this.comparator.compareTo(data, lilyFieldValueOffset, lilyFieldValueLength); // End Lily change switch (this.compareOp) { case LESS: return compareResult <= 0; case LESS_OR_EQUAL: return compareResult < 0; case EQUAL: return compareResult != 0; case NOT_EQUAL: return compareResult == 0; case GREATER_OR_EQUAL: return compareResult > 0; case GREATER: return compareResult >= 0; default: throw new RuntimeException("Unknown Compare op " + compareOp.name()); } } public boolean filterRow() { // If column was found, return false if it was matched, true if it was not // If column not found, return true if we filter if missing, false if not return this.foundColumn? !this.matchedColumn: this.filterIfMissing; } public void reset() { foundColumn = false; matchedColumn = false; } /** * Get whether entire row should be filtered if column is not found. * @return true if row should be skipped if column not found, false if row * should be let through anyways */ public boolean getFilterIfMissing() { return filterIfMissing; } /** * Set whether entire row should be filtered if column is not found. * <p> * If true, the entire row will be skipped if the column is not found. * <p> * If false, the row will pass if the column is not found. This is default. * @param filterIfMissing flag */ public void setFilterIfMissing(boolean filterIfMissing) { this.filterIfMissing = filterIfMissing; } /** * Get whether only the latest version of the column value should be compared. * If true, the row will be returned if only the latest version of the column * value matches. If false, the row will be returned if any version of the * column value matches. The default is true. * @return return value */ public boolean getLatestVersionOnly() { return latestVersionOnly; } /** * Set whether only the latest version of the column value should be compared. * If true, the row will be returned if only the latest version of the column * value matches. If false, the row will be returned if any version of the * column value matches. The default is true. * @param latestVersionOnly flag */ public void setLatestVersionOnly(boolean latestVersionOnly) { this.latestVersionOnly = latestVersionOnly; } public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) { Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6, "Expected 4 or 6 but got: %s", filterArguments.size()); byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0)); byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1)); CompareFilter.CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2)); WritableByteArrayComparable comparator = ParseFilter.createComparator( ParseFilter.removeQuotesFromByteArray(filterArguments.get(3))); if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) { if (compareOp != CompareFilter.CompareOp.EQUAL && compareOp != CompareFilter.CompareOp.NOT_EQUAL) { throw new IllegalArgumentException ("A regexstring comparator and substring comparator " + "can only be used with EQUAL and NOT_EQUAL"); } } SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, compareOp, comparator); if (filterArguments.size() == 6) { boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4)); boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5)); filter.setFilterIfMissing(filterIfMissing); filter.setLatestVersionOnly(latestVersionOnly); } return filter; } public void readFields(final DataInput in) throws IOException { this.columnFamily = Bytes.readByteArray(in); if(this.columnFamily.length == 0) { this.columnFamily = null; } this.columnQualifier = Bytes.readByteArray(in); if(this.columnQualifier.length == 0) { this.columnQualifier = null; } this.compareOp = CompareFilter.CompareOp.valueOf(in.readUTF()); this.comparator = (WritableByteArrayComparable) HbaseObjectWritable.readObject(in, null); this.foundColumn = in.readBoolean(); this.matchedColumn = in.readBoolean(); this.filterIfMissing = in.readBoolean(); this.latestVersionOnly = in.readBoolean(); } public void write(final DataOutput out) throws IOException { Bytes.writeByteArray(out, this.columnFamily); Bytes.writeByteArray(out, this.columnQualifier); out.writeUTF(compareOp.name()); HbaseObjectWritable.writeObject(out, comparator, WritableByteArrayComparable.class, null); out.writeBoolean(foundColumn); out.writeBoolean(matchedColumn); out.writeBoolean(filterIfMissing); out.writeBoolean(latestVersionOnly); } @Override public String toString() { return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(), Bytes.toStringBinary(this.comparator.getValue())); } }