/** * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.filter; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.util.Bytes; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.util.regex.Pattern; /** * This comparator is for use with {@link CompareFilter} implementations, such * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for * filtering based on the value of a given column. Use it to test if a given * regular expression matches a cell value in the column. * <p> * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator. * <p> * For example: * <p> * <pre> * ValueFilter vf = new ValueFilter(CompareOp.EQUAL, * new RegexStringComparator( * // v4 IP address * "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" + * "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" + * "|" + * // v6 IP address * "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" + * "{3}[\\d]{1,3})?)(\\/[0-9]+)?")); * </pre> */ public class RegexStringComparator extends WritableByteArrayComparable { private static final Log LOG = LogFactory.getLog(RegexStringComparator.class); private Charset charset = Charset.forName(HConstants.UTF8_ENCODING); private Pattern pattern; /** Nullary constructor for Writable, do not use */ public RegexStringComparator() { } /** * Constructor * @param expr a valid regular expression */ public RegexStringComparator(String expr) { super(Bytes.toBytes(expr)); this.pattern = Pattern.compile(expr, Pattern.DOTALL); } /** * Specifies the {@link Charset} to use to convert the row key to a String. * <p> * The row key needs to be converted to a String in order to be matched * against the regular expression. This method controls which charset is * used to do this conversion. * <p> * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1} * is recommended. * @param charset The charset to use. */ public void setCharset(final Charset charset) { this.charset = charset; } @Override public int compareTo(byte[] value) { // Use find() for subsequence match instead of matches() (full sequence // match) to adhere to the principle of least surprise. return pattern.matcher(new String(value, charset)).find() ? 0 : 1; } @Override public void readFields(DataInput in) throws IOException { final String expr = in.readUTF(); this.value = Bytes.toBytes(expr); this.pattern = Pattern.compile(expr); final String charset = in.readUTF(); if (charset.length() > 0) { try { this.charset = Charset.forName(charset); } catch (IllegalCharsetNameException e) { LOG.error("invalid charset", e); } } } @Override public void write(DataOutput out) throws IOException { out.writeUTF(pattern.toString()); out.writeUTF(charset.name()); } }