/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.blur.lucene.security.accumulo;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;
/**
* This code was originally created in the Apache Accumulo project. It has
* been slightly modified from its original form to allow for easier reuse.
*/
/**
* Validate the column visibility is a valid expression and set the visibility
* for a Mutation. See {@link ColumnVisibility#ColumnVisibility(byte[])} for the
* definition of an expression.
*
* <P>
* The expression is a sequence of characters from the set [A-Za-z0-9_-.] along
* with the binary operators "&" and "|", indicating respectively that both
* operands are necessary or that either one is sufficient. The following are
* valid expressions for visibility:
*
* <pre>
* A
* A|B
* (A|B)&(C|D)
* orange|(red&yellow)
* </pre>
*
* <P>
* The following are not valid expressions for visibility:
*
* <pre>
* A|B&C
* A=B
* A|B|
* A&|B
* ()
* )
* dog|!cat
* </pre>
*
* <P>
* In addition to the base set of visibilities, any character can be used in the
* expression if it is quoted. If the quoted term contains '"' or '\', then
* escape the character with '\'. The {@link #quote(String)} method can be used
* to properly quote and escape terms automatically. The following is an example
* of a quoted term:
*
* <pre>
* "A#C"<span />&<span />B
* </pre>
*/
public class ColumnVisibility {
/**
* Root of the parse tree for {@link #expression}. Assigned by validate():
* the parser's result for a non-empty expression, or the shared empty node
* when the expression is null or has zero length.
*/
Node node = null;
/**
* The visibility expression bytes. validate() stores the caller's array
* reference as-is (no copy is made).
*/
private byte[] expression;
/**
* Accessor for the underlying byte string.
*
* <p>
* The internal array itself is returned, not a copy, so modifications made by
* the caller are visible to this object.
*
* @return byte array representation of a visibility expression
*/
public byte[] getExpression() {
return expression;
}
/**
* The node types in a parse tree for a visibility expression.
*
* <p>
* Declaration order is significant: {@link NodeComparator} orders nodes of
* different types by the ordinal of their type, so reordering these constants
* changes how nodes sort.
*/
public static enum NodeType {
EMPTY, TERM, OR, AND,
}
/**
* All empty nodes are equal and represent the same value. This shared
* instance is used as the parse tree when the expression is null or has zero
* length.
*/
private static final Node EMPTY_NODE = new Node(NodeType.EMPTY, 0);
/**
 * A node in the parse tree for a visibility expression.
 *
 * <p>
 * A node records its type, a byte range within the expression ({@code start}
 * inclusive, {@code end} exclusive for terms) and, for operator nodes, its
 * child nodes. Nodes are mutable: the parser adjusts {@code end} and appends
 * children while building the tree.
 */
public static class Node {
  /**
   * An empty list of nodes. Also serves as the "no children yet" marker: the
   * first call to {@link #add(Node)} replaces it with a mutable list.
   */
  public final static List<Node> EMPTY = Collections.emptyList();
  NodeType type;
  int start;
  int end;
  List<Node> children = EMPTY;

  /**
   * Creates a node of the given type whose range initially covers the single
   * byte at {@code start}.
   *
   * @param type
   *          node type
   * @param start
   *          offset of the node within the expression
   */
  public Node(NodeType type, int start) {
    this.type = type;
    this.start = start;
    this.end = start + 1;
  }

  /**
   * Creates a {@link NodeType#TERM} node covering the given byte range.
   *
   * @param start
   *          offset of the first byte of the term
   * @param end
   *          offset just past the last byte of the term
   */
  public Node(int start, int end) {
    this.type = NodeType.TERM;
    this.start = start;
    this.end = end;
  }

  /**
   * Appends a child to this node.
   *
   * @param child
   *          node to append
   */
  public void add(Node child) {
    // The shared EMPTY list is immutable, so switch to a private list on first use.
    if (children == EMPTY)
      children = new ArrayList<Node>();
    children.add(child);
  }

  /**
   * @return the type of this node
   */
  public NodeType getType() {
    return type;
  }

  /**
   * @return the live list of children (the shared {@link #EMPTY} list when
   *         none have been added)
   */
  public List<Node> getChildren() {
    return children;
  }

  /**
   * @return offset where this node's range starts within the expression
   */
  public int getTermStart() {
    return start;
  }

  /**
   * @return offset where this node's range ends within the expression
   */
  public int getTermEnd() {
    return end;
  }

  /**
   * Returns the bytes of this term. For a term that starts with a double
   * quote, the range strictly between the opening and closing quotes is
   * returned; the bytes are not unescaped here.
   *
   * @param expression
   *          the expression this node was parsed from
   * @return the term's bytes as a sequence over {@code expression}
   * @throws RuntimeException
   *           if this node is not a {@link NodeType#TERM}
   */
  public ByteSequence getTerm(byte expression[]) {
    if (type != NodeType.TERM)
      throw new RuntimeException("getTerm() requires a TERM node but this node has type " + type);
    if (expression[start] == '"') {
      // its a quoted term
      int qStart = start + 1;
      int qEnd = end - 1;
      return new ArrayByteSequence(expression, qStart, qEnd - qStart);
    }
    return new ArrayByteSequence(expression, start, end - start);
  }
}
/**
 * Orders parse-tree nodes. Nodes of different types are ordered by type;
 * terms are ordered by the unsigned bytes of their range in the expression;
 * AND and OR nodes are ordered first by child count and then by comparing
 * children pairwise.
 */
public static class NodeComparator implements Comparator<Node>, Serializable {
  private static final long serialVersionUID = 1L;
  byte[] text;

  /**
   * Creates a new comparator.
   *
   * @param text
   *          expression string, encoded in UTF-8
   */
  public NodeComparator(byte[] text) {
    this.text = text;
  }

  @Override
  public int compare(Node a, Node b) {
    int typeOrder = a.type.ordinal() - b.type.ordinal();
    if (typeOrder != 0) {
      return typeOrder;
    }
    // From here on both nodes have the same type.
    if (a.type == NodeType.TERM) {
      int lengthA = a.end - a.start;
      int lengthB = b.end - b.start;
      return compareBytes(text, a.start, lengthA, text, b.start, lengthB);
    }
    if (a.type == NodeType.OR || a.type == NodeType.AND) {
      int sizeOrder = a.children.size() - b.children.size();
      if (sizeOrder != 0) {
        return sizeOrder;
      }
      for (int i = 0; i < a.children.size(); i++) {
        int childOrder = compare(a.children.get(i), b.children.get(i));
        if (childOrder != 0) {
          return childOrder;
        }
      }
    }
    // All empty nodes are the same, as are operator nodes whose children all match.
    return 0;
  }
}
/**
 * Lexicographically compares two byte ranges, treating each byte as an
 * unsigned value.
 *
 * @param b1
 *          first array
 * @param s1
 *          offset of the first range in {@code b1}
 * @param l1
 *          length of the first range
 * @param b2
 *          second array
 * @param s2
 *          offset of the second range in {@code b2}
 * @param l2
 *          length of the second range
 * @return the difference of the first pair of differing bytes, or
 *         {@code l1 - l2} when one range is a prefix of the other
 */
public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  final int limit1 = s1 + l1;
  final int limit2 = s2 + l2;
  int p1 = s1;
  int p2 = s2;
  while (p1 < limit1 && p2 < limit2) {
    int delta = (b1[p1++] & 0xff) - (b2[p2++] & 0xff);
    if (delta != 0) {
      return delta;
    }
  }
  // Common prefix exhausted: the shorter range sorts first.
  return l1 - l2;
}
/**
 * Convenience overload that normalizes {@code root} using a new
 * {@link NodeComparator} built over the supplied expression.
 *
 * @param root
 *          root of the parse tree to normalize
 * @param expression
 *          the expression bytes the tree refers to
 * @return the root of the normalized tree
 */
public static Node normalize(Node root, byte[] expression) {
  NodeComparator byExpressionBytes = new NodeComparator(expression);
  return normalize(root, expression, byExpressionBytes);
}
/**
 * Walks an expression's parse tree and normalizes it in place. The returned
 * node is the root of the normalized tree and may differ from {@code root}.
 * <ol>
 * <li>rolls up nested expressions with the same operator ({@code a&(b&c)}
 * becomes {@code a&b&c})</li>
 * <li>sorts children using {@code comparator} (permutations of {@code a&b&c}
 * are re-ordered to appear as {@code a&b&c})</li>
 * <li>dedupes children that compare equal ({@code a&b&a} becomes
 * {@code a&b})</li>
 * <li>promotes an only child in place of its parent ({@code (a|a)&b} becomes
 * {@code a&b})</li>
 * </ol>
 *
 * @param root
 *          root of the tree to normalize; the children lists of its operator
 *          nodes are rewritten
 * @param expression
 *          the expression bytes the tree refers to
 * @param comparator
 *          ordering used to sort and de-duplicate children
 * @return root of the normalized tree
 */
public static Node normalize(Node root, byte[] expression, NodeComparator comparator) {
  if (root.type != NodeType.TERM) {
    TreeSet<Node> rolledUp = new TreeSet<Node>(comparator);
    for (Node child : root.children) {
      Node normalized = normalize(child, expression, comparator);
      if (normalized.type == root.type) {
        // Same operator as the parent: splice the grandchildren in directly.
        rolledUp.addAll(normalized.children);
      } else {
        // Keep the normalized result rather than the original child. When the
        // child collapsed to its only child, the original wrapper would
        // otherwise survive as a redundant one-element group.
        rolledUp.add(normalized);
      }
    }
    root.children.clear();
    root.children.addAll(rolledUp);
    // need to promote a child if it's an only child
    if (root.children.size() == 1) {
      return root.children.get(0);
    }
  }
  return root;
}
/**
 * Walks an expression's parse tree and appends its textual form to the
 * supplied builder. A child is wrapped in parentheses when it is an operator
 * node whose type differs from its parent's.
 *
 * @param root
 *          node to render
 * @param expression
 *          the expression bytes the tree refers to
 * @param out
 *          builder that receives the text
 */
public static void stringify(Node root, byte[] expression, StringBuilder out) {
  if (root.type == NodeType.TERM) {
    String term = UTFUtil.toString(expression, root.start, root.end - root.start);
    out.append(term);
    return;
  }
  final String operator = root.type == NodeType.AND ? "&" : "|";
  boolean first = true;
  for (Node child : root.children) {
    if (!first) {
      out.append(operator);
    }
    first = false;
    boolean needsParens = child.type != NodeType.TERM && child.type != root.type;
    if (needsParens) {
      out.append("(");
    }
    stringify(child, expression, out);
    if (needsParens) {
      out.append(")");
    }
  }
}
/**
 * Produces the normalized textual form of this visibility's expression,
 * encoded as bytes. The result is logically equivalent to the original
 * expression.
 *
 * <p>
 * Normalization rewrites the children of this object's parse tree in place.
 *
 * @return normalized expression in byte[] form
 */
public byte[] flatten() {
  final Node normalizedRoot = normalize(node, expression, new NodeComparator(expression));
  final StringBuilder text = new StringBuilder(expression.length);
  stringify(normalizedRoot, expression, text);
  final String normalized = text.toString();
  return UTFUtil.toBytes(normalized);
}
/**
 * Recursive parser that turns the bytes of a visibility expression into a
 * {@link Node} tree. The read position and parenthesis depth are instance
 * state shared by the recursive calls, so one instance parses one expression.
 */
private static class ColumnVisibilityParser {
  /** Offset of the next byte to read. */
  private int index = 0;
  /** Number of '(' consumed minus the number of ')' consumed so far. */
  private int parens = 0;

  public ColumnVisibilityParser() {
  }

  /**
   * Parses a complete expression.
   *
   * @param expression
   *          expression bytes
   * @return root of the parse tree, or null when {@code expression} has zero
   *         length
   * @throws BadArgumentException
   *           if the expression is malformed or its parentheses do not balance
   */
  Node parse(byte[] expression) {
    if (expression.length > 0) {
      Node node = parse_(expression);
      if (node == null) {
        throw new BadArgumentException("operator or missing parens", UTFUtil.toString(expression), index - 1);
      }
      if (parens != 0) {
        throw new BadArgumentException("parenthesis mis-match", UTFUtil.toString(expression), index - 1);
      }
      return node;
    }
    return null;
  }

  /**
   * Resolves the operand that precedes an operator, a closing parenthesis or
   * the end of input. Exactly one of a literal term (non-empty range) or an
   * already parsed sub-expression must be present.
   *
   * @param start
   *          offset where the literal term starts
   * @param end
   *          offset just past the literal term
   * @param expr
   *          parsed parenthesized sub-expression, or null if there was none
   * @param expression
   *          expression bytes, used for error reporting
   * @return a new term node for a non-empty range, otherwise {@code expr}
   * @throws BadArgumentException
   *           if both or neither of the two operand forms are present
   */
  Node processTerm(int start, int end, Node expr, byte[] expression) {
    if (start != end) {
      if (expr != null)
        throw new BadArgumentException("expression needs | or &", UTFUtil.toString(expression), start);
      return new Node(start, end);
    }
    if (expr == null)
      throw new BadArgumentException("empty term", UTFUtil.toString(expression), start);
    return expr;
  }

  /**
   * Adds an operand to the operator node being built for the current nesting
   * level, creating that node on the first operator seen.
   *
   * @return the operator node, never null
   * @throws BadArgumentException
   *           if {@code operator} differs from the operator already in use at
   *           this level
   */
  private Node appendOperand(Node result, Node operand, NodeType operator, int wholeTermStart, byte[] expression) {
    if (result == null) {
      result = new Node(operator, wholeTermStart);
    } else if (result.type != operator) {
      String message = operator == NodeType.AND ? "cannot mix & and |" : "cannot mix | and &";
      throw new BadArgumentException(message, UTFUtil.toString(expression), index - 1);
    }
    result.add(operand);
    return result;
  }

  /**
   * Consumes the rest of a quoted term. On entry {@code index} is just past
   * the opening quote; on return it is just past the closing quote.
   *
   * @param subtermStart
   *          offset of the opening quote
   * @throws BadArgumentException
   *           if an escape is invalid, the quote is never closed, or the
   *           quoted term is empty
   */
  private void consumeQuotedTerm(byte[] expression, int subtermStart) {
    while (index < expression.length && expression[index] != '"') {
      if (expression[index] == '\\') {
        index++;
        // A backslash as the last byte has nothing to escape. Check the bound
        // before reading so this is reported as a bad expression rather than
        // failing with an array index error.
        if (index == expression.length || (expression[index] != '\\' && expression[index] != '"'))
          throw new BadArgumentException("invalid escaping within quotes", UTFUtil.toString(expression),
              index - 1);
      }
      index++;
    }
    if (index == expression.length)
      throw new BadArgumentException("unclosed quote", UTFUtil.toString(expression), subtermStart);
    if (subtermStart + 1 == index)
      throw new BadArgumentException("empty term", UTFUtil.toString(expression), subtermStart);
    index++;
  }

  /**
   * Parses one nesting level starting at {@code index}. Returns when the
   * matching ')' has been consumed or the input is exhausted.
   *
   * @param expression
   *          expression bytes
   * @return the node for this level: a single term, or an AND/OR node
   * @throws BadArgumentException
   *           if the expression is malformed
   */
  Node parse_(byte[] expression) {
    Node result = null;
    Node expr = null;
    int wholeTermStart = index;
    int subtermStart = index;
    // Set once a quoted term has been closed; any further term byte is then an error.
    boolean subtermComplete = false;
    while (index < expression.length) {
      switch (expression[index++]) {
        case '&': {
          expr = processTerm(subtermStart, index - 1, expr, expression);
          result = appendOperand(result, expr, NodeType.AND, wholeTermStart, expression);
          expr = null;
          subtermStart = index;
          subtermComplete = false;
          break;
        }
        case '|': {
          expr = processTerm(subtermStart, index - 1, expr, expression);
          result = appendOperand(result, expr, NodeType.OR, wholeTermStart, expression);
          expr = null;
          subtermStart = index;
          subtermComplete = false;
          break;
        }
        case '(': {
          parens++;
          if (subtermStart != index - 1 || expr != null)
            throw new BadArgumentException("expression needs & or |", UTFUtil.toString(expression), index - 1);
          expr = parse_(expression);
          subtermStart = index;
          subtermComplete = false;
          break;
        }
        case ')': {
          parens--;
          Node child = processTerm(subtermStart, index - 1, expr, expression);
          if (child == null && result == null)
            throw new BadArgumentException("empty expression not allowed", UTFUtil.toString(expression), index);
          if (result == null)
            return child;
          if (result.type == child.type)
            for (Node c : child.children)
              result.add(c);
          else
            result.add(child);
          result.end = index - 1;
          return result;
        }
        case '"': {
          // A quote may only open a term, never appear part-way through one.
          if (subtermStart != index - 1)
            throw new BadArgumentException("expression needs & or |", UTFUtil.toString(expression), index - 1);
          consumeQuotedTerm(expression, subtermStart);
          subtermComplete = true;
          break;
        }
        default: {
          if (subtermComplete)
            throw new BadArgumentException("expression needs & or |", UTFUtil.toString(expression), index - 1);
          byte c = expression[index - 1];
          if (!Authorizations.isValidAuthChar(c))
            throw new BadArgumentException("bad character (" + c + ")", UTFUtil.toString(expression), index - 1);
        }
      }
    }
    Node child = processTerm(subtermStart, index, expr, expression);
    if (result != null) {
      result.add(child);
      result.end = index;
    } else
      result = child;
    if (result.type != NodeType.TERM)
      if (result.children.size() < 2)
        throw new BadArgumentException("missing term", UTFUtil.toString(expression), index);
    return result;
  }
}
/**
 * Parses the expression and records both the resulting parse tree and the
 * expression itself. A null or zero-length expression gets the shared empty
 * node. If parsing fails, the exception propagates and the stored expression
 * is left unchanged.
 *
 * @param expression
 *          visibility expression bytes, may be null
 */
private void validate(byte[] expression) {
  boolean isEmpty = expression == null || expression.length == 0;
  if (isEmpty) {
    node = EMPTY_NODE;
  } else {
    ColumnVisibilityParser parser = new ColumnVisibilityParser();
    node = parser.parse(expression);
  }
  this.expression = expression;
}
/**
 * Creates an empty visibility, backed by a zero-length expression. Normally,
 * elements with empty visibility can be seen by everyone, though one could
 * change this behavior with filters.
 *
 * @see #ColumnVisibility(String)
 */
public ColumnVisibility() {
  this(new byte[0]);
}
/**
* Creates a column visibility for a Mutation. The string is converted to
* bytes with UTFUtil.toBytes and handed to
* {@link #ColumnVisibility(byte[])}, which parses it.
*
* @param expression
* An expression of the rights needed to see this mutation. The
* expression syntax is defined at the class-level documentation
*/
public ColumnVisibility(String expression) {
this(UTFUtil.toBytes(expression));
}
/**
* Creates a column visibility for a Mutation from a string already encoded in
* UTF-8 bytes. The expression is parsed immediately; a null or zero-length
* array yields an empty visibility. The array reference is stored as-is, not
* copied.
*
* @param expression
* visibility expression, encoded as UTF-8 bytes
* @see #ColumnVisibility(String)
*/
public ColumnVisibility(byte[] expression) {
validate(expression);
}
/**
 * Renders the expression text enclosed in square brackets.
 *
 * @return the expression as a string, wrapped in "[" and "]"
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("[");
  text.append(UTFUtil.toString(expression));
  text.append("]");
  return text.toString();
}
/**
 * Returns false for anything that is not a ColumnVisibility; otherwise
 * defers to {@link #equals(ColumnVisibility)}.
 */
@Override
public boolean equals(Object obj) {
  return obj instanceof ColumnVisibility && equals((ColumnVisibility) obj);
}
/**
 * Compares two ColumnVisibilities for string equivalence, not as a meaningful
 * comparison of terms and conditions.
 *
 * <p>
 * A call such as {@code cv.equals(null)} binds to this overload rather than
 * to {@link #equals(Object)}, so null is handled here explicitly.
 *
 * @param otherLe
 *          other column visibility, may be null
 * @return true if {@code otherLe} is non-null and this visibility equals it
 *         via byte-wise comparison of the expressions
 */
public boolean equals(ColumnVisibility otherLe) {
  if (otherLe == null) {
    return false;
  }
  return Arrays.equals(expression, otherLe.expression);
}
/**
* Hash of the expression bytes, consistent with the byte-wise comparison
* used by {@link #equals(ColumnVisibility)}.
*/
@Override
public int hashCode() {
return Arrays.hashCode(expression);
}
/**
* Gets the parse tree for this column visibility. The internal root node is
* returned directly, not a copy.
*
* @return parse tree node
*/
public Node getParseTree() {
return node;
}
/**
* Properly quotes terms in a column visibility expression. If no quoting is
* needed, then nothing is done.
*
* <p>
* Examples of using quote :
*
* <pre>
* import static org.apache.blur.lucene.security.accumulo.ColumnVisibility.quote;
* .
* .
* .
* ColumnVisibility cv = new ColumnVisibility(quote("A#C") + "&" + quote("FOO"));
* </pre>
*
* @param term
* term to quote
* @return quoted term (unquoted if unnecessary)
*/
public static String quote(String term) {
return UTFUtil.toString(quote(UTFUtil.toBytes(term)));
}
/**
 * Properly quotes terms in a column visibility expression. When every byte of
 * the term is a valid authorization character the same array is returned
 * untouched; otherwise the term is passed to VisibilityEvaluator.escape.
 *
 * @param term
 *          term to quote, encoded as UTF-8 bytes
 * @return quoted term (unquoted if unnecessary), encoded as UTF-8 bytes
 * @see #quote(String)
 */
public static byte[] quote(byte[] term) {
  for (byte b : term) {
    if (!Authorizations.isValidAuthChar(b)) {
      // One byte outside the valid set is enough to require quoting.
      return VisibilityEvaluator.escape(term, true);
    }
  }
  return term;
}
}