/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * */ package org.apache.directory.studio.ldapbrowser.core.model.filter.parser; /** * The LdapFilterScanner is a scanner for LDAP filters. * * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a> */ public class LdapFilterScanner { // From RFC 2254: // ------------- // The string representation of an LDAP search filter is defined by the // following grammar, following the ABNF notation defined in [5]. The // filter format uses a prefix notation. // // filter = "(" filtercomp ")" // filtercomp = and / or / not / item // and = "&" filterlist // or = "|" filterlist // not = "!" filter // filterlist = 1*filter // item = simple / present / substring / extensible // simple = attr filtertype value // filtertype = equal / approx / greater / less // equal = "=" // approx = "~=" // greater = ">=" // less = "<=" // extensible = attr [":dn"] [":" matchingrule] ":=" value // / [":dn"] ":" matchingrule ":=" value // present = attr "=*" // substring = attr "=" [initial] any [final] // initial = value // any = "*" *(value "*") // final = value // attr = AttributeDescription from Section 4.1.5 of [1] // matchingrule = MatchingRuleId from Section 4.1.9 of [1] // value = AttributeValue from Section 4.1.6 of [1] // // The attr, matchingrule, and value constructs are as described in the // corresponding section of [1] given above. // // If a value should contain any of the following characters // // Character ASCII value // --------------------------- // * 0x2a // ( 0x28 // ) 0x29 // \ 0x5c // NUL 0x00 // // the character must be encoded as the backslash '\' character (ASCII // 0x5c) followed by the two hexadecimal digits representing the ASCII // value of the encoded character. The case of the two hexadecimal // digits is not significant. /** The filter to scan */ private String filter; /** The current position */ private int pos; /** The last token type. */ private int lastTokenType; /** * Creates a new instance of LdapFilterScanner. */ public LdapFilterScanner() { super(); this.filter = ""; //$NON-NLS-1$ } /** * Resets this scanner. * * @param filter the new filter to scan */ public void reset( String filter ) { this.filter = filter; this.pos = -1; this.lastTokenType = LdapFilterToken.NEW; } /** * Gets the character at the current position. * * @return the character at the current position */ private char currentChar() { return 0 <= pos && pos < filter.length() ? filter.charAt( pos ) : '\u0000'; } /** * Increments the position counter and gets * the character at that positon. * * @return the character at the next position */ private char nextChar() { pos++; return currentChar(); } /** * Decrements the position counter and gets * the character at that positon. * * @return the character at the previous position */ private char prevChar() { pos--; return currentChar(); } /** * Increments the position counter as long as there are * line breaks and gets the character at that positon. * * @return the character at the next position */ private char nextNonLinebreakChar() { while ( nextChar() == '\n' ); return currentChar(); } /** * Decrements the position counter as long as there are * line breaks and gets the character at that positon. * * @return the character at the previous position */ private char prevNonLinebreakChar() { while ( prevChar() == '\n' ); return currentChar(); } /** * Gets the next token. * * @return the next token */ public LdapFilterToken nextToken() { char c; // check for EOF c = nextChar(); if ( c == '\u0000' ) { return new LdapFilterToken( LdapFilterToken.EOF, "", pos ); //$NON-NLS-1$ } else { prevChar(); } // check for ignorable whitespaces c = nextChar(); if ( Character.isWhitespace( c ) && ( lastTokenType == LdapFilterToken.RPAR || lastTokenType == LdapFilterToken.AND || lastTokenType == LdapFilterToken.OR || lastTokenType == LdapFilterToken.NOT ) ) { StringBuffer sb = new StringBuffer(); while ( Character.isWhitespace( c ) ) { sb.append( c ); c = nextChar(); } prevChar(); return new LdapFilterToken( LdapFilterToken.WHITESPACE, sb.toString(), pos - sb.length() + 1 ); } else { prevChar(); } // check special characters c = nextChar(); switch ( c ) { case '(': this.lastTokenType = LdapFilterToken.LPAR; return new LdapFilterToken( this.lastTokenType, "(", pos ); //$NON-NLS-1$ case ')': if ( lastTokenType != LdapFilterToken.EQUAL && lastTokenType != LdapFilterToken.GREATER && lastTokenType != LdapFilterToken.LESS && lastTokenType != LdapFilterToken.APROX && lastTokenType != LdapFilterToken.SUBSTRING ) { this.lastTokenType = LdapFilterToken.RPAR; return new LdapFilterToken( this.lastTokenType, ")", pos ); //$NON-NLS-1$ } case '&': if ( lastTokenType == LdapFilterToken.LPAR ) { // if(nextNonWhitespaceChar()=='(') { // prevNonWhitespaceChar(); this.lastTokenType = LdapFilterToken.AND; return new LdapFilterToken( this.lastTokenType, "&", pos ); //$NON-NLS-1$ // } // else { // prevNonWhitespaceChar(); // } } break; case '|': if ( lastTokenType == LdapFilterToken.LPAR ) { // if(nextNonWhitespaceChar()=='(') { // prevNonWhitespaceChar(); this.lastTokenType = LdapFilterToken.OR; return new LdapFilterToken( this.lastTokenType, "|", pos ); //$NON-NLS-1$ // } // else { // prevNonWhitespaceChar(); // } } break; case '!': if ( lastTokenType == LdapFilterToken.LPAR ) { // if(nextNonWhitespaceChar()=='(') { // prevNonWhitespaceChar(); this.lastTokenType = LdapFilterToken.NOT; return new LdapFilterToken( this.lastTokenType, "!", pos ); //$NON-NLS-1$ // } // else { // prevNonWhitespaceChar(); // } } break; case '=': if ( lastTokenType == LdapFilterToken.ATTRIBUTE ) { if ( nextChar() == '*' ) { char t = nextChar(); if ( t == ')' || t == '\u0000' ) { prevChar(); this.lastTokenType = LdapFilterToken.PRESENT; return new LdapFilterToken( this.lastTokenType, "=*", pos - 1 ); //$NON-NLS-1$ } else { prevChar(); prevChar(); } } else { prevChar(); } // substring or equal // read till ) or eof, if we found an * we have an substring boolean asteriskFound = false; c = nextNonLinebreakChar(); int count = 1; while ( c != ')' && c != '\u0000' ) { if ( c == '*' ) { asteriskFound = true; break; } c = nextNonLinebreakChar(); count++; } while ( count > 0 ) { prevNonLinebreakChar(); count--; } if ( asteriskFound ) { this.lastTokenType = LdapFilterToken.SUBSTRING; return new LdapFilterToken( this.lastTokenType, "=", pos ); //$NON-NLS-1$ } else { this.lastTokenType = LdapFilterToken.EQUAL; return new LdapFilterToken( this.lastTokenType, "=", pos ); //$NON-NLS-1$ } } else if ( lastTokenType == LdapFilterToken.EXTENSIBLE_EQUALS_COLON ) { this.lastTokenType = LdapFilterToken.EQUAL; return new LdapFilterToken( this.lastTokenType, "=", pos ); //$NON-NLS-1$ } break; case '>': if ( lastTokenType == LdapFilterToken.ATTRIBUTE ) { if ( nextChar() == '=' ) { this.lastTokenType = LdapFilterToken.GREATER; return new LdapFilterToken( this.lastTokenType, ">=", pos - 1 ); //$NON-NLS-1$ } else { prevChar(); } } break; case '<': if ( lastTokenType == LdapFilterToken.ATTRIBUTE ) { if ( nextChar() == '=' ) { this.lastTokenType = LdapFilterToken.LESS; return new LdapFilterToken( this.lastTokenType, "<=", pos - 1 ); //$NON-NLS-1$ } else { prevChar(); } } break; case '~': if ( lastTokenType == LdapFilterToken.ATTRIBUTE ) { if ( nextChar() == '=' ) { this.lastTokenType = LdapFilterToken.APROX; return new LdapFilterToken( this.lastTokenType, "~=", pos - 1 ); //$NON-NLS-1$ } else { prevChar(); } } break; case ':': char t1 = nextChar(); char t2 = nextChar(); char t3 = nextChar(); prevChar(); prevChar(); prevChar(); if ( ( lastTokenType == LdapFilterToken.LPAR || lastTokenType == LdapFilterToken.EXTENSIBLE_ATTRIBUTE ) && ( // ( t1 == ':' && t2 != '=' ) // || // ( ( t1 == 'd' || t1 == 'D' ) && t2 == ':' && t3 == ':' ) // || ( ( t1 == 'd' || t1 == 'D' ) && ( t2 == 'n' || t2 == 'N' ) && ( t3 == ':' ) ) ) ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_DNATTR_COLON; return new LdapFilterToken( this.lastTokenType, ":", pos ); //$NON-NLS-1$ } else if ( ( lastTokenType == LdapFilterToken.EXTENSIBLE_ATTRIBUTE || lastTokenType == LdapFilterToken.EXTENSIBLE_DNATTR || lastTokenType == LdapFilterToken.EXTENSIBLE_MATCHINGRULEOID || lastTokenType == LdapFilterToken.EXTENSIBLE_MATCHINGRULEOID_COLON ) && t1 == '=' ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_EQUALS_COLON; return new LdapFilterToken( this.lastTokenType, ":", pos ); //$NON-NLS-1$ } else if ( ( lastTokenType == LdapFilterToken.LPAR || lastTokenType == LdapFilterToken.EXTENSIBLE_ATTRIBUTE || lastTokenType == LdapFilterToken.EXTENSIBLE_DNATTR || lastTokenType == LdapFilterToken.EXTENSIBLE_DNATTR_COLON ) ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_MATCHINGRULEOID_COLON; return new LdapFilterToken( this.lastTokenType, ":", pos ); //$NON-NLS-1$ } break; } // switch prevChar(); // check attribute or extensible attribute if ( this.lastTokenType == LdapFilterToken.LPAR ) { StringBuffer sb = new StringBuffer(); // first char must be non-whitespace c = nextChar(); while ( c != ':' && c != '=' && c != '<' && c != '>' && c != '~' && c != '(' && c != ')' && c != '\u0000' && !Character.isWhitespace( c ) ) { sb.append( c ); c = nextChar(); } prevChar(); if ( sb.length() > 0 ) { boolean isExtensible = ( c == ':' ); if ( isExtensible ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_ATTRIBUTE; return new LdapFilterToken( this.lastTokenType, sb.toString(), pos - sb.length() + 1 ); } else { this.lastTokenType = LdapFilterToken.ATTRIBUTE; return new LdapFilterToken( this.lastTokenType, sb.toString(), pos - sb.length() + 1 ); } } } // check value if ( lastTokenType == LdapFilterToken.EQUAL || lastTokenType == LdapFilterToken.GREATER || lastTokenType == LdapFilterToken.LESS || lastTokenType == LdapFilterToken.APROX ) { boolean forbiddenCharFound = false; StringBuffer sb = new StringBuffer(); c = nextNonLinebreakChar(); int count = 0; while ( c != ')' && c != '\u0000' ) { if ( c == '*' || c == '(' ) { forbiddenCharFound = true; break; } sb.append( c ); c = nextNonLinebreakChar(); count++; } prevNonLinebreakChar(); if ( forbiddenCharFound ) { while ( count > 0 ) { prevNonLinebreakChar(); count--; } } else //if ( sb.length() > 0 ) { this.lastTokenType = LdapFilterToken.VALUE; return new LdapFilterToken( this.lastTokenType, sb.toString(), pos - sb.length() + 1 ); } } if ( lastTokenType == LdapFilterToken.SUBSTRING ) { boolean forbiddenCharFound = false; StringBuffer sb = new StringBuffer(); c = nextNonLinebreakChar(); int count = 0; while ( c != ')' && c != '\u0000' ) { if ( c == '(' ) { forbiddenCharFound = true; break; } sb.append( c ); c = nextNonLinebreakChar(); count++; } prevNonLinebreakChar(); if ( forbiddenCharFound ) { while ( count > 0 ) { prevNonLinebreakChar(); count--; } } else if ( sb.length() > 0 ) { this.lastTokenType = LdapFilterToken.VALUE; return new LdapFilterToken( this.lastTokenType, sb.toString(), pos - sb.length() + 1 ); } } // check extensible dn if ( lastTokenType == LdapFilterToken.EXTENSIBLE_DNATTR_COLON ) { char t1 = nextChar(); char t2 = nextChar(); char t3 = nextChar(); prevChar(); if ( ( t1 == 'd' || t1 == 'D' ) && ( t2 == 'n' || t2 == 'N' ) && ( t3 == ':' || t3 == '\u0000' ) ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_DNATTR; return new LdapFilterToken( this.lastTokenType, "" + t1 + t2, pos - 1 ); //$NON-NLS-1$ } prevChar(); prevChar(); } // check extensible matchingrule if ( lastTokenType == LdapFilterToken.EXTENSIBLE_MATCHINGRULEOID_COLON ) { StringBuffer sb = new StringBuffer(); // first char must be non-whitespace c = nextChar(); while ( c != ':' && c != '=' && c != '<' && c != '>' && c != '~' && c != '(' && c != ')' && c != '\u0000' && !Character.isWhitespace( c ) ) { sb.append( c ); c = nextChar(); } prevChar(); if ( sb.length() > 0 ) { this.lastTokenType = LdapFilterToken.EXTENSIBLE_MATCHINGRULEOID; return new LdapFilterToken( this.lastTokenType, sb.toString(), pos - sb.length() + 1 ); } } // no match StringBuffer sb = new StringBuffer(); c = nextChar(); while ( c != '(' && c != ')' && c != '\u0000' ) { sb.append( c ); c = nextChar(); } prevChar(); // this.lastTokenType = LdapFilterToken.UNKNOWN; // return new LdapFilterToken(this.lastTokenType, sb.toString(), // pos-sb.length()); return new LdapFilterToken( LdapFilterToken.UNKNOWN, sb.toString(), pos - sb.length() + 1 ); } }