/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.directory.studio.ldifparser.parser;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
// RFC 2849
//
// ldif-file = ldif-content / ldif-changes
// ldif-content = version-spec 1*(1*SEP ldif-attrval-record)
// ldif-changes = version-spec 1*(1*SEP ldif-change-record)
// ldif-attrval-record = dn-spec SEP 1*attrval-spec
// ldif-change-record = dn-spec SEP *control changerecord
// version-spec = "version:" FILL version-number
// version-number = 1*DIGIT
// ; version-number MUST be "1" for the
// ; LDIF format described in this document.
// dn-spec = "dn:" (FILL distinguishedName /
// ":" FILL base64-distinguishedName)
// distinguishedName = SAFE-STRING
// ; a distinguished name, as defined in [3]
// base64-distinguishedName = BASE64-UTF8-STRING
// ; a distinguishedName which has been base64
// ; encoded (see note 10, below)
// rdn = SAFE-STRING
// ; a relative distinguished name, defined as
// ; <name-component> in [3]
// base64-rdn = BASE64-UTF8-STRING
// ; an rdn which has been base64 encoded (see
// ; note 10, below)
// control = "control:" FILL ldap-oid ; controlType
// 0*1(1*SPACE ("true" / "false")) ; criticality
// 0*1(value-spec) ; controlValue
// SEP
// ; (See note 9, below)
// ldap-oid = 1*DIGIT 0*1("." 1*DIGIT)
// ; An LDAPOID, as defined in [4]
// attrval-spec = AttributeDescription value-spec SEP
// value-spec = ":" ( FILL 0*1(SAFE-STRING) /
// ":" FILL (BASE64-STRING) /
// "<" FILL url)
// ; See notes 7 and 8, below
// url = <a Uniform Resource Locator,
// as defined in [6]>
// ; (See Note 6, below)
// AttributeDescription = AttributeType [";" options]
// ; Definition taken from [4]
// AttributeType = ldap-oid / (ALPHA *(attr-type-chars))
// options = option / (option ";" options)
// option = 1*opt-char
// attr-type-chars = ALPHA / DIGIT / "-"
// opt-char = attr-type-chars
// changerecord = "changetype:" FILL
// (change-add / change-delete /
// change-modify / change-moddn)
// change-add = "add" SEP 1*attrval-spec
// change-delete = "delete" SEP
// change-moddn = ("modrdn" / "moddn") SEP
// "newrdn:" ( FILL rdn /
// ":" FILL base64-rdn) SEP
// "deleteoldrdn:" FILL ("0" / "1") SEP
// 0*1("newsuperior:"
// ( FILL distinguishedName /
// ":" FILL base64-distinguishedName) SEP)
// change-modify = "modify" SEP *mod-spec
// mod-spec = ("add:" / "delete:" / "replace:")
// FILL AttributeDescription SEP
// *attrval-spec
// "-" SEP
// SPACE = %x20
// ; ASCII SP, space
// FILL = *SPACE
// SEP = (CR LF / LF)
// CR = %x0D
// ; ASCII CR, carriage return
// LF = %x0A
// ; ASCII LF, line feed
// ALPHA = %x41-5A / %x61-7A
// ; A-Z / a-z
// DIGIT = %x30-39
// ; 0-9
// UTF8-1 = %x80-BF
// UTF8-2 = %xC0-DF UTF8-1
// UTF8-3 = %xE0-EF 2UTF8-1
// UTF8-4 = %xF0-F7 3UTF8-1
// UTF8-5 = %xF8-FB 4UTF8-1
// UTF8-6 = %xFC-FD 5UTF8-1
// SAFE-CHAR = %x01-09 / %x0B-0C / %x0E-7F
// ; any value <= 127 decimal except NUL, LF,
// ; and CR
// SAFE-INIT-CHAR = %x01-09 / %x0B-0C / %x0E-1F /
// %x21-39 / %x3B / %x3D-7F
// ; any value <= 127 except NUL, LF, CR,
// ; SPACE, colon (":", ASCII 58 decimal)
// ; and less-than ("<" , ASCII 60 decimal)
// SAFE-STRING = [SAFE-INIT-CHAR *SAFE-CHAR]
// UTF8-CHAR = SAFE-CHAR / UTF8-2 / UTF8-3 /
// UTF8-4 / UTF8-5 / UTF8-6
// UTF8-STRING = *UTF8-CHAR
// BASE64-UTF8-STRING = BASE64-STRING
// ; MUST be the base64 encoding of a
// ; UTF8-STRING
// BASE64-CHAR = %x2B / %x2F / %x30-39 / %x3D / %x41-5A /
// %x61-7A
// ; +, /, 0-9, =, A-Z, and a-z
// ; as specified in [5]
// BASE64-STRING = [*(BASE64-CHAR)]
/**
 * Scanner that reads LDIF text (RFC 2849) from a {@link Reader} and hands it
 * out as {@link LdifToken}s.
 * <p>
 * The scanner keeps an absolute position <code>pos</code> into the input and a
 * sliding window of already loaded characters. Every public
 * <code>matchXxx()</code> method tries to recognize one kind of token starting
 * right after the current position. On success the token is returned and the
 * position is advanced past it; otherwise the characters that were read are
 * pushed back again and <code>null</code> is returned.
 * <p>
 * Line folding (a line break followed by a single space) is handled by
 * {@link #read(StringBuffer)} and {@link #unread(StringBuffer)}: the folding
 * sequence is kept in the token text but is never returned as the "current"
 * character.
 */
public class LdifScanner
{
    /** Source of the LDIF characters, set by {@link #setLdif(Reader)}. */
    private Reader ldifReader;

    /** Scratch array used to pull chunks of characters from the reader. */
    private char[] buffer = new char[256];

    /** Window of characters that were loaded from the reader and not yet flushed. */
    private StringBuffer ldifBuffer;

    /** Absolute input position of the first character held in {@link #ldifBuffer}. */
    private int ldifBufferOffset;

    /** Absolute position of the current character; -1 before the first read. */
    private int pos;


    /**
     * Creates a scanner without input. {@link #setLdif(Reader)} must be called
     * before any character is scanned.
     */
    public LdifScanner()
    {
    }


    /**
     * Sets the input to scan and resets position and buffer.
     *
     * @param ldifReader the reader providing the LDIF text
     */
    public void setLdif( Reader ldifReader )
    {
        this.ldifReader = ldifReader;
        this.pos = -1;
        this.ldifBuffer = new StringBuffer();
        this.ldifBufferOffset = 0;
    }


    /**
     * Returns the character at the current position, loading more input from
     * the reader if the position lies behind the buffered window.
     *
     * @return the character at <code>pos</code>
     * @throws EOFException if no character is available at <code>pos</code>:
     *         the position is before the buffered window, the reader is
     *         exhausted, or reading failed (the I/O error is then attached as
     *         cause)
     */
    char currentChar() throws EOFException
    {
        // fill the buffer until it covers pos or the reader signals end of input
        try
        {
            int num = 0;
            while ( ldifBufferOffset + ldifBuffer.length() <= pos && num > -1 )
            {
                num = ldifReader.read( buffer );
                if ( num > -1 )
                {
                    ldifBuffer.append( buffer, 0, num );
                }
            }
        }
        catch ( IOException e )
        {
            // The requested character cannot be delivered. Report it through the
            // declared exception type but keep the real reason as cause.
            EOFException eof = new EOFException( "I/O error while reading LDIF input" ); //$NON-NLS-1$
            eof.initCause( e );
            throw eof;
        }

        // The lower bound is the start of the window, not 0: characters before
        // ldifBufferOffset were dropped by flushBuffer().
        if ( pos < ldifBufferOffset || pos >= ldifBufferOffset + ldifBuffer.length() )
        {
            throw new EOFException();
        }
        return ldifBuffer.charAt( pos - ldifBufferOffset );
    }


    /**
     * Checks whether a folding sequence (CR, LF, CR LF or LF CR followed by
     * one space) starts right after the current position. If so the sequence
     * is appended to the given buffer and the position is moved onto the
     * space; otherwise the position is left unchanged.
     *
     * @param sb the buffer that receives the folding sequence
     */
    void addFolding( StringBuffer sb )
    {
        int start = pos;
        try
        {
            pos++;
            char c = currentChar();
            if ( c == '\n' || c == '\r' )
            {
                StringBuffer fold = new StringBuffer( 3 );
                fold.append( c );

                // optional second character of a two-character line break
                char partner = c == '\r' ? '\n' : '\r';
                pos++;
                if ( currentChar() == partner )
                {
                    fold.append( partner );
                }
                else
                {
                    pos--;
                }

                // only a space after the line break makes it a folding
                pos++;
                if ( currentChar() == ' ' )
                {
                    fold.append( ' ' );
                    sb.append( fold );
                    return;
                }
            }
        }
        catch ( EOFException e )
        {
            // input ended before a complete folding sequence was seen
        }

        // no folding: reset position
        pos = start;
    }


    /**
     * Reads the next character from the input if available. The character is
     * appended to the given StringBuffer and returned. If a folding sequence
     * (SEP + SPACE) directly follows the character, that sequence is consumed
     * and appended to the StringBuffer as well, so the StringBuffer may end
     * with a folding sequence rather than with the returned character.
     *
     * @param sb the buffer that receives the read characters
     * @return the next character
     * @throws EOFException if no further character is available; the
     *         position is unchanged in that case
     */
    public char read( StringBuffer sb ) throws EOFException
    {
        pos++;
        char c;
        try
        {
            c = currentChar();
        }
        catch ( EOFException e )
        {
            pos--;
            throw e;
        }

        sb.append( c );
        addFolding( sb );
        return c;
    }


    /**
     * Inverse of {@link #addFolding(StringBuffer)}: if the current position is
     * the space of a folding sequence, the position is moved in front of the
     * sequence and the sequence is removed from the end of the given buffer.
     * Otherwise position and buffer are left unchanged.
     *
     * @param sb the buffer whose trailing folding sequence is removed
     */
    void removeFolding( StringBuffer sb )
    {
        int start = pos;
        try
        {
            if ( currentChar() != ' ' )
            {
                return;
            }

            pos--;
            char c = currentChar();
            if ( c != '\n' && c != '\r' )
            {
                pos = start;
                return;
            }

            // space + one line break character
            int foldLength = 2;

            // optional first character of a two-character line break
            char partner = c == '\r' ? '\n' : '\r';
            pos--;
            if ( currentChar() == partner )
            {
                pos--;
                foldLength = 3;
            }

            // clamp so that a buffer shorter than the folding does not fail
            sb.delete( Math.max( 0, sb.length() - foldLength ), sb.length() );
        }
        catch ( EOFException e )
        {
            // ran off the start of the available input: reset position
            pos = start;
        }
    }


    /**
     * Inverses the previous {@link #read(StringBuffer)}: a trailing folding
     * sequence and then the last character are removed from the given buffer
     * and the position is moved back accordingly.
     *
     * @param sb the buffer the characters were read into
     */
    public void unread( StringBuffer sb )
    {
        removeFolding( sb );

        if ( pos > -1 )
        {
            pos--;
            if ( sb.length() > 0 )
            {
                sb.deleteCharAt( sb.length() - 1 );
            }
        }
    }


    /**
     * Reads a line that begins with the given text.
     *
     * @param start the required beginning of the line
     * @return the beginning plus the rest of the line without its line break,
     *         or null if the input does not continue with <code>start</code>
     */
    private String getFullLine( String start )
    {
        String head = getWord( start );
        if ( head == null )
        {
            return null;
        }

        String tail = getContent( false );
        return tail != null ? head + tail : head;
    }


    /**
     * Reads up to, but not including, the next line break or the end of input.
     * Folding sequences consumed on the way are part of the result.
     *
     * @param allowEmptyContent whether an empty string is returned when
     *        nothing could be read
     * @return the read text; null if nothing was read and empty content is
     *         not allowed
     */
    private String getContent( boolean allowEmptyContent )
    {
        StringBuffer sb = new StringBuffer( 256 );
        try
        {
            char c = '\u0000';
            while ( c != '\n' && c != '\r' )
            {
                c = read( sb );
            }
            // the line break is not part of the content
            unread( sb );
        }
        catch ( EOFException e )
        {
            // end of input terminates the content
        }

        return sb.length() > 0 || allowEmptyContent ? sb.toString() : null;
    }


    /**
     * Tries to read the given word, ignoring case.
     *
     * @param word the expected word
     * @return the text as it was read (including folding sequences), or null
     *         if the input does not continue with the word; nothing is
     *         consumed in that case
     */
    private String getWord( String word )
    {
        StringBuffer sb = new StringBuffer();

        try
        {
            boolean matches = true;
            for ( int i = 0; i < word.length(); i++ )
            {
                char c = read( sb );
                if ( Character.toUpperCase( c ) != Character.toUpperCase( word.charAt( i ) ) )
                {
                    matches = false;
                    unread( sb );
                    break;
                }
            }

            if ( matches )
            {
                return sb.toString();
            }
        }
        catch ( EOFException e )
        {
            // input ended inside the word: treated as a mismatch
        }

        // push back everything that was read
        while ( sb.length() > 0 )
        {
            unread( sb );
        }
        return null;
    }


    /**
     * Tries to read the given word if it is followed by a colon, a line break
     * or the end of input. The following character itself is not consumed.
     *
     * @param word the expected word
     * @return the text as it was read, or null if the word does not match
     */
    private String getWordTillColon( String word )
    {
        String wordWithColon = word + ":"; //$NON-NLS-1$
        String line = getWord( wordWithColon );
        if ( line != null )
        {
            // the colon belongs to the next token
            StringBuffer sb = new StringBuffer( line );
            unread( sb );
            return sb.toString();
        }

        // allow eof and sep
        line = getWord( word );
        if ( line == null )
        {
            return null;
        }

        StringBuffer sb = new StringBuffer( line );
        try
        {
            char c = read( sb );
            unread( sb );
            if ( c == '\r' || c == '\n' )
            {
                return sb.toString();
            }

            // the word is only a prefix of something else: push it back
            while ( sb.length() > 0 )
            {
                unread( sb );
            }
            return null;
        }
        catch ( EOFException e )
        {
            return sb.toString();
        }
    }


    /**
     * Drops consumed characters from the front of the buffer. The buffer is
     * cut so that it starts one character before the current position;
     * presumably that character is kept so that a later unread can still look
     * behind the current character.
     */
    private void flushBuffer()
    {
        if ( ldifBufferOffset < pos && ldifBuffer.length() > 0 )
        {
            int delta = Math.min( pos - ldifBufferOffset, ldifBuffer.length() ) - 1;
            ldifBuffer.delete( 0, delta );
            ldifBufferOffset += delta;
        }
    }


    /**
     * Creates a token whose text ends at the current position.
     *
     * @param type the token type
     * @param value the token text
     * @return the token, with its offset derived from the current position and
     *         the length of the text
     */
    private LdifToken createToken( int type, String value )
    {
        return new LdifToken( type, value, pos - value.length() + 1 );
    }


    /**
     * Tries to read the given word and wraps it into a token. Does not flush
     * the buffer.
     *
     * @param word the expected word, matched ignoring case
     * @param type the token type to create
     * @return the token or null if the word does not match
     */
    private LdifToken matchWordToken( String word, int type )
    {
        String value = getWord( word );
        return value != null ? createToken( type, value ) : null;
    }


    /**
     * Flushes the buffer and tries to read the given keyword if it is followed
     * by a colon, a line break or the end of input.
     *
     * @param keyword the expected keyword, matched ignoring case
     * @param type the token type to create
     * @return the token or null if the keyword does not match
     */
    private LdifToken matchSpecToken( String keyword, int type )
    {
        flushBuffer();
        String value = getWordTillColon( keyword );
        return value != null ? createToken( type, value ) : null;
    }


    /**
     * Flushes the buffer and tries to read a run of characters that starts
     * with a digit or, if allowed, an ASCII letter and continues with digits,
     * allowed letters and the given extra characters.
     *
     * @param type the token type to create
     * @param lettersAllowed whether ASCII letters may appear in the run
     * @param extraChars additional characters allowed after the first one
     * @return the token or null if the first character does not fit
     */
    private LdifToken matchCharRun( int type, boolean lettersAllowed, String extraChars )
    {
        flushBuffer();
        StringBuffer sb = new StringBuffer();

        char c;
        try
        {
            c = read( sb );
        }
        catch ( EOFException e )
        {
            return null;
        }

        if ( !( isDigit( c ) || ( lettersAllowed && isAsciiLetter( c ) ) ) )
        {
            unread( sb );
            return null;
        }

        try
        {
            while ( isDigit( c ) || ( lettersAllowed && isAsciiLetter( c ) ) || extraChars.indexOf( c ) >= 0 )
            {
                c = read( sb );
            }
            // the terminating character is not part of the token
            unread( sb );
        }
        catch ( EOFException e )
        {
            // end of input terminates the token
        }

        return createToken( type, sb.toString() );
    }


    /** Tells whether the character is one of '0' to '9'. */
    private static boolean isDigit( char c )
    {
        return '0' <= c && c <= '9';
    }


    /** Tells whether the character is one of 'a' to 'z' or 'A' to 'Z'. */
    private static boolean isAsciiLetter( char c )
    {
        return ( 'a' <= c && c <= 'z' ) || ( 'A' <= c && c <= 'Z' );
    }


    /**
     * Matches the rest of the current line and/or the following line break,
     * whatever it contains.
     *
     * @return an UNKNOWN token, or null if neither content nor a line break
     *         could be read
     */
    public LdifToken matchCleanupLine()
    {
        flushBuffer();

        String line = getContent( false );
        LdifToken sep = matchSep();
        if ( line == null && sep == null )
        {
            return null;
        }

        if ( line == null )
        {
            line = ""; //$NON-NLS-1$
        }
        if ( sep != null )
        {
            line += sep.getValue();
        }
        return createToken( LdifToken.UNKNOWN, line );
    }


    /**
     * Matches a non-empty rest of the current line together with the
     * following line break, if there is one.
     *
     * @return an UNKNOWN token, or null if there is no content before the
     *         next line break or the end of input
     */
    public LdifToken matchOther()
    {
        flushBuffer();

        String line = getContent( false );
        if ( line == null )
        {
            return null;
        }

        LdifToken sep = matchSep();
        if ( sep != null )
        {
            line += sep.getValue();
        }
        return createToken( LdifToken.UNKNOWN, line );
    }


    /**
     * Matches the end of the input.
     *
     * @return an EOF token with empty text if no further character can be
     *         read, null otherwise
     */
    public LdifToken matchEOF()
    {
        flushBuffer();

        StringBuffer sb = new StringBuffer( 1 );
        try
        {
            read( sb );
            unread( sb );
            return null;
        }
        catch ( EOFException e )
        {
            return createToken( LdifToken.EOF, "" ); //$NON-NLS-1$
        }
    }


    /**
     * Matches a line break: CR, LF, CR LF or LF CR.
     *
     * @return a SEP token, or null if the next character is no line break
     */
    public LdifToken matchSep()
    {
        flushBuffer();

        StringBuffer sb = new StringBuffer();
        char c;
        try
        {
            c = read( sb );
        }
        catch ( EOFException e )
        {
            return null;
        }

        if ( c != '\n' && c != '\r' )
        {
            unread( sb );
            return null;
        }

        // check for two-char-linebreak
        char partner = c == '\r' ? '\n' : '\r';
        try
        {
            if ( read( sb ) != partner )
            {
                unread( sb );
            }
        }
        catch ( EOFException e )
        {
            // single-character line break at the end of input
        }

        return createToken( LdifToken.SEP, sb.toString() );
    }


    /**
     * Matches a comment line, that is a line starting with "#", without its
     * line break.
     *
     * @return a COMMENT token, or null if the input does not continue with "#"
     */
    public LdifToken matchComment()
    {
        flushBuffer();

        String line = getFullLine( "#" ); //$NON-NLS-1$
        return line != null ? createToken( LdifToken.COMMENT, line ) : null;
    }


    /**
     * Matches the keyword "version".
     *
     * @return a VERSION_SPEC token or null if not matched
     */
    public LdifToken matchVersionSpec()
    {
        return matchSpecToken( "version", LdifToken.VERSION_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches the keyword "dn".
     *
     * @return a DN_SPEC token or null if not matched
     */
    public LdifToken matchDnSpec()
    {
        return matchSpecToken( "dn", LdifToken.DN_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches the keyword "control".
     *
     * @return a CONTROL_SPEC token or null if not matched
     */
    public LdifToken matchControlSpec()
    {
        return matchSpecToken( "control", LdifToken.CONTROL_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches the keyword "changetype".
     *
     * @return a CHANGETYPE_SPEC token or null if not matched
     */
    public LdifToken matchChangeTypeSpec()
    {
        return matchSpecToken( "changetype", LdifToken.CHANGETYPE_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches one of the change types "add", "modify", "delete", "moddn" or
     * "modrdn", tried in this order. Both "moddn" and "modrdn" yield a
     * CHANGETYPE_MODDN token.
     *
     * @return the change type token or null if not matched
     */
    public LdifToken matchChangeType()
    {
        flushBuffer();

        LdifToken token = matchWordToken( "add", LdifToken.CHANGETYPE_ADD ); //$NON-NLS-1$
        if ( token == null )
        {
            token = matchWordToken( "modify", LdifToken.CHANGETYPE_MODIFY ); //$NON-NLS-1$
        }
        if ( token == null )
        {
            token = matchWordToken( "delete", LdifToken.CHANGETYPE_DELETE ); //$NON-NLS-1$
        }
        if ( token == null )
        {
            token = matchWordToken( "moddn", LdifToken.CHANGETYPE_MODDN ); //$NON-NLS-1$
        }
        if ( token == null )
        {
            token = matchWordToken( "modrdn", LdifToken.CHANGETYPE_MODDN ); //$NON-NLS-1$
        }
        return token;
    }


    /**
     * Matches a control criticality: any number of spaces followed by "true"
     * or "false". The leading spaces are part of the token text.
     *
     * @return a CONTROL_CRITICALITY_TRUE or CONTROL_CRITICALITY_FALSE token,
     *         or null if not matched
     */
    public LdifToken matchCriticality()
    {
        flushBuffer();

        StringBuffer sb = new StringBuffer();

        String space = getWord( " " ); //$NON-NLS-1$
        while ( space != null )
        {
            sb.append( space );
            space = getWord( " " ); //$NON-NLS-1$
        }

        String value = getWord( "true" ); //$NON-NLS-1$
        if ( value != null )
        {
            sb.append( value );
            return createToken( LdifToken.CONTROL_CRITICALITY_TRUE, sb.toString() );
        }

        value = getWord( "false" ); //$NON-NLS-1$
        if ( value != null )
        {
            sb.append( value );
            return createToken( LdifToken.CONTROL_CRITICALITY_FALSE, sb.toString() );
        }

        // neither value followed: push the spaces back
        while ( sb.length() > 0 )
        {
            unread( sb );
        }
        return null;
    }


    /**
     * Matches a sequence of digits.
     *
     * @return a NUMBER token or null if the next character is no digit
     */
    public LdifToken matchNumber()
    {
        return matchCharRun( LdifToken.NUMBER, false, "" ); //$NON-NLS-1$
    }


    /**
     * Matches a digit followed by any sequence of digits and dots.
     *
     * @return an OID token or null if the next character is no digit
     */
    public LdifToken matchOid()
    {
        return matchCharRun( LdifToken.OID, false, "." ); //$NON-NLS-1$
    }


    /**
     * Matches an ASCII letter or digit followed by any sequence of ASCII
     * letters, digits, '.', ';', '-' and '_'.
     *
     * @return an ATTRIBUTE token or null if the next character is neither an
     *         ASCII letter nor a digit
     */
    public LdifToken matchAttributeDescription()
    {
        return matchCharRun( LdifToken.ATTRIBUTE, true, ".;-_" ); //$NON-NLS-1$
    }


    /**
     * Matches "add", "replace", or "delete".
     *
     * @return the LDIF token if matched, null if not matched
     */
    public LdifToken matchModTypeSpec()
    {
        flushBuffer();

        LdifToken token = matchWordToken( "add", LdifToken.MODTYPE_ADD_SPEC ); //$NON-NLS-1$
        if ( token == null )
        {
            token = matchWordToken( "replace", LdifToken.MODTYPE_REPLACE_SPEC ); //$NON-NLS-1$
        }
        if ( token == null )
        {
            token = matchWordToken( "delete", LdifToken.MODTYPE_DELETE_SPEC ); //$NON-NLS-1$
        }
        return token;
    }


    /**
     * Matches the "-" that ends a modification.
     *
     * @return a MODTYPE_SEP token or null if not matched
     */
    public LdifToken matchModSep()
    {
        flushBuffer();
        return matchWordToken( "-", LdifToken.MODTYPE_SEP ); //$NON-NLS-1$
    }


    /**
     * Matches the value type marker ":", "::" or ":&lt;" including any
     * following spaces.
     *
     * @return a VALUE_TYPE_SAFE, VALUE_TYPE_BASE64 or VALUE_TYPE_URL token, or
     *         null if the next character is no colon
     */
    public LdifToken matchValueType()
    {
        flushBuffer();

        StringBuffer sb = new StringBuffer();
        char c;
        try
        {
            c = read( sb );
        }
        catch ( EOFException e )
        {
            return null;
        }

        if ( c != ':' )
        {
            unread( sb );
            return null;
        }

        int tokenType = LdifToken.VALUE_TYPE_SAFE;
        try
        {
            c = read( sb );
            if ( c == ':' )
            {
                tokenType = LdifToken.VALUE_TYPE_BASE64;
            }
            else if ( c == '<' )
            {
                tokenType = LdifToken.VALUE_TYPE_URL;
            }
            else
            {
                // plain value: the character belongs to what follows
                unread( sb );
            }

            // the fill (spaces) is part of the token
            c = read( sb );
            while ( c == ' ' )
            {
                c = read( sb );
            }
            unread( sb );
        }
        catch ( EOFException e )
        {
            // end of input terminates the token
        }

        return createToken( tokenType, sb.toString() );
    }


    /**
     * Matches the rest of the current line, which may be empty, as a value.
     *
     * @return a VALUE token
     */
    public LdifToken matchValue()
    {
        flushBuffer();

        String line = getContent( true );
        return line != null ? createToken( LdifToken.VALUE, line ) : null;
    }


    /**
     * Matches the keyword "newrdn".
     *
     * @return a MODDN_NEWRDN_SPEC token or null if not matched
     */
    public LdifToken matchNewrdnSpec()
    {
        return matchSpecToken( "newrdn", LdifToken.MODDN_NEWRDN_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches the keyword "deleteoldrdn".
     *
     * @return a MODDN_DELOLDRDN_SPEC token or null if not matched
     */
    public LdifToken matchDeleteoldrdnSpec()
    {
        return matchSpecToken( "deleteoldrdn", LdifToken.MODDN_DELOLDRDN_SPEC ); //$NON-NLS-1$
    }


    /**
     * Matches the keyword "newsuperior".
     *
     * @return a MODDN_NEWSUPERIOR_SPEC token or null if not matched
     */
    public LdifToken matchNewsuperiorSpec()
    {
        return matchSpecToken( "newsuperior", LdifToken.MODDN_NEWSUPERIOR_SPEC ); //$NON-NLS-1$
    }
}