/****************************************************************************
*
* https://github.com/vincent-zurczak/Xml-Region-Analyzer
*
* Copyright (c) 2012, Vincent Zurczak - All rights reserved.
* This source file is released under the terms of the BSD license.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University of California, Berkeley nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*****************************************************************************/
package org.teiid.designer.runtime.ui.wizards.vdbs.style;
import java.util.ArrayList;
import java.util.List;
import org.teiid.designer.runtime.ui.wizards.vdbs.style.XmlRegion.XmlRegionType;
/**
 * Splits a XML text into typed regions (instructions, comments, mark-ups,
 * attributes, values, CDATA sections, white spaces) for styling purposes.
 * <p>
 * The analyzer is tolerant: the input may be an invalid XML document. What
 * cannot be classified is reported as a single
 * {@link XmlRegionType#UNEXPECTED} region that runs until the end of the text.
 * </p>
 * <p>
 * Region offsets are used here as <code>[start, end)</code>: the end offset
 * is the index that follows the last character of the region.
 * </p>
 * <p>
 * The current reading position is kept in an instance field that every
 * <code>analyze*</code> method reads and updates, so a given instance must
 * not be used by several threads at the same time.
 * </p>
 *
 * @author Vincent Zurczak
 * @version 1.0 (tag version)
 */
public class XmlRegionAnalyzer {

    /** Index of the next character to analyze in the XML text. */
    private int offset;

    /**
     * Analyzes a XML document.
     *
     * @param xml the XML text (may be an invalid XML document, may be null)
     * @return a non-null list of XML regions, in document order
     */
    public List<XmlRegion> analyzeXml( String xml ) {

        this.offset = 0;
        List<XmlRegion> positions = new ArrayList<XmlRegion> ();
        if( xml == null ) {
            return positions;
        }

        final int length = xml.length();
        while( this.offset < length ) {

            // White spaces
            analyzeWhitespaces( xml, positions );
            if( this.offset >= length ) {
                break;
            }

            char c = xml.charAt( this.offset );
            boolean recognized;

            // "<" can be several things
            if( c == '<' ) {
                recognized = analyzeInstruction( xml, positions )
                        || analyzeComment( xml, positions )
                        || analyzeMarkup( xml, positions )
                        || analyzeCData( xml, positions );
            }

            // ">" and "/>" can only indicate a mark-up.
            // The bound check prevents an exception when "/" is the last character.
            else if( c == '>'
                    || c == '/' && this.offset + 1 < length && xml.charAt( this.offset + 1 ) == '>' ) {
                recognized = analyzeMarkup( xml, positions );
            }

            // Other things can be...
            else {
                recognized = analyzeAttribute( xml, positions )
                        || analyzeAttributeValue( xml, positions )
                        || analyzeMarkupValue( xml, positions );
            }

            // Nothing matched: the remaining text is reported as unexpected
            if( ! recognized ) {
                positions.add( new XmlRegion( XmlRegionType.UNEXPECTED, this.offset, length ));
                break;
            }
        }

        return positions;
    }

    /**
     * Tries to analyze a XML instruction ("&lt;? ... ?&gt;").
     * <p>
     * The instruction is only recognized when its closing "?&gt;" is found.
     * An unterminated instruction is left untouched so that the caller can
     * try something else.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a XML instruction
     */
    boolean analyzeInstruction( String xml, List<XmlRegion> positions ) {

        final int start = this.offset;
        if( ! xml.startsWith( "<?", start )) {
            return false;
        }

        // Searching from the opening "?" keeps "<?>" recognized as an instruction.
        // A missing "?>" used to result in an endless loop or in an exception.
        int close = xml.indexOf( "?>", start + 1 );
        if( close < 0 ) {
            return false;
        }

        int end = close + 2;
        positions.add( new XmlRegion( XmlRegionType.INSTRUCTION, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a XML comment ("&lt;!-- ... --&gt;").
     * <p>
     * The comment ends with the first "--&gt;" that follows the opening
     * sequence. An unterminated comment covers the rest of the text.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a XML comment
     */
    boolean analyzeComment( String xml, List<XmlRegion> positions ) {

        final String opening = "<!--";
        final String closing = "-->";

        final int start = this.offset;
        if( ! xml.startsWith( opening, start )) {
            return false;
        }

        // The closing sequence cannot reuse the dashes of the opening one
        int close = xml.indexOf( closing, start + opening.length());
        int end = close < 0 ? xml.length() : close + closing.length();

        positions.add( new XmlRegion( XmlRegionType.COMMENT, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a XML mark-up.
     * <p>
     * Three pieces of text are recognized as a mark-up region:
     * </p>
     * <ul>
     * <li>"&lt;name", up to the first white space, or up to "&gt;" (included).</li>
     * <li>"/&gt;", which closes an empty element.</li>
     * <li>"&gt;", which closes a mark-up that had attributes.</li>
     * </ul>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a XML mark-up
     */
    boolean analyzeMarkup( String xml, List<XmlRegion> positions ) {

        final int length = xml.length();
        final int start = this.offset;
        if( start >= length ) {
            return false;
        }

        int end;
        char first = xml.charAt( start );

        // "<..."
        if( first == '<' ) {

            // Do not process a CData section or a comment as a mark-up
            if( start + 1 < length && xml.charAt( start + 1 ) == '!' ) {
                return false;
            }

            // Mark-up name
            end = start;
            while( end < length
                    && xml.charAt( end ) != '>'
                    && ! Character.isWhitespace( xml.charAt( end ))) {
                end ++;
            }

            // The closing ">" belongs to the region
            if( end < length && xml.charAt( end ) == '>' ) {
                end ++;
            }
        }

        // "/>"
        else if( first == '/' && start + 1 < length && xml.charAt( start + 1 ) == '>' ) {
            end = start + 2;
        }

        // "attributes... >"
        else if( first == '>' ) {
            end = start + 1;
        }

        else {
            return false;
        }

        positions.add( new XmlRegion( XmlRegionType.MARKUP, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a XML attribute name.
     * <p>
     * Ignoring white spaces, an attribute must follow an attribute value or
     * a mark-up that is not closed yet (it does not end with "&gt;").
     * When no region (or only white spaces) was found so far, the text is
     * also analyzed as an attribute.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a XML attribute
     */
    boolean analyzeAttribute( String xml, List<XmlRegion> positions ) {

        // An attribute follows an open mark-up or an attribute value
        for( int i = positions.size() - 1; i >= 0; i-- ) {
            XmlRegion xr = positions.get( i );
            XmlRegionType type = xr.getXmlRegionType();
            if( type == XmlRegionType.WHITESPACE ) {
                continue;
            }

            if( type == XmlRegionType.ATTRIBUTE_VALUE ) {
                break;
            }

            if( type == XmlRegionType.MARKUP && xml.charAt( xr.getEnd() - 1 ) != '>' ) {
                break;
            }

            return false;
        }

        // The name stops at "=", "/", ">" or at a white space
        final int length = xml.length();
        final int start = this.offset;
        int end = start;
        while( end < length ) {
            char c = xml.charAt( end );
            if( c == '=' || c == '/' || c == '>' || Character.isWhitespace( c )) {
                break;
            }

            end ++;
        }

        // Found one?
        if( end == start ) {
            return false;
        }

        positions.add( new XmlRegion( XmlRegionType.ATTRIBUTE, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a mark-up's value (the text located between mark-ups).
     * <p>
     * Ignoring white spaces, a mark-up value must follow a mark-up, a comment
     * or a CDATA section that ends with "&gt;". The value runs until the next
     * "&lt;" or until the end of the text. White spaces located right before
     * the value are merged into it.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a mark-up value
     */
    boolean analyzeMarkupValue( String xml, List<XmlRegion> positions ) {

        // A mark-up value follows a closed mark-up, comment or CDATA section
        for( int i = positions.size() - 1; i >= 0; i-- ) {
            XmlRegion xr = positions.get( i );
            XmlRegionType type = xr.getXmlRegionType();
            if( type == XmlRegionType.WHITESPACE ) {
                continue;
            }

            boolean canPrecedeText = type == XmlRegionType.MARKUP
                    || type == XmlRegionType.COMMENT
                    || type == XmlRegionType.CDATA;

            if( canPrecedeText && xml.charAt( xr.getEnd() - 1 ) == '>' ) {
                break;
            }

            return false;
        }

        // Read...
        final int length = xml.length();
        int end = this.offset;
        while( end < length && xml.charAt( end ) != '<' ) {
            end ++;
        }

        if( end == this.offset ) {
            return false;
        }

        // We must here repair the list if the previous position is made up of white spaces.
        // The list may be empty (e.g. the text starts with "/"), hence the guard.
        int start = this.offset;
        if( ! positions.isEmpty()) {
            int lastIndex = positions.size() - 1;
            XmlRegion last = positions.get( lastIndex );
            if( last.getXmlRegionType() == XmlRegionType.WHITESPACE ) {
                start = last.getStart();
                positions.remove( lastIndex );
            }
        }

        positions.add( new XmlRegion( XmlRegionType.MARKUP_VALUE, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a XML attribute's value.
     * <p>
     * Ignoring white spaces, an attribute value must follow an attribute and
     * start with "=". The region covers "=" and everything up to the closing
     * quote (included). The first quote that is met, single or double, is the
     * delimiter, and the value ends with the next occurrence of this same
     * quote. A quote that directly follows a backslash is ignored.
     * When the value is not terminated, the region covers the rest of the text.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a XML attribute value
     */
    boolean analyzeAttributeValue( String xml, List<XmlRegion> positions ) {

        // An attribute value follows an attribute
        for( int i = positions.size() - 1; i >= 0; i-- ) {
            XmlRegionType type = positions.get( i ).getXmlRegionType();
            if( type == XmlRegionType.WHITESPACE ) {
                continue;
            }

            if( type == XmlRegionType.ATTRIBUTE ) {
                break;
            }

            return false;
        }

        // Analyze what we have
        final int length = xml.length();
        final int start = this.offset;
        if( start >= length || xml.charAt( start ) != '=' ) {
            return false;
        }

        int end = start + 1;
        char delimiter = 0;     // 0 until the opening quote is met
        char previous = '=';
        while( end < length ) {
            char c = xml.charAt( end ++ );
            boolean escaped = previous == '\\';
            previous = c;
            if( escaped ) {
                continue;
            }

            if( delimiter == 0 ) {
                if( c == '"' || c == '\'' ) {
                    delimiter = c;
                }
            } else if( c == delimiter ) {
                break;
            }
        }

        positions.add( new XmlRegion( XmlRegionType.ATTRIBUTE_VALUE, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze a CDATA section ("&lt;![CDATA[ ... ]]&gt;").
     * <p>
     * The section ends with the first "]]&gt;" that follows the opening
     * sequence, which also handles a content that ends with "]".
     * An unterminated section covers the rest of the text.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     * @return true if it recognized a CDATA section
     */
    boolean analyzeCData( String xml, List<XmlRegion> positions ) {

        final String opening = "<![CDATA[";
        final String closing = "]]>";

        final int start = this.offset;
        if( ! xml.startsWith( opening, start )) {
            return false;
        }

        int close = xml.indexOf( closing, start + opening.length());
        int end = close < 0 ? xml.length() : close + closing.length();

        positions.add( new XmlRegion( XmlRegionType.CDATA, start, end ));
        this.offset = end;
        return true;
    }

    /**
     * Tries to analyze white spaces.
     * <p>
     * If white spaces are found, a XML position is stored and the offset is updated.
     * </p>
     *
     * @param xml the XML text
     * @param positions the positions already found
     */
    void analyzeWhitespaces( String xml, List<XmlRegion> positions ) {

        final int length = xml.length();
        int end = this.offset;
        while( end < length && Character.isWhitespace( xml.charAt( end ))) {
            end ++;
        }

        if( end != this.offset ) {
            positions.add( new XmlRegion( XmlRegionType.WHITESPACE, this.offset, end ));
            this.offset = end;
        }
    }
}