/*
* Reference ETL Parser for Java
* Copyright (c) 2000-2009 Constantine A Plotnikov
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package net.sf.etl.parsers.utils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import net.sf.etl.parsers.AbstractParser;
import net.sf.etl.parsers.ObjectName;
import net.sf.etl.parsers.ParserException;
import net.sf.etl.parsers.SourceLocation;
import net.sf.etl.parsers.StandardGrammars;
import net.sf.etl.parsers.TermParser;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.Token;
/**
* <p>
* This is an abstract parser that builds trees of objects based on the output
* of the term parser. The class was created by refactoring the common parts of
* BeansTermParser and EMFTermParser, so it might not yet be generic enough for
* other purposes.
* </p>
*
* <p>
* Note that the abstract methods of this parser are expected to throw an
* exception if a structural error occurs (for example, trying to assign to a
* non-existing feature of the object).
* </p>
*
* <p>
* Typical usage of parsers derived from this one is as follows:
* </p>
*
* <pre>
* TermParser p = ...; // configure term parser and start parsing
* try {
*     BeansTermParser beansParser = new BeansTermParser(p, null);
*     while (beansParser.hasNext()) {
*         MyBaseBeanType c = (MyBaseBeanType) beansParser.next();
*     }
* } finally {
*     p.close();
* }
* </pre>
*
* @see net.sf.etl.parsers.beans.BeansTermParser
* @author const
* @param <BaseObjectType>
* the base type for returned objects
* @param <FeatureType>
* the type of feature meta objects used by objects
* @param <MetaObjectType>
* the type of meta objects
* @param <HolderType>
* the holder type for collection properties
*/
public abstract class AbstractTreeParser<BaseObjectType, FeatureType, MetaObjectType, HolderType> {
/** a logger */
private static final java.util.logging.Logger log = java.util.logging.Logger
.getLogger(AbstractTreeParser.class.getName());
/**
* term parser
*/
protected final TermParser parser;
/**
* This set contains namespaces ignored by the parser
*/
protected final HashSet<String> ignoredNamespaces = new HashSet<String>();
/**
* This is a map from ignored object names to sets of ignored namespaces
*/
final HashMap<String, Set<String>> ignoredObjects = new HashMap<String, Set<String>>();
/** flag indicating that parser had errors */
protected boolean hadErrors = false;
/**
* If this flag is true and a statement from the default grammar namespace is
* encountered during {@link #hasNext()}, hasNext() returns false (meaning
* that no more objects are expected here).
*/
private boolean abortOnDefault = false;
/**
* The current position policy
*/
private PositionPolicy positionPolicy = PositionPolicy.EXPANDED;
/**
* The current system identifier
*/
protected final String systemId;
/**
* A constructor
*
* @param parser
* a term parser
*/
public AbstractTreeParser(TermParser parser) {
super();
this.parser = parser;
this.systemId = parser.getSystemId();
}
/**
* @return the system identifier for the file being parsed
*/
public String getSystemId() {
return systemId;
}
/**
* @return true if there are more objects in the stream
*/
public boolean hasNext() {
while (true) {
switch (parser.current().kind()) {
case OBJECT_START:
// if object should be ignored, skip object
if (isIgnorable(parser.current().objectName())) {
skipObject();
break;
}
if (abortOnDefault
&& parser.current().objectName().namespace().equals(
StandardGrammars.DEFAULT_NS)) {
return false;
}
return true;
case EOF:
return false;
case GRAMMAR_ERROR:
case SYNTAX_ERROR:
case SEGMENT_ERROR:
case LEXICAL_ERROR:
hadErrors = true;
handleErrorFromParser(parser.current());
// fall through: advance past the error token
default:
advanceParser();
}
}
}
/**
* Advance the parser
*
* @return the result of {@link AbstractParser#advance()}
*/
protected boolean advanceParser() {
return parser.advance();
}
/**
* Finish parsing the segment after the root object is parsed.
*/
private void finishSegment() {
int segments = 0;
while (true) {
switch (parser.current().kind()) {
case SEGMENT_START:
segments++;
break;
case SEGMENT_END:
if (segments == 0) {
return;
}
segments--;
break;
case EOF:
throw new IllegalStateException(
"Segments should be properly nested.");
case GRAMMAR_ERROR:
case SYNTAX_ERROR:
case SEGMENT_ERROR:
case LEXICAL_ERROR:
hadErrors = true;
handleErrorFromParser(parser.current());
break;
}
advanceParser();
}
}
/**
* Set whether to abort on objects from the namespace of the default grammar
* {@link StandardGrammars#DEFAULT_NS}. Encountering objects from this
* namespace usually means that loading the grammar has failed, so further
* processing of the source rarely makes sense.
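*
* <p>
* A minimal usage sketch (the {@code beansParser} variable follows the
* class-level example; {@code process(...)} is a hypothetical callback):
* </p>
*
* <pre>
* // stop iterating as soon as objects come from the default grammar,
* // which usually means the real grammar failed to load
* beansParser.setAbortOnDefaultGrammar(true);
* while (beansParser.hasNext()) {
*     process(beansParser.next());
* }
* </pre>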
*
* @param value
* if true, {@link #hasNext()} returns false when such an object is
* encountered
*/
public void setAbortOnDefaultGrammar(boolean value) {
abortOnDefault = value;
}
/**
* Get the next object from the stream. Note that the method skips to the
* end of the current segment, so that errors can be attributed to the
* correct statement object.
*
* @return the next object in the stream
*/
public BaseObjectType next() {
if (!hasNext()) {
throw new IllegalStateException("there are not next object");
}
BaseObjectType rc = parseObject();
finishSegment();
return rc;
}
/**
* Check if an object with the specified object name should be ignored
*
* @param name
* a name to check
* @return true if object should be ignored
*/
protected boolean isIgnorable(ObjectName name) {
// check if namespace is ignored
if (ignoredNamespaces.contains(name.namespace())) {
return true;
}
// check if the specific object name is ignored in its namespace
final Set<String> ns = ignoredObjects.get(name.name());
if (ns != null && ns.contains(name.namespace())) {
return true;
}
return false;
}
/**
* Skip the current object in the term stream
*/
protected void skipObject() {
int objectCount = 0;
while (true) {
switch (parser.current().kind()) {
case OBJECT_START:
objectCount++;
break;
case OBJECT_END:
objectCount--;
if (objectCount == 0) {
// exit skipping
return;
}
break;
case EOF:
log.severe("EOF while skipping object. Possibly a bug in the grammar compiler.");
return;
case GRAMMAR_ERROR:
case SYNTAX_ERROR:
case SEGMENT_ERROR:
case LEXICAL_ERROR:
hadErrors = true;
handleErrorFromParser(parser.current());
}
advanceParser();
}
}
/**
* Ignore objects from the specified namespace.
*
* @param ns
* namespace to be ignored
*/
public void ignoreNamespace(String ns) {
ignoredNamespaces.add(ns);
}
/**
* @return true if there were errors during the parsing process
*/
public boolean hadErrors() {
return hadErrors;
}
/**
* Ignore a specific object kind. Primary candidates for ignoring are
* doctype and blank statements.
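*
* <p>
* A minimal sketch (the namespace, object name, and {@code treeParser}
* variable below are placeholders, not constants defined by this library):
* </p>
*
* <pre>
* // never return a hypothetical blank statement object from next()
* treeParser.ignoreObjects("http://example.org/grammars/core", "BlankStatement");
* </pre>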
*
* @param namespace
* a namespace
* @param name
* a name in namespace
*/
public void ignoreObjects(String namespace, String name) {
Set<String> namespaces = ignoredObjects.get(name);
if (namespaces == null) {
namespaces = new HashSet<String>();
ignoredObjects.put(name, namespaces);
}
namespaces.add(namespace);
}
/**
* Parse an object
*
* @return the parsed object, or null if the object cannot be parsed for
* some reason
*/
private BaseObjectType parseObject() {
assert parser.current().kind() == Terms.OBJECT_START : "parser is not over object start: "
+ parser.current();
// create instance
final ObjectName name = parser.current().objectName();
final MetaObjectType metaObject = getMetaObject(name);
final BaseObjectType rc = createInstance(metaObject, name);
final Object startValue = setObjectStartPos(rc, metaObject, parser
.current());
objectStarted(rc);
advanceParser();
int extraObjects = 0;
loop: while (true) {
switch (parser.current().kind()) {
case OBJECT_END:
if (extraObjects > 0) {
// this closes an unexpected nested object reported earlier
extraObjects--;
advanceParser();
break;
} else {
break loop;
}
case VALUE_START:
case VALUE:
handleUnexpectedValue(parser, parser.current());
advanceParser();
break;
case OBJECT_START:
handleUnexpectedObjectStart(parser, parser.current());
extraObjects++;
advanceParser();
break;
case PROPERTY_START:
case LIST_PROPERTY_START:
parseProperty(rc, metaObject);
break;
case GRAMMAR_ERROR:
case SYNTAX_ERROR:
case SEGMENT_ERROR:
case LEXICAL_ERROR:
handleErrorFromParser(parser.current());
hadErrors = true;
// fall through: advance past the error token
default:
advanceParser();
break;
}
}
assert parser.current().kind() == Terms.OBJECT_END : "parser is not over end: "
+ parser.current();
assert parser.current().objectName().equals(name) : "type name does not match ";
setObjectEndPos(rc, metaObject, startValue, parser.current());
advanceParser();
objectEnded(rc);
return rc;
}
/**
* This method is called when an object is about to start being processed
*
* @param object
* the object to be processed
*/
protected void objectStarted(BaseObjectType object) {
}
/**
* This method is called after an object has been processed
*
* @param object
* the object that was processed
*/
protected void objectEnded(BaseObjectType object) {
}
/**
* Parse a property of the current object
*
* @param rc
* the object being populated
* @param metaObject
* the meta object associated with the object
*/
protected void parseProperty(BaseObjectType rc, MetaObjectType metaObject) {
assert parser.current().kind() == Terms.PROPERTY_START
|| parser.current().kind() == Terms.LIST_PROPERTY_START : "parser is not over property: "
+ parser.current();
final FeatureType f = getPropertyMetaObject(rc, metaObject, parser
.current());
final boolean isList = parser.current().kind() == Terms.LIST_PROPERTY_START;
final HolderType holder = isList ? startListCollection(rc, metaObject,
f) : null;
advanceParser();
int extraObjects = 0;
loop: while (true) {
switch (parser.current().kind()) {
case PROPERTY_END:
case LIST_PROPERTY_END:
if (extraObjects > 0) {
// this closes an unexpected nested property reported earlier
extraObjects--;
advanceParser();
break;
} else {
break loop;
}
case PROPERTY_START:
case LIST_PROPERTY_START:
handleUnexpectedPropertyStart(parser, parser.current());
extraObjects++;
advanceParser();
break;
case OBJECT_START: {
if (isIgnorable(parser.current().objectName())) {
skipObject();
break;
}
final Object v = parseObject();
if (isList) {
addToFeature(rc, f, holder, v);
} else {
setToFeature(rc, f, v);
}
break;
}
// FIXME multipart values
case VALUE: {
final Token value = parser.current().token().token();
if (isList) {
addValueToFeature(rc, f, holder, value);
} else {
setValueToFeature(rc, f, value);
}
advanceParser();
break;
}
case GRAMMAR_ERROR:
case SYNTAX_ERROR:
case SEGMENT_ERROR:
case LEXICAL_ERROR:
hadErrors = true;
handleErrorFromParser(parser.current());
// fall through: advance past the error token
default:
advanceParser();
break;
}
}
if (isList) {
endListCollection(rc, metaObject, f, holder);
}
}
/**
* Handle an error reported by the parser. The default implementation logs
* the error. Subclasses may override this method to collect or report
* errors differently.
*
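* <p>
* A sketch of a possible override that collects error tokens instead of
* only logging them ({@code errors} is a hypothetical list field of the
* subclass):
* </p>
*
* <pre>
* &#64;Override
* protected void handleErrorFromParser(TermToken errorToken) {
*     super.handleErrorFromParser(errorToken);
*     errors.add(errorToken); // remember the error for later reporting
* }
* </pre>
*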
* @param errorToken
* a token to be reported
*/
protected void handleErrorFromParser(TermToken errorToken) {
if (log.isLoggable(Level.SEVERE)) {
log.severe("Error is detected during parsing file "
+ parser.getSystemId() + ": " + errorToken);
}
}
/**
* Handle an unexpected property start. The default implementation throws an
* exception, since this usually indicates a serious bug in the grammar.
* However, subclasses may override this method to implement a different
* policy.
*
* @param parser
* a term parser
* @param token
* a token
*/
protected void handleUnexpectedPropertyStart(TermParser parser,
TermToken token) {
throw new ParserException("Unexpected property start inside property:"
+ token);
}
/**
* Handle an unexpected object start. The default implementation throws an
* exception, since this usually indicates a serious bug in the grammar.
* However, subclasses may override this method to implement a different
* policy.
*
* @param parser
* a term parser
* @param token
* a token
*/
protected void handleUnexpectedObjectStart(TermParser parser,
TermToken token) {
throw new ParserException("Unexpected object start inside object:"
+ token);
}
/**
* Handle an unexpected value. The default implementation throws an
* exception, since this usually indicates a serious bug in the grammar.
* However, subclasses may override this method to implement a different
* policy.
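*
* <p>
* A sketch of a more lenient override that records the problem and lets the
* caller skip the value (tolerating the value instead of throwing is an
* assumption, not the library default):
* </p>
*
* <pre>
* &#64;Override
* protected void handleUnexpectedValue(TermParser parser, TermToken token) {
*     // report the stray value through the normal error path and continue
*     handleErrorFromParser(token);
* }
* </pre>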
*
* @param parser
* a term parser
* @param token
* a token
*/
protected void handleUnexpectedValue(TermParser parser, TermToken token) {
throw new ParserException("Unexpected value inside object:" + token);
}
/**
* Parse a value so that it fits the feature. The default implementation
* returns the token text; subclasses may override it to convert values.
*
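* <p>
* A sketch of a possible override for a hypothetical beans-style subclass
* that converts values for int-typed properties ({@code MyBean} and
* {@code PropertyDescriptor} stand in for the subclass's type parameters;
* the int-property check is an assumption):
* </p>
*
* <pre>
* &#64;Override
* protected Object parseValue(MyBean rc, PropertyDescriptor f, Token value) {
*     if ("int".equals(f.getPropertyType().getName())) {
*         return Integer.valueOf(value.text()); // may throw NumberFormatException
*     }
*     return super.parseValue(rc, f, value);
* }
* </pre>
*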
* @param rc
* a context object
* @param f
* a feature that will be used to set or add this value
* @param value
* a value to parse
* @return parsed value
*/
protected Object parseValue(BaseObjectType rc, FeatureType f, Token value) {
return value.text();
}
/**
* Set value to feature
*
* @param rc
* an object
* @param f
* a feature to update
* @param value
* a value to set
*/
private void setValueToFeature(BaseObjectType rc, FeatureType f, Token value) {
setToFeature(rc, f, parseValue(rc, f, value));
}
/**
* Add value to feature
*
* @param rc
* an object
* @param f
* a feature to update
* @param holder
* a collection
* @param value
* a value to add
*/
private void addValueToFeature(BaseObjectType rc, FeatureType f,
HolderType holder, Token value) {
addToFeature(rc, f, holder, parseValue(rc, f, value));
}
/**
* Set object to feature
*
* @param rc
* an object
* @param f
* a feature to update
* @param v
* a value to set
*/
protected abstract void setToFeature(BaseObjectType rc, FeatureType f,
Object v);
/**
* Add object to feature
*
* @param rc
* an object
* @param f
* a feature to update
* @param holder
* a collection holder
* @param v
* a value to add
*/
protected abstract void addToFeature(BaseObjectType rc, FeatureType f,
HolderType holder, Object v);
/**
* Start a list collection. Note that this method was created primarily for
* the beans parser, which needs to update array properties. To reduce array
* creation, the parser can wrap the current array in an array list here and
* convert it back to an array when the collection ends.
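*
* <p>
* A minimal sketch of a List-based implementation pair (an assumption;
* {@code MyBean}, {@code BeanInfo}, and {@code PropertyDescriptor} stand in
* for the subclass's type parameters, and the actual beans parser may use a
* different holder):
* </p>
*
* <pre>
* &#64;Override
* protected List&lt;Object&gt; startListCollection(MyBean rc, BeanInfo metaObject,
*         PropertyDescriptor f) {
*     return new ArrayList&lt;Object&gt;(); // collect values here
* }
*
* &#64;Override
* protected void endListCollection(MyBean rc, BeanInfo metaObject,
*         PropertyDescriptor f, List&lt;Object&gt; holder) {
*     // convert the collected values back to whatever the property expects
*     setToFeature(rc, f, holder.toArray());
* }
* </pre>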
*
* @param rc
* an object
* @param metaObject
* a meta object
* @param f
* a feature to be updated
* @return a collection
*/
protected abstract HolderType startListCollection(BaseObjectType rc,
MetaObjectType metaObject, FeatureType f);
/**
* Finish the list collection
*
* @param rc
* an object
* @param metaObject
* the type of the object
* @param f
* the feature to update
* @param holder
* the holder of values
*/
protected abstract void endListCollection(BaseObjectType rc,
MetaObjectType metaObject, FeatureType f, HolderType holder);
/**
* Get the feature meta object from a property token
*
* @param rc
* an object
* @param metaObject
* a metaobject to examine
* @param token
* a token that contains a LIST_PROPERTY_START or PROPERTY_START
* event
* @return a feature object
*/
protected FeatureType getPropertyMetaObject(BaseObjectType rc,
MetaObjectType metaObject, TermToken token) {
return getPropertyMetaObject(rc, metaObject, token.propertyName()
.name());
}
/**
* Get the feature meta object by property name
*
* @param rc
* an object
* @param metaObject
* a metaobject to examine
* @param name
* the name of the property
* @return a feature object
*/
protected abstract FeatureType getPropertyMetaObject(BaseObjectType rc,
MetaObjectType metaObject, String name);
/**
* Set the start position in the object. The default implementation stores
* the position according to the current position policy (for example, the
* {@code EXPANDED} policy sets the {@code startLine}, {@code startColumn},
* and {@code startOffset} properties).
*
* @param rc
* an object
* @param metaObject
* a meta object
* @param token
* the start object token
* @return a value to be passed to
* {@link #setObjectEndPos(Object, Object, Object, TermToken)}, the
* default implementation returns the start position.
*/
protected Object setObjectStartPos(BaseObjectType rc,
MetaObjectType metaObject, TermToken token) {
final TextPos pos = token.start();
switch (positionPolicy) {
case EXPANDED:
final FeatureType startLineFeature = getPropertyMetaObject(rc,
metaObject, "startLine");
setToFeature(rc, startLineFeature, Integer.valueOf(pos.line()));
final FeatureType startColumnFeature = getPropertyMetaObject(rc,
metaObject, "startColumn");
setToFeature(rc, startColumnFeature, Integer.valueOf(pos.column()));
final FeatureType startOffsetFeature = getPropertyMetaObject(rc,
metaObject, "startOffset");
setToFeature(rc, startOffsetFeature, Long.valueOf(pos.offset()));
break;
case POSITIONS:
final FeatureType startFeature = getPropertyMetaObject(rc,
metaObject, "start");
setToFeature(rc, startFeature, pos);
break;
// (for SOURCE_LOCATION nothing is stored at the start; the returned
// position is used later by setObjectEndPos)
}
return pos;
}
/**
* Set the end position in the object. The default implementation stores the
* position according to the current position policy (for example, the
* {@code EXPANDED} policy sets the {@code endLine}, {@code endColumn}, and
* {@code endOffset} properties).
*
* @param rc
* an object
* @param metaObject
* a meta object
* @param startValue
* a value returned from
* {@link #setObjectStartPos(Object, Object, TermToken)}
* @param token
* the end object token
*/
protected void setObjectEndPos(BaseObjectType rc,
MetaObjectType metaObject, Object startValue, TermToken token) {
final TextPos pos = token.start();
switch (positionPolicy) {
case EXPANDED:
final FeatureType endLineFeature = getPropertyMetaObject(rc,
metaObject, "endLine");
setToFeature(rc, endLineFeature, Integer.valueOf(pos.line()));
final FeatureType endColumnFeature = getPropertyMetaObject(rc,
metaObject, "endColumn");
setToFeature(rc, endColumnFeature, Integer.valueOf(pos.column()));
final FeatureType endOffsetFeature = getPropertyMetaObject(rc,
metaObject, "endOffset");
setToFeature(rc, endOffsetFeature, Long.valueOf(pos.offset()));
break;
case POSITIONS:
final FeatureType endFeature = getPropertyMetaObject(rc,
metaObject, "end");
setToFeature(rc, endFeature, pos);
break;
case SOURCE_LOCATION:
final FeatureType locationFeature = getPropertyMetaObject(rc,
metaObject, "location");
setToFeature(rc, locationFeature, new SourceLocation(
(TextPos) startValue, pos, systemId));
break;
default:
throw new IllegalStateException(
"Unknown or unsupported position policy: " + positionPolicy);
}
}
/**
* Set the policy that controls how text positions are reported to the AST.
* If none of the policies defined in the enumeration {@link PositionPolicy}
* suits the AST classes, a custom policy can be implemented by overriding
* the methods {@link #setObjectStartPos(Object, Object, TermToken)} and
* {@link #setObjectEndPos(Object, Object, Object, TermToken)}.
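*
* <p>
* A minimal usage sketch (the {@code beansParser} variable follows the
* class-level example):
* </p>
*
* <pre>
* // store a single SourceLocation per object instead of expanded fields
* beansParser.setPosPolicy(PositionPolicy.SOURCE_LOCATION);
* </pre>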
*
* @param policy
* new value of policy
*/
public void setPosPolicy(PositionPolicy policy) {
if (policy == null) {
throw new NullPointerException("The null policy is not allowed");
}
this.positionPolicy = policy;
}
/**
* Get a meta object by name. A meta object can be anything that can be used
* to create an object instance. For example, BeansTermParser uses BeanInfo
* as the meta object.
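*
* <p>
* A sketch of a reflection-based implementation for a subclass that uses
* {@code Class} objects as meta objects (the package-per-namespace mapping
* below is an assumption, not how BeansTermParser works):
* </p>
*
* <pre>
* &#64;Override
* protected Class&lt;?&gt; getMetaObject(ObjectName name) {
*     try {
*         // map the object name to a class in a hypothetical fixed package
*         return Class.forName("org.example.ast." + name.name());
*     } catch (ClassNotFoundException e) {
*         throw new IllegalStateException("No AST class for " + name, e);
*     }
* }
* </pre>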
*
* @param name
* the object name to be mapped to a meta object
* @return a meta object
*/
protected abstract MetaObjectType getMetaObject(ObjectName name);
/**
* Create an instance of an object from the meta object
*
* @param metaObject
* a metaobject
* @param name
* the name of the object
* @return new instance
*/
protected abstract BaseObjectType createInstance(MetaObjectType metaObject,
ObjectName name);
/**
* Predefined position setting policies. They determine how start/end
* positions are saved in the AST. It is possible to create a custom policy
* by overriding the methods
* {@link AbstractTreeParser#setObjectStartPos(Object, Object, TermToken)}
* and
* {@link AbstractTreeParser#setObjectEndPos(Object, Object, Object, TermToken)}
* .
*/
public enum PositionPolicy {
/**
* Use the fields {@code startLine} (int), {@code startColumn} (int),
* {@code startOffset} (long), {@code endLine}, {@code endColumn}, and
* {@code endOffset}.
*/
EXPANDED,
/** Use the fields {@code start} and {@code end} (both of type {@link TextPos}). */
POSITIONS,
/** Use the field {@code location} of type {@link SourceLocation}. */
SOURCE_LOCATION,
}
}