/*
 * Reference ETL Parser for Java
 * Copyright (c) 2000-2009 Constantine A Plotnikov
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package net.sf.etl.parsers.utils;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;

import net.sf.etl.parsers.AbstractParser;
import net.sf.etl.parsers.ObjectName;
import net.sf.etl.parsers.ParserException;
import net.sf.etl.parsers.SourceLocation;
import net.sf.etl.parsers.StandardGrammars;
import net.sf.etl.parsers.TermParser;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.Token;

/**
 * <p>
 * This is an abstract parser that builds trees of objects based on the output
 * of a term parser. This class was created by refactoring the common parts of
 * BeansTermParser and EMFTermParser, so it might still not be generic enough
 * for other purposes.
 * </p>
 *
 * <p>
 * Note that the abstract methods of this parser are expected to throw an
 * exception if a structural error occurs (for example, an attempt to assign to
 * a non-existing feature of the object).
 * </p>
 *
 * <p>
 * Typical usage of the parsers derived from this one is the following:
 * </p>
 *
 * <pre>
 * TermParser p = ...; // configure term parser and start parsing
 * try {
 *     BeansTermParser beansParser = new BeansTermParser(p, null);
 *     while (beansParser.hasNext()) {
 *         MyBaseBeanType c = (MyBaseBeanType) beansParser.next();
 *     }
 * } finally {
 *     p.close();
 * }
 * </pre>
 *
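 * <p>
 * Subclasses supply the object model by implementing the abstract methods of
 * this class. The following is a minimal sketch (not part of the library) of a
 * subclass that builds plain maps; it assumes the usual {@code java.util}
 * collection imports:
 * </p>
 *
 * <pre>
 * public class MapTreeParser extends
 *         AbstractTreeParser&lt;Map&lt;String, Object&gt;, String, String, List&lt;Object&gt;&gt; {
 *     public MapTreeParser(TermParser parser) {
 *         super(parser);
 *     }
 *
 *     protected String getMetaObject(ObjectName name) {
 *         return name.name();
 *     }
 *
 *     protected Map&lt;String, Object&gt; createInstance(String metaObject, ObjectName name) {
 *         final Map&lt;String, Object&gt; object = new HashMap&lt;String, Object&gt;();
 *         object.put("type", metaObject);
 *         return object;
 *     }
 *
 *     protected String getPropertyMetaObject(Map&lt;String, Object&gt; rc, String metaObject, String name) {
 *         return name;
 *     }
 *
 *     protected void setToFeature(Map&lt;String, Object&gt; rc, String f, Object v) {
 *         rc.put(f, v);
 *     }
 *
 *     protected List&lt;Object&gt; startListCollection(Map&lt;String, Object&gt; rc, String metaObject, String f) {
 *         return new ArrayList&lt;Object&gt;();
 *     }
 *
 *     protected void addToFeature(Map&lt;String, Object&gt; rc, String f, List&lt;Object&gt; holder, Object v) {
 *         holder.add(v);
 *     }
 *
 *     protected void endListCollection(Map&lt;String, Object&gt; rc, String metaObject, String f, List&lt;Object&gt; holder) {
 *         rc.put(f, holder);
 *     }
 * }
 * </pre>
 *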
 * @see net.sf.etl.parsers.beans.BeansTermParser
 * @author const
 * @param <BaseObjectType>
 *            this is a base type for returned objects
 * @param <FeatureType>
 *            this is a type for the feature metatype used by objects
 * @param <MetaObjectType>
 *            this is a type for the meta object type
 * @param <HolderType>
 *            this is a holder type for collection properties
 */
public abstract class AbstractTreeParser<BaseObjectType, FeatureType, MetaObjectType, HolderType> {
    /** a logger */
    private static final java.util.logging.Logger log = java.util.logging.Logger
            .getLogger(AbstractTreeParser.class.getName());
    /**
     * term parser
     */
    protected final TermParser parser;
    /**
     * This set contains namespaces ignored by the parser
     */
    protected final HashSet<String> ignoredNamespaces = new HashSet<String>();
    /**
     * This is a map from ignored object names to sets of namespaces
     */
    final HashMap<String, Set<String>> ignoredObjects = new HashMap<String, Set<String>>();
    /** flag indicating that the parser had errors */
    protected boolean hadErrors = false;
    /**
     * If this flag is true and a statement from the default grammar is
     * encountered during hasNext(), hasNext() returns false (meaning that no
     * more objects are expected here).
     */
    private boolean abortOnDefault = false;
    /**
     * The current position policy
     */
    private PositionPolicy positionPolicy = PositionPolicy.EXPANDED;
    /**
     * The current system identifier
     */
    protected final String systemId;

    /**
     * A constructor
     *
     * @param parser
     *            a term parser
     */
    public AbstractTreeParser(TermParser parser) {
        super();
        this.parser = parser;
        this.systemId = parser.getSystemId();
    }

    /**
     * @return the system identifier for the file being parsed
     */
    public String getSystemId() {
        return systemId;
    }

    /**
     * @return true if there are more terms in the stream
     */
    public boolean hasNext() {
        while (true) {
            switch (parser.current().kind()) {
            case OBJECT_START:
                // if the object should be ignored, skip it
                if (isIgnorable(parser.current().objectName())) {
                    skipObject();
                    break;
                }
                if (abortOnDefault
                        && parser.current().objectName().namespace().equals(
                                StandardGrammars.DEFAULT_NS)) {
                    return false;
                }
                return true;
            case EOF:
                return false;
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
            case LEXICAL_ERROR:
                hadErrors = true;
                handleErrorFromParser(parser.current());
                // fall through to advance past the error token
            default:
                advanceParser();
            }
        }
    }

    /**
     * Advance the parser
     *
     * @return the result of {@link AbstractParser#advance()}
     */
    protected boolean advanceParser() {
        return parser.advance();
    }

    /**
     * Finish parsing the segment after the root object is parsed.
     */
    private void finishSegment() {
        int segments = 0;
        while (true) {
            switch (parser.current().kind()) {
            case SEGMENT_START:
                segments++;
                break;
            case SEGMENT_END:
                if (segments == 0) {
                    return;
                }
                segments--;
                break;
            case EOF:
                throw new IllegalStateException(
                        "Segments should be properly nested.");
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
            case LEXICAL_ERROR:
                hadErrors = true;
                handleErrorFromParser(parser.current());
                break;
            }
            advanceParser();
        }
    }

    /**
     * Set abort on objects from the namespace of the default grammar
     * {@link StandardGrammars#DEFAULT_NS}. Encountering objects from this
     * namespace usually means that loading the grammar has failed, so further
     * processing of the source rarely makes sense.
     *
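     * <p>
     * A usage sketch ({@code treeParser} stands for any parser derived from
     * this class):
     * </p>
     *
     * <pre>
     * treeParser.setAbortOnDefaultGrammar(true);
     * while (treeParser.hasNext()) {
     *     final Object statement = treeParser.next();
     *     // process the statement; the loop stops early if the grammar failed to load
     * }
     * </pre>
     *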
     * @param value
     *            if true {@link #hasNext()} is aborted.
     */
    public void setAbortOnDefaultGrammar(boolean value) {
        abortOnDefault = value;
    }

    /**
     * Get the next object from the stream. Note that the method skips to the
     * end of the segment, so errors can be attributed to the correct statement
     * object.
     *
     * @return the next object in the stream
     */
    public BaseObjectType next() {
        if (!hasNext()) {
            throw new IllegalStateException("There is no next object.");
        }
        BaseObjectType rc = parseObject();
        finishSegment();
        return rc;
    }

    /**
     * Check if an object with the specified object name should be ignored
     *
     * @param name
     *            a name to check
     * @return true if the object should be ignored
     */
    protected boolean isIgnorable(ObjectName name) {
        // check if the namespace is ignored
        if (ignoredNamespaces.contains(name.namespace())) {
            return true;
        }
        // check if the specific object is ignored
        final Set<String> ns = ignoredObjects.get(parser.current().objectName()
                .name());
        if (ns != null && ns.contains(name.namespace())) {
            return true;
        }
        return false;
    }

    /**
     * Skip the current object
     */
    protected void skipObject() {
        int objectCount = 0;
        while (true) {
            switch (parser.current().kind()) {
            case OBJECT_START:
                objectCount++;
                break;
            case OBJECT_END:
                objectCount--;
                if (objectCount == 0) {
                    // exit skipping
                    return;
                }
                break;
            case EOF:
                log.severe("EOF while skipping object. Possibly a bug in the grammar compiler.");
                return;
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
            case LEXICAL_ERROR:
                hadErrors = true;
                handleErrorFromParser(parser.current());
            }
            advanceParser();
        }
    }

    /**
     * Ignore objects from the specified namespace.
     *
     * @param ns
     *            the namespace to be ignored
     */
    public void ignoreNamespace(String ns) {
        ignoredNamespaces.add(ns);
    }

    /**
     * @return true if there were errors during the parsing process
     */
    public boolean hadErrors() {
        return hadErrors;
    }

    /**
     * Ignore a specific object kind. Primary candidates for such ignoring are
     * doctype and blank statements.
     *
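     * <p>
     * A usage sketch (the namespace URI and the object name below are
     * hypothetical; use the values defined by the grammar that is actually
     * being parsed):
     * </p>
     *
     * <pre>
     * treeParser.ignoreObjects("http://example.com/grammars/doctype", "DoctypeDeclaration");
     * treeParser.ignoreNamespace("http://example.com/grammars/blank");
     * </pre>
     *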
     * @param namespace
     *            a namespace
     * @param name
     *            a name in the namespace
     */
    public void ignoreObjects(String namespace, String name) {
        Set<String> namespaces = ignoredObjects.get(name);
        if (namespaces == null) {
            namespaces = new HashSet<String>();
            ignoredObjects.put(name, namespaces);
        }
        namespaces.add(namespace);
    }

    /**
     * Parse an object
     *
     * @return the parsed object or null if the object cannot be parsed for
     *         some reason
     */
    private BaseObjectType parseObject() {
        assert parser.current().kind() == Terms.OBJECT_START : "parser is not over object"
                + parser.current();
        // create instance
        final ObjectName name = parser.current().objectName();
        final MetaObjectType metaObject = getMetaObject(name);
        final BaseObjectType rc = createInstance(metaObject, name);
        final Object startValue = setObjectStartPos(rc, metaObject, parser
                .current());
        objectStarted(rc);
        advanceParser();
        int extraObjects = 0;
        loop: while (true) {
            switch (parser.current().kind()) {
            case OBJECT_END:
                if (extraObjects > 0) {
                    // consume the end of an extra (unexpected) object
                    extraObjects--;
                    advanceParser();
                    break;
                } else {
                    break loop;
                }
            case VALUE_START:
            case VALUE:
                handleUnexpectedValue(parser, parser.current());
                advanceParser();
                break;
            case OBJECT_START:
                handleUnexpectedObjectStart(parser, parser.current());
                extraObjects++;
                advanceParser();
                break;
            case PROPERTY_START:
            case LIST_PROPERTY_START:
                parseProperty(rc, metaObject);
                break;
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
            case LEXICAL_ERROR:
                handleErrorFromParser(parser.current());
                hadErrors = true;
                // fall through to advance past the error token
            default:
                advanceParser();
                break;
            }
        }
        assert parser.current().kind() == Terms.OBJECT_END : "parser is not over end: "
                + parser.current();
        assert parser.current().objectName().equals(name) : "type name does not match ";
        setObjectEndPos(rc, metaObject, startValue, parser.current());
        advanceParser();
        objectEnded(rc);
        return rc;
    }

    /**
     * This method is called when an object is about to start being processed
     *
     * @param object
     *            the object to be processed
     */
    protected void objectStarted(BaseObjectType object) {
    }

    /**
     * This method is called after the object has been processed
     *
     * @param object
     *            the object that was processed
     */
    protected void objectEnded(BaseObjectType object) {
    }

    /**
     * Parse a property
     *
     * @param rc
     *            the object being parsed
     * @param metaObject
     *            a metaobject associated with the object
     */
    protected void parseProperty(BaseObjectType rc, MetaObjectType metaObject) {
        assert parser.current().kind() == Terms.PROPERTY_START
                || parser.current().kind() == Terms.LIST_PROPERTY_START : "parser is not over property: "
                + parser.current();
        final FeatureType f = getPropertyMetaObject(rc, metaObject, parser
                .current());
        final boolean isList = parser.current().kind() == Terms.LIST_PROPERTY_START;
        final HolderType holder = isList ? startListCollection(rc, metaObject, f)
                : null;
        advanceParser();
        int extraObjects = 0;
        loop: while (true) {
            switch (parser.current().kind()) {
            case PROPERTY_END:
            case LIST_PROPERTY_END:
                if (extraObjects > 0) {
                    // consume the end of an extra (unexpected) property
                    extraObjects--;
                    advanceParser();
                    break;
                } else {
                    break loop;
                }
            case PROPERTY_START:
            case LIST_PROPERTY_START:
                handleUnexpectedPropertyStart(parser, parser.current());
                extraObjects++;
                advanceParser();
                break;
            case OBJECT_START: {
                if (isIgnorable(parser.current().objectName())) {
                    skipObject();
                    break;
                }
                final Object v = parseObject();
                if (isList) {
                    addToFeature(rc, f, holder, v);
                } else {
                    setToFeature(rc, f, v);
                }
                break;
            }
            // FIXME multipart values
            case VALUE: {
                final Token value = parser.current().token().token();
                if (isList) {
                    addValueToFeature(rc, f, holder, value);
                } else {
                    setValueToFeature(rc, f, value);
                }
                advanceParser();
                break;
            }
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
            case LEXICAL_ERROR:
                hadErrors = true;
                handleErrorFromParser(parser.current());
                // fall through to advance past the error token
            default:
                advanceParser();
                break;
            }
        }
        if (isList) {
            endListCollection(rc, metaObject, f, holder);
        }
    }

    /**
     * Handle an error from the parser
     *
     * @param errorToken
     *            a token to be reported
     */
    protected void handleErrorFromParser(TermToken errorToken) {
        if (log.isLoggable(Level.SEVERE)) {
            log.severe("Error detected while parsing file "
                    + parser.getSystemId() + ": " + errorToken);
        }
    }

    /**
     * Handle an unexpected property start. The default implementation throws
     * an exception. This means a serious bug in the grammar. However,
     * subclasses might reimplement this method to support some other policy.
     *
     * @param parser
     *            a term parser
     * @param token
     *            a token
     */
    protected void handleUnexpectedPropertyStart(TermParser parser,
            TermToken token) {
        throw new ParserException("Unexpected property start inside property:"
                + token);
    }

    /**
     * Handle an unexpected object start. The default implementation throws an
     * exception. This means a serious bug in the grammar. However, subclasses
     * might reimplement this method to support some other policy.
     *
     * @param parser
     *            a term parser
     * @param token
     *            a token
     */
    protected void handleUnexpectedObjectStart(TermParser parser, TermToken token) {
        throw new ParserException("Unexpected object start inside object:"
                + token);
    }

    /**
     * Handle an unexpected value. The default implementation throws an
     * exception. This means a serious bug in the grammar. However, subclasses
     * might reimplement this method to support some other policy.
     *
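     * <p>
     * A sketch of a more lenient override (an assumption, not part of this
     * class): record the problem and return, letting the caller advance past
     * the value instead of throwing.
     * </p>
     *
     * <pre>
     * &#64;Override
     * protected void handleUnexpectedValue(TermParser parser, TermToken token) {
     *     hadErrors = true; // remember that the input did not match the expected structure
     * }
     * </pre>
     *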
     * @param parser
     *            a term parser
     * @param token
     *            a token
     */
    protected void handleUnexpectedValue(TermParser parser, TermToken token) {
        throw new ParserException("Unexpected value inside object:" + token);
    }

    /**
     * Parse a value so that it fits the feature
     *
     * @param rc
     *            a context object
     * @param f
     *            a feature that will be used to set or add this value
     * @param value
     *            a value to parse
     * @return the parsed value
     */
    protected Object parseValue(BaseObjectType rc, FeatureType f, Token value) {
        return value.text();
    }

    /**
     * Set a value to a feature
     *
     * @param rc
     *            an object
     * @param f
     *            a feature to update
     * @param value
     *            a value to set
     */
    private void setValueToFeature(BaseObjectType rc, FeatureType f, Token value) {
        setToFeature(rc, f, parseValue(rc, f, value));
    }

    /**
     * Add a value to a feature
     *
     * @param rc
     *            an object
     * @param f
     *            a feature to update
     * @param holder
     *            a collection
     * @param value
     *            a value to add
     */
    private void addValueToFeature(BaseObjectType rc, FeatureType f,
            HolderType holder, Token value) {
        addToFeature(rc, f, holder, parseValue(rc, f, value));
    }

    /**
     * Set an object to a feature
     *
     * @param rc
     *            an object
     * @param f
     *            a feature to update
     * @param v
     *            a value to set
     */
    protected abstract void setToFeature(BaseObjectType rc, FeatureType f,
            Object v);

    /**
     * Add an object to a feature
     *
     * @param rc
     *            an object
     * @param f
     *            a feature to update
     * @param holder
     *            a collection of objects
     * @param v
     *            a value to add
     */
    protected abstract void addToFeature(BaseObjectType rc, FeatureType f,
            HolderType holder, Object v);

    /**
     * Start a list collection. Note that this method has been created
     * primarily because of the beans parser. That parser needs to update an
     * array, so to reduce array creation it is possible to create an array
     * list from the current array and then convert it back to an array.
     *
     * @param rc
     *            an object
     * @param metaObject
     *            a metaobject
     * @param f
     *            a feature to be updated
     * @return a collection
     */
    protected abstract HolderType startListCollection(BaseObjectType rc,
            MetaObjectType metaObject, FeatureType f);

    /**
     * Finish a list collection
     *
     * @param rc
     *            an object
     * @param metaObject
     *            a type of the object
     * @param f
     *            a feature to update
     * @param holder
     *            a holder of values
     */
    protected abstract void endListCollection(BaseObjectType rc,
            MetaObjectType metaObject, FeatureType f, HolderType holder);

    /**
     * Get the feature meta object
     *
     * @param rc
     *            an object
     * @param metaObject
     *            a metaobject to examine
     * @param token
     *            a token that contains LIST_PROPERTY_START or PROPERTY_START
     *            events.
     * @return a feature object
     */
    protected FeatureType getPropertyMetaObject(BaseObjectType rc,
            MetaObjectType metaObject, TermToken token) {
        return getPropertyMetaObject(rc, metaObject, token.propertyName()
                .name());
    }

    /**
     * Get the feature meta object
     *
     * @param rc
     *            an object
     * @param metaObject
     *            a metaobject to examine
     * @param name
     *            the name of the property.
     * @return a feature object
     */
    protected abstract FeatureType getPropertyMetaObject(BaseObjectType rc,
            MetaObjectType metaObject, String name);

    /**
     * Set the start position in the object. The default implementation tries
     * to set the properties startLine, startColumn, and startOffset with the
     * corresponding values.
     *
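     * <p>
     * For example, with the default {@link PositionPolicy#EXPANDED} policy a
     * parser that maps features onto bean properties (such as BeansTermParser)
     * would expect the AST classes to look roughly like the following
     * hypothetical bean:
     * </p>
     *
     * <pre>
     * public class MyStatement {
     *     private int startLine;
     *     private int startColumn;
     *     private long startOffset;
     *     // matching endLine, endColumn, endOffset fields and the usual getters and setters
     * }
     * </pre>
     *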
     * @param rc
     *            an object
     * @param metaObject
     *            a meta object
     * @param token
     *            a start object token
     * @return a value to be passed to
     *         {@link #setObjectEndPos(Object, Object, Object, TermToken)}, the
     *         default implementation returns the start position.
     */
    protected Object setObjectStartPos(BaseObjectType rc,
            MetaObjectType metaObject, TermToken token) {
        final TextPos pos = token.start();
        switch (positionPolicy) {
        case EXPANDED:
            final FeatureType startLineFeature = getPropertyMetaObject(rc,
                    metaObject, "startLine");
            setToFeature(rc, startLineFeature, new Integer(pos.line()));
            final FeatureType startColumnFeature = getPropertyMetaObject(rc,
                    metaObject, "startColumn");
            setToFeature(rc, startColumnFeature, new Integer(pos.column()));
            final FeatureType startOffsetFeature = getPropertyMetaObject(rc,
                    metaObject, "startOffset");
            setToFeature(rc, startOffsetFeature, new Long(pos.offset()));
            break;
        case POSITIONS:
            final FeatureType startFeature = getPropertyMetaObject(rc,
                    metaObject, "start");
            setToFeature(rc, startFeature, pos);
            break;
        }
        // for SOURCE_LOCATION nothing is set here; the location is created in
        // setObjectEndPos() from the returned start position
        return pos;
    }

    /**
     * Set the end position in the object. The default implementation tries to
     * set the properties endLine, endColumn, and endOffset with the
     * corresponding values.
     *
     * @param rc
     *            an object
     * @param metaObject
     *            a meta object
     * @param startValue
     *            a value returned from
     *            {@link #setObjectStartPos(Object, Object, TermToken)}
     * @param token
     *            an end object token
     */
    protected void setObjectEndPos(BaseObjectType rc,
            MetaObjectType metaObject, Object startValue, TermToken token) {
        final TextPos pos = token.start();
        switch (positionPolicy) {
        case EXPANDED:
            final FeatureType endLineFeature = getPropertyMetaObject(rc,
                    metaObject, "endLine");
            setToFeature(rc, endLineFeature, new Integer(pos.line()));
            final FeatureType endColumnFeature = getPropertyMetaObject(rc,
                    metaObject, "endColumn");
            setToFeature(rc, endColumnFeature, new Integer(pos.column()));
            final FeatureType endOffsetFeature = getPropertyMetaObject(rc,
                    metaObject, "endOffset");
            setToFeature(rc, endOffsetFeature, new Long(pos.offset()));
            break;
        case POSITIONS:
            final FeatureType endFeature = getPropertyMetaObject(rc,
                    metaObject, "end");
            setToFeature(rc, endFeature, pos);
            break;
        case SOURCE_LOCATION:
            final FeatureType locationFeature = getPropertyMetaObject(rc,
                    metaObject, "location");
            setToFeature(rc, locationFeature, new SourceLocation(
                    (TextPos) startValue, pos, systemId));
            break;
        default:
            throw new IllegalStateException(
                    "Unknown or unsupported position policy: " + positionPolicy);
        }
    }

    /**
     * Set the policy that controls how text positions are reported to the AST.
     * If neither policy defined in the enumeration {@link PositionPolicy}
     * suits the AST classes, a custom policy could be implemented by
     * overriding the methods
     * {@link #setObjectStartPos(Object, Object, TermToken)} and
     * {@link #setObjectEndPos(Object, Object, Object, TermToken)}.
     *
     * @param policy
     *            the new value of the policy
     */
    public void setPosPolicy(PositionPolicy policy) {
        if (policy == null) {
            throw new NullPointerException("The null policy is not allowed");
        }
        this.positionPolicy = policy;
    }

    /**
     * Get a meta object by name. A metaobject can be anything that can be used
     * to create an instance of the object. For example, BeansTermParser uses
     * BeanInfo as the meta object.
     *
     * @param name
     *            an object name to be mapped to a metaobject
     * @return a meta object
     */
    protected abstract MetaObjectType getMetaObject(ObjectName name);

    /**
     * Create an instance of an object from a meta object
     *
     * @param metaObject
     *            a metaobject
     * @param name
     *            the name of the object
     * @return a new instance
     */
    protected abstract BaseObjectType createInstance(MetaObjectType metaObject,
            ObjectName name);

    /**
     * Predefined position setting policies. They determine how start/end
     * positions are saved in the AST.
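     * <p>
     * For example, to store a single {@link SourceLocation} per object (a
     * usage sketch; {@code treeParser} is any parser derived from
     * {@link AbstractTreeParser}):
     * </p>
     *
     * <pre>
     * treeParser.setPosPolicy(AbstractTreeParser.PositionPolicy.SOURCE_LOCATION);
     * </pre>
     *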
     * It is possible to create a custom policy by overriding the methods
     * {@link AbstractTreeParser#setObjectStartPos(Object, Object, TermToken)}
     * and
     * {@link AbstractTreeParser#setObjectEndPos(Object, Object, Object, TermToken)}.
     */
    public enum PositionPolicy {
        /**
         * Use the fields {@code startLine} (int), {@code startColumn} (int),
         * {@code startOffset} (long), {@code endLine}, {@code endColumn}, and
         * {@code endOffset}
         */
        EXPANDED,
        /** Use the fields {@code start} and {@code end} (both are {@link TextPos}) */
        POSITIONS,
        /** Use the field {@code location} of type {@link SourceLocation}. */
        SOURCE_LOCATION,
    }
}