/* * Reference ETL Parser for Java * Copyright (c) 2000-2009 Constantine A Plotnikov * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package net.sf.etl.parsers.internal.term_parser.flattened; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import net.sf.etl.parsers.ErrorInfo; import net.sf.etl.parsers.PhraseParser; import net.sf.etl.parsers.PhraseParserFactory; import net.sf.etl.parsers.StandardGrammars; import net.sf.etl.parsers.TermParser; import net.sf.etl.parsers.TermParserFactory; import net.sf.etl.parsers.TermToken; import net.sf.etl.parsers.Terms; import net.sf.etl.parsers.TextPos; import net.sf.etl.parsers.internal.term_parser.DefaultTermParser; import net.sf.etl.parsers.internal.term_parser.GrammarLocator; import net.sf.etl.parsers.internal.term_parser.bootstrap.BootstrapETLParserLite; import net.sf.etl.parsers.internal.term_parser.flattened.DirectedAcyclicGraph.Node; import net.sf.etl.parsers.internal.term_parser.grammar.Element; import net.sf.etl.parsers.internal.term_parser.grammar.Grammar; import net.sf.etl.parsers.internal.term_parser.grammar.GrammarLiteTermParser; import org.xml.sax.InputSource; /** * This class represents an assembly of grammars that are being compiled * together. It creates flattened grammar representations. * * @author const */ // NOTE POST 0.2: avoid loading already compiled grammars that are referred // only by grammar import. However we still need to load included grammars. public class GrammarAssembly { /** * A logger for this class */ private final static Logger log = Logger.getLogger(GrammarAssembly.class .getName()); /** * A root grammar. */ GrammarView rootGrammar; /** * A map from systemId to grammar view */ private final Map<String, GrammarView> grammarViews = new LinkedHashMap<String, GrammarView>(); /** * Grammar inclusion DAG */ private final DirectedAcyclicGraph<GrammarView> grammarIncludeDAG = new DirectedAcyclicGraph<GrammarView>(); /** * Context inclusions along with grammars */ private final DirectedAcyclicGraph<ContextView> contextGrammarIncludeDAG = new DirectedAcyclicGraph<ContextView>(); /** * True if there were errors in this grammar */ private boolean hadErrors; /** * A grammar locator associated with this assembly */ private final GrammarLocator locator; /** * A term parser that initially requested loading of the grammar. This term * parser is used for reporting errors to. */ private final DefaultTermParser contextTermParser; /** * A constructor * * @param locator * a grammar locator * @param contextTermParser * a context term parser */ public GrammarAssembly(GrammarLocator locator, DefaultTermParser contextTermParser) { super(); this.locator = locator; this.contextTermParser = contextTermParser; } /** * Resolve reference to external grammar using systemId and publicId. Also * no processing is done on grammar view. * * Note that either system id or public id might be null but not both at the * same time. * * @param source * a source to parse * @return a view of grammar. If grammar assembly returned an object, it * will always return the same object again. */ public GrammarView resolveGrammar(InputSource source) { assert source.getSystemId() != null || StandardGrammars.ETL_GRAMMAR_PUBLIC_ID.equals(source .getPublicId()); if (StandardGrammars.ETL_GRAMMAR_SYSTEM_ID.equals(source.getSystemId()) || StandardGrammars.ETL_GRAMMAR_PUBLIC_ID.equals(source .getPublicId())) { // close all opened stream if (source.getCharacterStream() != null) { try { source.getCharacterStream().close(); source.setCharacterStream(null); } catch (final Exception ex) { // do nothing } } if (source.getByteStream() != null) { try { source.getByteStream().close(); source.setByteStream(null); } catch (final Exception ex) { // do nothing } } source.setEncoding(null); source.setPublicId(StandardGrammars.ETL_GRAMMAR_PUBLIC_ID); source.setSystemId(StandardGrammars.ETL_GRAMMAR_SYSTEM_ID); } GrammarView v = grammarViews.get(source.getSystemId()); if (v == null) { // In bootstrap mode only one grammar might be read. final Grammar g = parseGrammar(source); if (g != null) { v = new GrammarView(this, g, source.getSystemId(), source .getPublicId()); grammarViews.put(v.getSystemId(), v); } } return v; } /** * parse resource * * @param source * a resource to parse * @return parsed grammar */ private Grammar parseGrammar(InputSource source) { if (StandardGrammars.ETL_GRAMMAR_SYSTEM_ID.equals(source.getSystemId())) { final PhraseParser phraseParser = PhraseParserFactory.newInstance() .newPhraseParser(); phraseParser.parse(source); try { final BootstrapETLParserLite parser = new BootstrapETLParserLite( phraseParser); return parser.parse(); } finally { phraseParser.close(); } } else { try { final TermParser termParser = TermParserFactory.newInstance() .newTermParser(); termParser.parse(source); try { termParser.advance(); final GrammarLiteTermParser parser = new GrammarLiteTermParser( termParser) { /** * @see net.sf.etl.parsers.utils.AbstractTreeParser#handleErrorFromParser(net.sf.etl.parsers.TermToken) */ @Override protected void handleErrorFromParser( TermToken errorToken) { ErrorInfo ei = errorToken.errorInfo(); TextPos pos = contextTermParser.currentPos(); contextTermParser.append(new TermToken( Terms.GRAMMAR_ERROR, pos, pos, ei)); } }; parser.ignoreNamespace(StandardGrammars.DOCTYPE_NS); parser.setAbortOnDefaultGrammar(true); parser.ignoreObjects( StandardGrammars.ETL_GRAMMAR_NAMESPACE, "BlankTopLevel"); if (parser.hasNext()) { final Object o = parser.next(); if (parser.hadErrors()) { error("grammar.ParseError", source.getPublicId(), source.getSystemId()); return null; } if (parser.hasNext()) { error("grammar.TooManyGrammars", source .getPublicId(), source.getSystemId()); } return (Grammar) o; } else { error("grammar.EmptyGrammar", source.getPublicId(), source.getSystemId()); return null; } } finally { termParser.close(); } } catch (final Exception ex) { log.log(Level.SEVERE, "Exception during parsing grammar with public id \"" + source.getPublicId() + "\" and systemId \"" + source.getSystemId() + "\"", ex); error("grammar.ParseError", source.getPublicId(), source .getSystemId()); return null; } } } /** * Get include node in DAG * * @param view * view that will be represented by node * @return a node in include graph */ public Node<GrammarView> getIncludeNode(GrammarView view) { return grammarIncludeDAG.getNode(view); } /** * Process grammars creating flattened view of them. * * @param source * a source of root grammar to process */ public void processGrammars(InputSource source) { rootGrammar = resolveGrammar(source); if (rootGrammar == null) { assert hadErrors(); return; } if (rootGrammar.isAbstract()) { error("grammar.AbstractRootGrammar", rootGrammar.getSystemId(), source.getPublicId()); } rootGrammar.loadRelatedGrammars(new HashSet<GrammarView>()); if (hadErrors()) { return; } // M1: All referenced grammars are loaded. grammarIncludeDAG.minimizeImmediate(); final List<GrammarView> grammars = grammarIncludeDAG .topologicalSortObjects(); for (final Iterator<GrammarView> i = grammars.iterator(); i.hasNext();) { final GrammarView v = i.next(); v.gatherImports(); } if (hadErrors()) { return; } // M2: All grammars has set of imports built. for (final Iterator<GrammarView> i = grammars.iterator(); i.hasNext();) { final GrammarView v = i.next(); v.buildContexts(); } if (hadErrors()) { return; } // M3: All contexts for grammars are created, include relationships are // created. contextGrammarIncludeDAG.minimizeImmediate(); final List<ContextView> contextsByGrammarInclude = contextGrammarIncludeDAG .topologicalSortObjects(); for (final Iterator<ContextView> i = contextsByGrammarInclude .iterator(); i.hasNext();) { final ContextView v = i.next(); v.implementGrammarInclude(); } if (hadErrors()) { return; } // M4: Imports and definitions are gathered by direction of grammar // include // After this step it is not necessary to process abstract grammars and // all // / processing is done locally to grammars because it does not have to // deal // / with cross grammar definitions. for (final Iterator<GrammarView> i = grammars.iterator(); i.hasNext();) { final GrammarView v = i.next(); if (!v.isAbstract()) { v.flattenGrammar(); } } // NOTE POST 0.2: Should be there validation phase? } /** * Get DAG that represents context grammar includes. Currently a common * graph used. In future possibly different graphs will be used. * * @return a corresponding graph */ public DirectedAcyclicGraph<ContextView> contextGrammarIncludeDAG() { // NOTE with introduction rename and hide constructs // it might be required to introduce context tag parameter. return contextGrammarIncludeDAG; } /** * @return a root grammar */ public GrammarView rootGrammar() { return rootGrammar; } /** * @return a collection of grammars in this assembly */ public Collection<GrammarView> grammars() { return grammarViews.values(); } /** * @return True if there were errors during creation of view */ public boolean hadErrors() { return hadErrors; } /** * Report non fatal grammar error * * @param view * @param e * element in error * @param errorId * error identifier * @param args * error arguments */ public void error(GrammarView view, Element e, String errorId, Object args[]) { final TextPos startPos = e.start; final TextPos endPos = e.end; final ErrorInfo ei = new ErrorInfo(errorId, args, startPos, endPos, view.getSystemId()); final TextPos contextPos = contextTermParser.currentPos(); contextTermParser.append(new TermToken(Terms.GRAMMAR_ERROR, contextPos, contextPos, ei)); hadErrors = true; } /** * Report error * * @param string * error id * @param arg1 * error arg1 * @param arg2 * error arg2 */ private void error(String string, Object arg1, Object arg2) { hadErrors = true; final TextPos pos = contextTermParser.currentPos(); final ErrorInfo ei = new ErrorInfo(string, new Object[] { arg1, arg2 }, pos, pos, contextTermParser.getSystemId()); contextTermParser.append(new TermToken(Terms.GRAMMAR_ERROR, pos, pos, ei)); } /** * Resolve grammar using locator * * @param sourceSystemId * @param grammarSystemId * @param grammarPublicId * @return the resolved grammar */ public GrammarView resolveGrammar(String sourceSystemId, String grammarSystemId, String grammarPublicId) { final InputSource source = locator.resolveGrammar(contextTermParser, sourceSystemId, grammarSystemId, grammarPublicId); return resolveGrammar(source); } }