// GrammarAssembly.java example

// Explorer
// etl-java-master
/*
 * Reference ETL Parser for Java
 * Copyright (c) 2000-2009 Constantine A Plotnikov
 *
 * Permission is hereby granted, free of charge, to any person 
 * obtaining a copy of this software and associated documentation 
 * files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, 
 * publish, distribute, sublicense, and/or sell copies of the Software, 
 * and to permit persons to whom the Software is furnished to do so, 
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be 
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
 * SOFTWARE. 
 */
package net.sf.etl.parsers.internal.term_parser.flattened;

import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import net.sf.etl.parsers.ErrorInfo;
import net.sf.etl.parsers.PhraseParser;
import net.sf.etl.parsers.PhraseParserFactory;
import net.sf.etl.parsers.StandardGrammars;
import net.sf.etl.parsers.TermParser;
import net.sf.etl.parsers.TermParserFactory;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.internal.term_parser.DefaultTermParser;
import net.sf.etl.parsers.internal.term_parser.GrammarLocator;
import net.sf.etl.parsers.internal.term_parser.bootstrap.BootstrapETLParserLite;
import net.sf.etl.parsers.internal.term_parser.flattened.DirectedAcyclicGraph.Node;
import net.sf.etl.parsers.internal.term_parser.grammar.Element;
import net.sf.etl.parsers.internal.term_parser.grammar.Grammar;
import net.sf.etl.parsers.internal.term_parser.grammar.GrammarLiteTermParser;

import org.xml.sax.InputSource;

/**
 * This class represents an assembly of grammars that are being compiled
 * together. It creates flattened grammar representations.
 * 
 * @author const
 */
// NOTE POST 0.2: avoid loading already compiled grammars that are referenced
// only through a grammar import. However, included grammars still need to be
// loaded.
public class GrammarAssembly {
	/**
	 * A logger for this class. In this class it is used to record exceptions
	 * that are thrown while a grammar is being parsed.
	 */
	private final static Logger log = Logger.getLogger(GrammarAssembly.class
			.getName());
	/**
	 * A root grammar. It is assigned by {@link #processGrammars(InputSource)}
	 * from the result of {@link #resolveGrammar(InputSource)}, so it is null
	 * before that call and stays null when the root grammar cannot be
	 * resolved.
	 */
	GrammarView rootGrammar;
	/**
	 * A map from systemId to grammar view. Entries are added by
	 * {@link #resolveGrammar(InputSource)}, which consults this map first so
	 * that an already resolved grammar is returned instead of being parsed
	 * again.
	 */
	private final Map<String, GrammarView> grammarViews = new LinkedHashMap<String, GrammarView>();
	/**
	 * Grammar inclusion DAG. Its nodes are handed out by
	 * {@link #getIncludeNode(GrammarView)}; {@link #processGrammars(InputSource)}
	 * minimizes it and uses its topological order to process grammars.
	 */
	private final DirectedAcyclicGraph<GrammarView> grammarIncludeDAG = new DirectedAcyclicGraph<GrammarView>();
	/**
	 * Context inclusions along with grammars. The graph is exposed through
	 * {@link #contextGrammarIncludeDAG()};
	 * {@link #processGrammars(InputSource)} minimizes it and uses its
	 * topological order to implement grammar includes.
	 */
	private final DirectedAcyclicGraph<ContextView> contextGrammarIncludeDAG = new DirectedAcyclicGraph<ContextView>();
	/**
	 * True if there were errors in this grammar. The flag is set by both
	 * error reporting methods of this class and is never reset here.
	 */
	private boolean hadErrors;
	/**
	 * A grammar locator associated with this assembly. It is used by
	 * {@link #resolveGrammar(String, String, String)} to obtain the input
	 * source for a referenced grammar.
	 */
	private final GrammarLocator locator;
	/**
	 * A term parser that initially requested loading of the grammar. This term
	 * parser is used for reporting errors to: errors are appended to it as
	 * {@link Terms#GRAMMAR_ERROR} tokens positioned at its current position.
	 */
	private final DefaultTermParser contextTermParser;

	/**
	 * Create an empty assembly. No grammar is loaded until
	 * {@link #processGrammars(InputSource)} or one of the resolve methods is
	 * called.
	 * 
	 * @param locator
	 *            the grammar locator used to find referenced grammars
	 * @param contextTermParser
	 *            the term parser that requested the grammar; grammar errors
	 *            are reported to it
	 */
	public GrammarAssembly(GrammarLocator locator,
			DefaultTermParser contextTermParser) {
		this.contextTermParser = contextTermParser;
		this.locator = locator;
	}

	/**
	 * Resolve a grammar from an already located input source, parsing it on
	 * first use. No further processing is done on the returned grammar view.
	 * 
	 * Note that either system id or public id of the source might be null but
	 * not both at the same time: a source without a system id must carry the
	 * standard ETL grammar public id.
	 * 
	 * If the source identifies the standard ETL grammar (by system id or by
	 * public id), the streams attached to it are closed and detached, its
	 * encoding is cleared and both ids are reset to the standard values before
	 * the grammar is looked up.
	 * 
	 * @param source
	 *            a source to parse; it may be modified as described above
	 * @return a view of grammar, or null if the grammar could not be parsed.
	 *         If grammar assembly returned an object for a system id, it will
	 *         always return the same object for that system id again.
	 */
	public GrammarView resolveGrammar(InputSource source) {
		assert source.getSystemId() != null
				|| StandardGrammars.ETL_GRAMMAR_PUBLIC_ID.equals(source
						.getPublicId());
		if (StandardGrammars.ETL_GRAMMAR_SYSTEM_ID.equals(source.getSystemId())
				|| StandardGrammars.ETL_GRAMMAR_PUBLIC_ID.equals(source
						.getPublicId())) {
			// Strip everything except the standard ids from the source.
			// The streams are detached in finally blocks: a failing close()
			// must not leave a stale stream attached to the source.
			if (source.getCharacterStream() != null) {
				try {
					source.getCharacterStream().close();
				} catch (final Exception ex) {
					// closing is best effort, the stream is discarded anyway
					log.log(Level.FINE,
							"Failed to close the character stream supplied "
									+ "for the standard ETL grammar", ex);
				} finally {
					source.setCharacterStream(null);
				}
			}
			if (source.getByteStream() != null) {
				try {
					source.getByteStream().close();
				} catch (final Exception ex) {
					// closing is best effort, the stream is discarded anyway
					log.log(Level.FINE,
							"Failed to close the byte stream supplied "
									+ "for the standard ETL grammar", ex);
				} finally {
					source.setByteStream(null);
				}
			}
			source.setEncoding(null);
			source.setPublicId(StandardGrammars.ETL_GRAMMAR_PUBLIC_ID);
			source.setSystemId(StandardGrammars.ETL_GRAMMAR_SYSTEM_ID);
		}
		GrammarView v = grammarViews.get(source.getSystemId());
		if (v == null) {
			// In bootstrap mode only one grammar might be read.
			final Grammar g = parseGrammar(source);
			if (g != null) {
				// Only successfully parsed grammars are cached; a failed
				// parse has already been reported by parseGrammar.
				v = new GrammarView(this, g, source.getSystemId(), source
						.getPublicId());
				grammarViews.put(v.getSystemId(), v);
			}
		}
		return v;
	}

	/**
	 * Parse a grammar from the source. The standard ETL grammar (recognized by
	 * its system id) is read with the bootstrap parser on top of a phrase
	 * parser; any other grammar is read with a term parser.
	 * 
	 * For non-standard grammars every failure is reported through the context
	 * term parser and results in null: an exception (which is also logged), a
	 * parser that reported errors, or a source without any grammar. A source
	 * with more than one grammar is reported as an error too, but the first
	 * grammar is still returned. Exceptions thrown while reading the standard
	 * grammar are not caught here.
	 * 
	 * @param source
	 *            a resource to parse
	 * @return parsed grammar or null if it could not be parsed
	 */
	private Grammar parseGrammar(InputSource source) {
		final boolean standardGrammar = StandardGrammars.ETL_GRAMMAR_SYSTEM_ID
				.equals(source.getSystemId());
		if (standardGrammar) {
			final PhraseParser phrases = PhraseParserFactory.newInstance()
					.newPhraseParser();
			// NOTE(review): parse() runs before the try block, so the parser
			// is not closed when parse() itself throws - confirm whether
			// close() may be called on a parser that failed to start.
			phrases.parse(source);
			try {
				return new BootstrapETLParserLite(phrases).parse();
			} finally {
				phrases.close();
			}
		}
		try {
			final TermParser terms = TermParserFactory.newInstance()
					.newTermParser();
			terms.parse(source);
			try {
				terms.advance();
				final GrammarLiteTermParser grammarParser = new GrammarLiteTermParser(
						terms) {
					/**
					 * Forward a parser error to the context term parser as a
					 * grammar error located at its current position.
					 */
					@Override
					protected void handleErrorFromParser(TermToken errorToken) {
						final ErrorInfo info = errorToken.errorInfo();
						final TextPos at = contextTermParser.currentPos();
						final TermToken report = new TermToken(
								Terms.GRAMMAR_ERROR, at, at, info);
						contextTermParser.append(report);
					}
				};
				grammarParser.ignoreNamespace(StandardGrammars.DOCTYPE_NS);
				grammarParser.setAbortOnDefaultGrammar(true);
				grammarParser.ignoreObjects(
						StandardGrammars.ETL_GRAMMAR_NAMESPACE, "BlankTopLevel");
				if (!grammarParser.hasNext()) {
					error("grammar.EmptyGrammar", source.getPublicId(), source
							.getSystemId());
					return null;
				}
				final Object parsed = grammarParser.next();
				if (grammarParser.hadErrors()) {
					error("grammar.ParseError", source.getPublicId(), source
							.getSystemId());
					return null;
				}
				if (grammarParser.hasNext()) {
					// reported, but the first grammar is still returned
					error("grammar.TooManyGrammars", source.getPublicId(),
							source.getSystemId());
				}
				return (Grammar) parsed;
			} finally {
				terms.close();
			}
		} catch (final Exception ex) {
			final String message = "Exception during parsing grammar with public id \""
					+ source.getPublicId()
					+ "\" and systemId \""
					+ source.getSystemId() + "\"";
			log.log(Level.SEVERE, message, ex);
			error("grammar.ParseError", source.getPublicId(), source
					.getSystemId());
			return null;
		}
	}

	/**
	 * Look up the node for a grammar view in the grammar include graph.
	 * 
	 * @param view
	 *            the grammar view to look up
	 * @return the node that the grammar include graph returns for the view
	 */
	public Node<GrammarView> getIncludeNode(GrammarView view) {
		final Node<GrammarView> includeNode = grammarIncludeDAG.getNode(view);
		return includeNode;
	}

	/**
	 * Process grammars creating flattened view of them.
	 * 
	 * The root grammar is resolved from the source and stored in
	 * {@link #rootGrammar}; then the related grammars are loaded and the
	 * grammars are processed in phases (see the milestone comments in the
	 * body). After each phase except the last one the method returns early if
	 * any error has been reported so far; callers should check
	 * {@link #hadErrors()} afterwards.
	 * 
	 * @param source
	 *            a source of root grammar to process
	 */
	public void processGrammars(InputSource source) {
		rootGrammar = resolveGrammar(source);
		if (rootGrammar == null) {
			// a failed resolution must already have been reported
			assert hadErrors();
			return;
		}
		if (rootGrammar.isAbstract()) {
			// NOTE(review): the other reports in this class pass
			// (publicId, systemId); here the order is (systemId, publicId).
			// Confirm against the message template before changing it.
			error("grammar.AbstractRootGrammar", rootGrammar.getSystemId(),
					source.getPublicId());
		}
		rootGrammar.loadRelatedGrammars(new HashSet<GrammarView>());
		if (hadErrors()) {
			return;
		}
		// M1: All referenced grammars are loaded.
		grammarIncludeDAG.minimizeImmediate();
		final List<GrammarView> grammars = grammarIncludeDAG
				.topologicalSortObjects();
		for (final GrammarView grammar : grammars) {
			grammar.gatherImports();
		}
		if (hadErrors()) {
			return;
		}
		// M2: All grammars have their set of imports built.
		for (final GrammarView grammar : grammars) {
			grammar.buildContexts();
		}
		if (hadErrors()) {
			return;
		}
		// M3: All contexts for grammars are created, include relationships
		// are created.
		contextGrammarIncludeDAG.minimizeImmediate();
		final List<ContextView> contextsByGrammarInclude = contextGrammarIncludeDAG
				.topologicalSortObjects();
		for (final ContextView context : contextsByGrammarInclude) {
			context.implementGrammarInclude();
		}
		if (hadErrors()) {
			return;
		}
		// M4: Imports and definitions are gathered by direction of grammar
		// include. After this step it is not necessary to process abstract
		// grammars, and all processing is done locally to grammars because it
		// does not have to deal with cross grammar definitions.
		for (final GrammarView grammar : grammars) {
			if (!grammar.isAbstract()) {
				grammar.flattenGrammar();
			}
		}
		// NOTE POST 0.2: Should there be a validation phase?
	}

	/**
	 * Get the graph that represents context grammar includes. At present one
	 * common graph is shared by the whole assembly; different graphs might be
	 * used in the future.
	 * 
	 * @return the context grammar include graph of this assembly
	 */
	public DirectedAcyclicGraph<ContextView> contextGrammarIncludeDAG() {
		// NOTE: once rename and hide constructs are introduced, a context tag
		// parameter might be required here.
		return this.contextGrammarIncludeDAG;
	}

	/**
	 * @return the root grammar view, or null if it has not been resolved
	 */
	public GrammarView rootGrammar() {
		return this.rootGrammar;
	}

	/**
	 * @return the grammar views registered in this assembly; this is the live
	 *         values view of the internal map, not a copy
	 */
	public Collection<GrammarView> grammars() {
		final Collection<GrammarView> views = grammarViews.values();
		return views;
	}

	/**
	 * @return true if at least one error has been reported through this
	 *         assembly
	 */
	public boolean hadErrors() {
		return this.hadErrors;
	}

	/**
	 * Report non fatal grammar error. The error is located at the span of the
	 * offending element within the grammar and is appended to the context term
	 * parser as a grammar error token at the parser's current position. The
	 * assembly is marked as having errors.
	 * 
	 * @param view
	 *            the grammar view in which the error was found; its system id
	 *            is recorded in the error
	 * @param e
	 *            element in error
	 * @param errorId
	 *            error identifier
	 * @param args
	 *            error arguments
	 */
	public void error(GrammarView view, Element e, String errorId,
			Object[] args) {
		// Set the flag first, like the private overload does, so the assembly
		// is marked as failed even if reporting itself throws.
		hadErrors = true;
		final ErrorInfo ei = new ErrorInfo(errorId, args, e.start, e.end,
				view.getSystemId());
		final TextPos contextPos = contextTermParser.currentPos();
		contextTermParser.append(new TermToken(Terms.GRAMMAR_ERROR, contextPos,
				contextPos, ei));
	}

	/**
	 * Report an error that is not tied to a grammar element. The error is
	 * located at the current position of the context term parser and carries
	 * that parser's system id. The assembly is marked as having errors.
	 * 
	 * @param string
	 *            error id
	 * @param arg1
	 *            first error argument
	 * @param arg2
	 *            second error argument
	 */
	private void error(String string, Object arg1, Object arg2) {
		hadErrors = true;
		final TextPos at = contextTermParser.currentPos();
		final Object[] arguments = { arg1, arg2 };
		final ErrorInfo info = new ErrorInfo(string, arguments, at, at,
				contextTermParser.getSystemId());
		final TermToken report = new TermToken(Terms.GRAMMAR_ERROR, at, at,
				info);
		contextTermParser.append(report);
	}

	/**
	 * Resolve grammar using locator: the locator turns the ids into an input
	 * source, which is then resolved by
	 * {@link #resolveGrammar(InputSource)}.
	 * 
	 * NOTE(review): the source returned by the locator is used without a null
	 * check - confirm that the locator never returns null.
	 * 
	 * @param sourceSystemId
	 *            passed to the locator; presumably the system id of the
	 *            source that refers to the grammar - verify against the
	 *            locator
	 * @param grammarSystemId
	 *            the system id of the requested grammar
	 * @param grammarPublicId
	 *            the public id of the requested grammar
	 * @return the resolved grammar
	 */
	public GrammarView resolveGrammar(String sourceSystemId,
			String grammarSystemId, String grammarPublicId) {
		return resolveGrammar(locator.resolveGrammar(contextTermParser,
				sourceSystemId, grammarSystemId, grammarPublicId));
	}

}