FormatSource.java example

Explorer
etl-java-master
/*
 * Reference ETL Parser for Java
 * Copyright (c) 2000-2009 Constantine A Plotnikov
 *
 * Permission is hereby granted, free of charge, to any person 
 * obtaining a copy of this software and associated documentation 
 * files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, 
 * publish, distribute, sublicense, and/or sell copies of the Software, 
 * and to permit persons to whom the Software is furnished to do so, 
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be 
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
 * SOFTWARE. 
 */
package net.sf.etl.parsers.utils;

import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.HashSet;

import net.sf.etl.parsers.TermParser;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.Token;
import net.sf.etl.parsers.Tokens;

/**
 * This class implements default ETL source code formatting. Formatting is
 * currently very simple and follows these rules:
 * <ul>
 * <li>
 * <p>
 * Blocks are formatted as follows:
 * </p>
 * 
 * <pre>
 * start {
 *    text;
 * } then {
 *    text;
 * } end;
 * </pre>
 * 
 * </li>
 * 
 * <li>The whitespace is ignored.</li>
 * <li>Attributes are started on the new line. After the end of each attribute
 * object a new line is forced.</li>
 * <li>The documentation comments are indented and are always put on a
 * separate line.</li>
 * <li>The line and block comments are not touched, if they start at the
 * beginning of the line. Otherwise they are indented to the current level if
 * there were no tokens on the current line. For multiline block comments,
 * additional parts are not touched.</li>
 * <li>If the line comment is inside a segment, then the segment is continued
 * on the current indentation level on the next line.</li>
 * <li>The tab character is used for indentation.</li>
 * </ul>
 * 
 * @author const
 */
public class FormatSource extends AbstractFileConverter {
	/**
	 * A string that is printed once per indentation level at the start of an
	 * indented line
	 */
	String indentationString = "\t";
	/** current indentation level */
	int indentLevel = 0;
	/** true if there already were non whitespace tokens on the current line */
	boolean wereTokens = false;
	/** true if the new line is needed to start next line */
	boolean needNewLine = false;
	/**
	 * if true there was a new line in input between printed token and a new
	 * token
	 */
	boolean wasNewLine = true;
	/** if true, the next space character is suppressed */
	boolean spaceSuppressed = false;
	/** an output */
	PrintWriter out;
	/**
	 * last token printed with {@link #print(Token)}, or null if no token has
	 * been printed yet. Text printed with {@link #printControl(String)} does
	 * not update this field.
	 */
	private Token lastPrinted;
	/** graphics around which space is suppressed */
	private final HashSet<String> graphicsWithSuppressedSpace = new HashSet<String>();

	/**
	 * A constructor. It registers the graphics tokens ("." and ":") around
	 * which spaces are suppressed.
	 */
	public FormatSource() {
		// FIXME make configuration. Possibly more flexible configuration is
		// required, for example one that consider context of the expression.

		graphicsWithSuppressedSpace.add(".");
		graphicsWithSuppressedSpace.add(":");
	}

	/**
	 * Application entry point. Any failure is reported by printing the stack
	 * trace and the process exits with status 1.
	 * 
	 * @param args
	 *            application arguments
	 */
	public static void main(String[] args) {
		try {
			new FormatSource().start(args);
		} catch (Throwable t) {
			t.printStackTrace();
			System.exit(1);
		}
	}

	/**
	 * {@inheritDoc}
	 * 
	 * The formatted text is written to the supplied stream and flushed when
	 * the top level content has been processed.
	 */
	@Override
	protected void processContent(OutputStream out, TermParser p)
			throws Exception {
		// Reset line tracking state so that a previous invocation on the same
		// instance cannot influence the layout of this output.
		// NOTE(review): whether the base class reuses one instance for several
		// sources is not visible here; the reset is a no-op on the first call.
		wereTokens = false;
		needNewLine = false;
		wasNewLine = true;
		spaceSuppressed = false;
		lastPrinted = null;
		// FIXME encoding
		this.out = new PrintWriter(out);
		formatBlockContent(p);
		this.out.flush();
	}

	/**
	 * Format content of the block or top level source. The method returns when
	 * the parser is positioned at the end of the source or at the end of the
	 * block; that terminating token is left for the caller.
	 * 
	 * @param p
	 *            a term parser
	 * @throws IllegalStateException
	 *             if a term token of unexpected kind is encountered
	 */
	private void formatBlockContent(TermParser p) {
		while (p.current().kind() != Terms.EOF
				&& p.current().kind() != Terms.BLOCK_END) {
			TermToken tt = p.current();
			Token tk = token(tt);
			switch (tt.kind()) {
			case IGNORABLE:
				processIgnorable(p, tk);
				break;
			case CONTROL:
				// Whatever token was, advance to the next token.
				// Note that actual '{', '}' and ';' tokens are printed
				// by the block and segment parsing code.
				p.advance();
				break;
			case SEGMENT_START:
				formatSegment(p);
				break;
			case LEXICAL_ERROR:
				// lexical errors are treated the same as single line block
				// comments. A term without lexical token has nothing to
				// print.
				if (tk != null) {
					startBlockContentComment(tk);
					print(tk);
				}
				// FIXME REPORT
				p.advance();
				break;
			case GRAMMAR_IS_LOADED:
				// FIXME log?
				p.advance();
				break;
			case GRAMMAR_ERROR:
			case SYNTAX_ERROR:
			case SEGMENT_ERROR:
				// FIXME REPORT
				p.advance();
				break;
			default:
				// An assertion is not sufficient here: with assertions
				// disabled the loop would never advance past this token.
				throw new IllegalStateException(
						"it should be never encountered here: " + tt);
			}
		}
	}

	/**
	 * Get lexical token from term token
	 * 
	 * @param tt
	 *            a term token
	 * @return a token from lexer or null
	 */
	private Token token(TermToken tt) {
		return tt.hasLexicalToken() ? tt.token().token() : null;
	}

	/**
	 * Process ignorable token and advance the parser past it. New lines are
	 * only remembered, comments are printed, and all other ignorable tokens
	 * produce no output.
	 * 
	 * @param p
	 *            a parser
	 * @param tk
	 *            a token, if null the parser is just advanced
	 * @throws IllegalStateException
	 *             if the token is a part of a partially reported block comment
	 */
	private void processIgnorable(TermParser p, Token tk) {
		if (tk == null) {
			// nothing to print for a term without lexical token
			p.advance();
			return;
		}
		switch (tk.kind()) {
		case NEWLINE:
			wasNewLine = true;
			break;
		case DOC_COMMENT:
			// Note if doc comment is classified as ignorable, then it is
			// encountered in the context where doc comments cannot happen and
			// it should be treated the same as a line comment.
		case LINE_COMMENT:
			startBlockContentComment(tk);
			print(tk);
			forceNewLine();
			break;
		case BLOCK_COMMENT:
			startBlockContentComment(tk);
			print(tk);
			break;
		case BLOCK_COMMENT_START:
		case BLOCK_COMMENT_PART:
		case BLOCK_COMMENT_END:
			throw new IllegalStateException("Term parser is assumed "
					+ "not to report partial tokens: " + tk);
		default:
			// other ignorable tokens are dropped
			break;
		}
		// Whatever token was, advance to the next token.
		p.advance();
	}

	/**
	 * Format the segment. The segment is started on a new indented line and it
	 * is terminated with ";".
	 * 
	 * @param p
	 *            the parser
	 */
	private void formatSegment(TermParser p) {
		startIndentedLine();
		consume(p, Terms.SEGMENT_START);
		while (p.current().kind() != Terms.SEGMENT_END) {
			TermToken tt = p.current();
			Token tk = token(tt);
			switch (tt.kind()) {
			case CONTROL:
				p.advance();
				break;
			case IGNORABLE:
				processIgnorable(p, tk);
				break;
			case DOC_COMMENT_START:
				formatDocComments(p);
				break;
			case ATTRIBUTES_START:
				formatAttributes(p);
				break;
			default:
				formatSegmentContent(p);
				break;
			}
		}
		printControl(";");
		consume(p, Terms.SEGMENT_END);
	}

	/**
	 * Format attributes. A new line is forced after the end of each outermost
	 * attribute object, and a new indented line is started after the
	 * attributes.
	 * 
	 * @param p
	 *            a parser
	 */
	private void formatAttributes(TermParser p) {
		consume(p, Terms.ATTRIBUTES_START);
		// nesting depth of the objects that are currently open
		int objects = 0;
		while (p.current().kind() != Terms.ATTRIBUTES_END) {
			TermToken tt = p.current();
			Token tk = token(tt);
			switch (tt.kind()) {
			case CONTROL:
				p.advance();
				break;
			case IGNORABLE:
				processIgnorable(p, tk);
				break;
			case OBJECT_START:
				objects++;
				p.advance();
				break;
			case OBJECT_END:
				objects--;
				if (objects == 0) {
					forceNewLine();
				}
				p.advance();
				break;
			default:
				formatSegmentContent(p);
				break;
			}
		}
		consume(p, Terms.ATTRIBUTES_END);
		startIndentedLine();
	}

	/**
	 * Process a token inside segment contents. The methods processes either a
	 * single token or the block.
	 * 
	 * @param p
	 *            a parser
	 */
	private void formatSegmentContent(TermParser p) {
		TermToken tt = p.current();
		Token tk = token(tt);
		switch (tt.kind()) {
		case CONTROL:
			p.advance();
			break;
		case IGNORABLE:
			processIgnorable(p, tk);
			break;
		case BLOCK_START:
			space();
			consume(p, Terms.BLOCK_START);
			printControl("{");
			forceNewLine();
			indentLevel++;
			formatBlockContent(p);
			indentLevel--;
			startIndentedLine();
			printControl("}");
			consume(p, Terms.BLOCK_END);
			break;
		default:
			if (tk != null) {
				printSegmentToken(tk);
			}
			p.advance();
			break;
		}
	}

	/**
	 * Print a lexical token of the segment applying spacing rules that depend
	 * on the kind of the token.
	 * 
	 * @param tk
	 *            a token to print, must not be null
	 */
	private void printSegmentToken(Token tk) {
		switch (tk.kind()) {
		case OPEN_ROUND:
			// no space before "(" and no space after it
			print(tk);
			spaceSuppressed = true;
			break;
		case OPEN_SQUARE:
			space();
			print(tk);
			break;
		case COMMA:
		case CLOSE_ROUND:
			// printed directly after the previous token
			print(tk);
			break;
		case CLOSE_SQUARE:
			spaceSuppressed = false;
			space();
			print(tk);
			break;
		case GRAPHICS:
			if (graphicsWithSuppressedSpace.contains(tk.text())) {
				// these graphics are glued to their neighbours unless they
				// follow another graphics token
				if (wasLastPrintedGraphics()) {
					spaceSuppressed = false;
					space();
				}
				print(tk);
				spaceSuppressed = true;
			} else {
				// adjacent graphics tokens are always separated by space
				if (wasLastPrintedGraphics()) {
					spaceSuppressed = false;
				}
				space();
				print(tk);
			}
			break;
		default:
			space();
			print(tk);
			break;
		}
	}

	/**
	 * Check whether the last printed token was a graphics token.
	 * 
	 * @return true if some token was already printed and it was a graphics
	 *         token, false if nothing was printed yet or if the token was of
	 *         another kind
	 */
	private boolean wasLastPrintedGraphics() {
		return lastPrinted != null && lastPrinted.kind() == Tokens.GRAPHICS;
	}

	/**
	 * Consume term token of the specified kind
	 * 
	 * @param p
	 *            a parser
	 * @param kind
	 *            the expected token kind
	 * @throws IllegalStateException
	 *             if the current token is not of the expected kind
	 */
	private void consume(TermParser p, Terms kind) {
		if (p.current().kind() != kind) {
			throw new IllegalStateException("The current token " + p.current()
					+ " does not match expected kind " + kind);
		}
		p.advance();
	}

	/**
	 * Format documentation comments. Each documentation comment value is
	 * printed on its own line, and a new indented line is started after the
	 * comments.
	 * 
	 * @param p
	 *            a parser
	 */
	private void formatDocComments(TermParser p) {
		consume(p, Terms.DOC_COMMENT_START);
		while (p.current().kind() != Terms.DOC_COMMENT_END) {
			TermToken tt = p.current();
			Token tk = token(tt);
			switch (tt.kind()) {
			case IGNORABLE:
				processIgnorable(p, tk);
				break;
			case VALUE:
				// the line is already started unless something was printed
				// on it
				if (wereTokens) {
					startIndentedLine();
				}
				print(tk);
				forceNewLine();
				p.advance();
				break;
			default:
				p.advance();
				break;
			}
		}
		consume(p, Terms.DOC_COMMENT_END);
		startIndentedLine();
	}

	/**
	 * Start comment inside block content. If the comment follows tokens on the
	 * same input line, it is separated from them by space. Otherwise the
	 * comment is put on a new line, which is indented unless the comment
	 * started at the first column in the input.
	 * 
	 * @param tt
	 *            a lexical token of the comment
	 */
	private void startBlockContentComment(Token tt) {
		if (wereTokens && !wasNewLine) {
			space();
		} else {
			if (tt.start().column() != TextPos.START_COLUMN) {
				startIndentedLine();
			} else {
				startLine();
			}
		}
	}

	/**
	 * Force new line. The line break is printed immediately, so the next
	 * started line does not print one more line break.
	 */
	private void forceNewLine() {
		out.print('\n');
		needNewLine = false;
	}

	/**
	 * Print the text of the token and remember the token as the last printed
	 * one
	 * 
	 * @param text
	 *            a token to print
	 */
	private void print(Token text) {
		lastPrinted = text;
		printControl(text.text());
	}

	/**
	 * Print the text. After printing, the current line is marked as containing
	 * tokens and the pending space suppression is cancelled.
	 * 
	 * @param text
	 *            a text to print
	 */
	private void printControl(String text) {
		out.print(text);
		wasNewLine = false;
		spaceSuppressed = false;
		wereTokens = true;
	}

	/**
	 * Print single space character, unless there were no tokens on the current
	 * line yet or the space is suppressed
	 */
	private void space() {
		if (wereTokens && !spaceSuppressed) {
			out.print(' ');
		}
	}

	/**
	 * Start a line. The line break is printed only if it is pending; otherwise
	 * it is just marked as pending for the next line start.
	 */
	private void startLine() {
		if (needNewLine) {
			out.print('\n');
		} else {
			needNewLine = true;
		}
		wasNewLine = false;
		wereTokens = false;
	}

	/**
	 * Start a line and print the indentation string once per current
	 * indentation level
	 */
	private void startIndentedLine() {
		startLine();
		for (int i = 0; i < indentLevel; i++) {
			out.print(indentationString);
		}
	}
}