/*
* Reference ETL Parser for Java
* Copyright (c) 2000-2009 Constantine A Plotnikov
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package net.sf.etl.parsers.utils;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.HashSet;
import net.sf.etl.parsers.TermParser;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.Token;
import net.sf.etl.parsers.Tokens;
/**
* This class implements the default ETL source code formatting. The formatting
* is very simple right now. It is done as follows:
* <ul>
* <li>
* <p>
* Blocks are formatted as follows:
* </p>
*
* <pre>
* start {
* text;
* } then {
* text;
* } end;
* </pre>
*
* </li>
*
* <li>The whitespace is ignored.</li>
* <li>Attributes are started on the new line. After the end of each attribute
* object a new line is forced.</li>
* <li>The documentation comments are indented and they are always put on a
* separate line.</li>
* <li>The line and block comments are not touched, if they start at the
* beginning of the line. Otherwise they are indented to the current level if
* there were no tokens on the current line. For multiline block comments,
* additional parts are not touched.</li>
* <li>If the line comment is inside segment, then the segment is continued on
* the current indentation level on the next line.</li>
* <li>The tab character is used for indentation.</li>
* </ul>
*
* @author const
*/
public class FormatSource extends AbstractFileConverter {
    /** the string printed once per indentation level at the start of a line */
    String indentationString = "\t";
    /** current indentation level */
    int indentLevel = 0;
    /** true if there already were non whitespace tokens on the current line */
    boolean wereTokens = false;
    /**
     * true if an output line was started and not yet terminated; starting the
     * next line then prints a line break first
     */
    boolean needNewLine = false;
    /**
     * if true there was a new line in input between printed token and a new
     * token
     */
    boolean wasNewLine = true;
    /** if true, the next space character is suppressed */
    boolean spaceSuppressed = false;
    /** an output */
    PrintWriter out;
    /** last token printed, or null if no token has been printed yet */
    private Token lastPrinted;
    /** graphics around which space is suppressed */
    private final HashSet<String> graphicsWithSuppressedSpace = new HashSet<String>();

    /**
     * A constructor
     */
    public FormatSource() {
        // FIXME make configuration. Possibly more flexible configuration is
        // required, for example one that consider context of the expression.
        graphicsWithSuppressedSpace.add(".");
        graphicsWithSuppressedSpace.add(":");
    }

    /**
     * Application entry point. Any failure is printed to the standard error
     * stream and the process exits with the status 1.
     *
     * @param args
     *            application arguments
     */
    public static void main(String[] args) {
        try {
            new FormatSource().start(args);
        } catch (Throwable t) {
            t.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void processContent(OutputStream out, TermParser p)
            throws Exception {
        // Start every run from a clean line state, so that nothing leaks from
        // a previously formatted source if this method is invoked more than
        // once on the same instance. On the first invocation this is a no-op.
        resetLineState();
        // FIXME encoding (the writer uses the platform default charset)
        this.out = new PrintWriter(out);
        formatBlockContent(p);
        this.out.flush();
    }

    /**
     * Reset the per-source line tracking state to the initial values. The
     * indentation settings are left untouched.
     */
    private void resetLineState() {
        wereTokens = false;
        needNewLine = false;
        wasNewLine = true;
        spaceSuppressed = false;
        lastPrinted = null;
    }

    /**
     * Format content of the block or top level source. The method stops at
     * the end of the source or at the end of the block; the terminating token
     * is not consumed.
     *
     * @param p
     *            a term parser
     */
    private void formatBlockContent(TermParser p) {
        while (p.current().kind() != Terms.EOF
                && p.current().kind() != Terms.BLOCK_END) {
            TermToken tt = p.current();
            Token tk = token(tt);
            switch (tt.kind()) {
            case IGNORABLE:
                processIgnorable(p, tk);
                break;
            case CONTROL:
                // Whatever token was, advance to the next token.
                // Note that actual '{', '}' and ';' tokens are printed
                // by the block and segment parsing code.
                p.advance();
                break;
            case SEGMENT_START:
                formatSegment(p);
                break;
            case LEXICAL_ERROR:
                // lexical errors are treated the same as single line block
                // comments.
                startBlockContentComment(tk);
                print(tk);
                // FIXME REPORT
                p.advance();
                break;
            case GRAMMAR_IS_LOADED:
                // FIXME log?
                p.advance();
                break;
            case GRAMMAR_ERROR:
            case SYNTAX_ERROR:
            case SEGMENT_ERROR:
                // FIXME REPORT
                p.advance();
                break;
            default:
                assert false : "it should be never encountered here: " + tt;
                // Assertions are disabled by default. Skip the unexpected
                // token, otherwise the loop would never make progress.
                p.advance();
                break;
            }
        }
    }

    /**
     * Get lexical token from term token
     *
     * @param tt
     *            a term token
     * @return a token from lexer or null if the term token has no lexical
     *         token
     */
    private Token token(TermToken tt) {
        return tt.hasLexicalToken() ? tt.token().token() : null;
    }

    /**
     * Process ignorable token and advance the parser. New lines are only
     * remembered, comments are printed, and all other ignorable tokens are
     * dropped.
     *
     * @param p
     *            a parser
     * @param tk
     *            a token
     * @throws IllegalStateException
     *             if a partial block comment token is encountered
     */
    private void processIgnorable(TermParser p, Token tk) {
        switch (tk.kind()) {
        case NEWLINE:
            wasNewLine = true;
            break;
        case DOC_COMMENT:
            // Note if doc comment is classified as ignorable, then it is
            // encountered in the context where doc comments cannot happen and
            // it should be treated the same as a line comment.
        case LINE_COMMENT:
            startBlockContentComment(tk);
            print(tk);
            forceNewLine();
            break;
        case BLOCK_COMMENT:
            startBlockContentComment(tk);
            print(tk);
            break;
        case BLOCK_COMMENT_START:
        case BLOCK_COMMENT_PART:
        case BLOCK_COMMENT_END:
            throw new IllegalStateException("Term parser is assumed "
                    + "not to report partial tokens: " + tk);
        }
        // Whatever token was, advance to the next token.
        p.advance();
    }

    /**
     * Format the segment. The segment is started on a new indented line and
     * it is terminated with ';'.
     *
     * @param p
     *            the parser
     */
    private void formatSegment(TermParser p) {
        startIndentedLine();
        consume(p, Terms.SEGMENT_START);
        while (p.current().kind() != Terms.SEGMENT_END) {
            TermToken tt = p.current();
            Token tk = token(tt);
            switch (tt.kind()) {
            case CONTROL:
                p.advance();
                break;
            case IGNORABLE:
                processIgnorable(p, tk);
                break;
            case DOC_COMMENT_START:
                formatDocComments(p);
                break;
            case ATTRIBUTES_START:
                formatAttributes(p);
                break;
            default:
                formatSegmentContent(p);
                break;
            }
        }
        printControl(";");
        consume(p, Terms.SEGMENT_END);
    }

    /**
     * Format attributes. A new line is forced after the end of each top level
     * attribute object, and a new indented line is started after the
     * attributes.
     *
     * @param p
     *            a parser
     */
    private void formatAttributes(TermParser p) {
        consume(p, Terms.ATTRIBUTES_START);
        // nesting depth of the objects that are currently open
        int objects = 0;
        while (p.current().kind() != Terms.ATTRIBUTES_END) {
            TermToken tt = p.current();
            Token tk = token(tt);
            switch (tt.kind()) {
            case CONTROL:
                p.advance();
                break;
            case IGNORABLE:
                processIgnorable(p, tk);
                break;
            case OBJECT_START:
                objects++;
                p.advance();
                break;
            case OBJECT_END:
                objects--;
                if (objects == 0) {
                    // the outermost object has just ended
                    forceNewLine();
                }
                p.advance();
                break;
            default:
                formatSegmentContent(p);
                break;
            }
        }
        consume(p, Terms.ATTRIBUTES_END);
        startIndentedLine();
    }

    /**
     * Process a token inside segment contents. The method processes either a
     * single token or the block.
     *
     * @param p
     *            a parser
     */
    private void formatSegmentContent(TermParser p) {
        TermToken tt = p.current();
        Token tk = token(tt);
        switch (tt.kind()) {
        case CONTROL:
            p.advance();
            break;
        case IGNORABLE:
            processIgnorable(p, tk);
            break;
        case BLOCK_START:
            formatBlock(p);
            break;
        default:
            // term tokens without a lexical token produce no output
            if (tk != null) {
                printSegmentToken(tk);
            }
            p.advance();
            break;
        }
    }

    /**
     * Format the block: print the braces and format the block content one
     * indentation level deeper.
     *
     * @param p
     *            a parser positioned at the block start
     */
    private void formatBlock(TermParser p) {
        space();
        consume(p, Terms.BLOCK_START);
        printControl("{");
        forceNewLine();
        indentLevel++;
        formatBlockContent(p);
        indentLevel--;
        startIndentedLine();
        printControl("}");
        consume(p, Terms.BLOCK_END);
    }

    /**
     * Print a lexical token of the segment with the spacing that depends on
     * the kind of the token.
     *
     * @param tk
     *            a token to print (must not be null)
     */
    private void printSegmentToken(Token tk) {
        switch (tk.kind()) {
        case OPEN_ROUND:
            // no space before '(' and no space after it
            print(tk);
            spaceSuppressed = true;
            break;
        case OPEN_SQUARE:
            space();
            print(tk);
            break;
        case COMMA:
        case CLOSE_ROUND:
            // no space before ',' and ')'
            print(tk);
            break;
        case CLOSE_SQUARE:
            spaceSuppressed = false;
            space();
            print(tk);
            break;
        case GRAPHICS:
            printGraphics(tk);
            break;
        default:
            space();
            print(tk);
            break;
        }
    }

    /**
     * Print a graphics token. The graphics from the suppressed set are
     * printed without surrounding spaces, unless the previously printed token
     * was a graphics token as well.
     *
     * @param tk
     *            a graphics token
     */
    private void printGraphics(Token tk) {
        // lastPrinted is null until the first token is printed
        boolean afterGraphics = lastPrinted != null
                && lastPrinted.kind() == Tokens.GRAPHICS;
        if (graphicsWithSuppressedSpace.contains(tk.text())) {
            if (afterGraphics) {
                spaceSuppressed = false;
                space();
            }
            print(tk);
            // print() clears the flag, so it is set after the token
            spaceSuppressed = true;
        } else {
            if (afterGraphics) {
                spaceSuppressed = false;
            }
            space();
            print(tk);
        }
    }

    /**
     * Consume term token of the specified kind
     *
     * @param p
     *            a parser
     * @param kind
     *            the expected token kind
     * @throws IllegalStateException
     *             if the current token is not of the expected kind
     */
    private void consume(TermParser p, Terms kind) {
        if (p.current().kind() != kind) {
            throw new IllegalStateException("The current token " + p.current()
                    + " does not match expected kind " + kind);
        }
        p.advance();
    }

    /**
     * Format documentation comments. Each documentation comment line is put
     * on its own indented line, and a new indented line is started after the
     * comments.
     *
     * @param p
     *            a parser
     */
    private void formatDocComments(TermParser p) {
        consume(p, Terms.DOC_COMMENT_START);
        while (p.current().kind() != Terms.DOC_COMMENT_END) {
            TermToken tt = p.current();
            Token tk = token(tt);
            switch (tt.kind()) {
            case IGNORABLE:
                processIgnorable(p, tk);
                break;
            case VALUE:
                if (wereTokens) {
                    // something was already printed on the current line
                    startIndentedLine();
                }
                print(tk);
                forceNewLine();
                p.advance();
                break;
            default:
                p.advance();
                break;
            }
        }
        consume(p, Terms.DOC_COMMENT_END);
        startIndentedLine();
    }

    /**
     * Start comment inside block content. If something was already printed on
     * the current line and there was no new line in the input since then, the
     * comment is separated by a space. Otherwise the comment is started on a
     * new line, which is indented unless the comment started at the first
     * column of the input.
     *
     * @param tt
     *            a lexical token of the comment
     */
    private void startBlockContentComment(Token tt) {
        if (wereTokens && !wasNewLine) {
            space();
        } else if (tt.start().column() != TextPos.START_COLUMN) {
            startIndentedLine();
        } else {
            startLine();
        }
    }

    /**
     * Force new line: print a line break right now and mark the current line
     * as terminated.
     */
    private void forceNewLine() {
        out.print('\n');
        needNewLine = false;
    }

    /**
     * Print the text of the token and remember the token as the last printed
     * one.
     *
     * @param text
     *            a token to print
     */
    private void print(Token text) {
        lastPrinted = text;
        printControl(text.text());
    }

    /**
     * Print the text as is and update the line state. The last printed token
     * is not changed.
     *
     * @param text
     *            a text to print
     */
    private void printControl(String text) {
        out.print(text);
        wasNewLine = false;
        spaceSuppressed = false;
        wereTokens = true;
    }

    /**
     * Print single space character, unless nothing was printed on the current
     * line yet or the space is suppressed.
     */
    private void space() {
        if (wereTokens && !spaceSuppressed) {
            out.print(' ');
        }
    }

    /**
     * Start a line without indentation. The line break is printed only if the
     * previous line was not terminated yet.
     */
    private void startLine() {
        if (needNewLine) {
            out.print('\n');
        } else {
            needNewLine = true;
        }
        wasNewLine = false;
        wereTokens = false;
    }

    /**
     * Start a line and print the indentation for the current indentation
     * level.
     */
    private void startIndentedLine() {
        startLine();
        for (int i = 0; i < indentLevel; i++) {
            out.print(indentationString);
        }
    }
}