Indentation.java example

Explorer
craken-master
/*****************************************************************************
 * Copyright (C) Codehaus.org                                                *
 * ------------------------------------------------------------------------- *
 * Licensed under the Apache License, Version 2.0 (the "License");           *
 * you may not use this file except in compliance with the License.          *
 * You may obtain a copy of the License at                                   *
 *                                                                           *
 * http://www.apache.org/licenses/LICENSE-2.0                                *
 *                                                                           *
 * Unless required by applicable law or agreed to in writing, software       *
 * distributed under the License is distributed on an "AS IS" BASIS,         *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  *
 * See the License for the specific language governing permissions and       *
 * limitations under the License.                                            *
 *****************************************************************************/

package net.ion.rosetta;

import java.util.List;
import java.util.Stack;

import net.ion.rosetta.annotations.Private;
import net.ion.rosetta.functors.Unary;
import net.ion.rosetta.pattern.CharPredicate;
import net.ion.rosetta.pattern.Pattern;
import net.ion.rosetta.pattern.Patterns;
import net.ion.rosetta.util.Lists;
import net.ion.rosetta.util.Objects;

/**
 * Processes indentation based lexical structure according to the <a href="http://en.wikipedia.org/wiki/Off-side_rule">Off-side rule</a>.
 * 
 * @author Ben Yu
 */
public final class Indentation {

	/**
	 * A {@link CharPredicate} accepting every character for which {@link Character#isWhitespace(char)} is true, except the line feed character ({@code '\n'}).
	 */
	static final CharPredicate INLINE_WHITESPACE = new CharPredicate() {
		public boolean isChar(char c) {
			return Character.isWhitespace(c) && c != '\n';
		}

		@Override
		public String toString() {
			return "whitespace";
		}
	};

	/**
	 * A {@link Pattern} for a line continuation: a backslash character ({@code '\'}), then inline whitespace characters as matched by {@link Patterns#many(CharPredicate)}, then a line feed character ({@code '\n'}). Useful when the line feed is significant in the syntax (as in indentation-sensitive languages) but a logical line may still span several physical lines.
	 */
	static final Pattern LINE_CONTINUATION = Patterns.sequence(
			Patterns.isChar('\\'),
			Patterns.many(INLINE_WHITESPACE),
			Patterns.isChar('\n'));

	/**
	 * A {@link Pattern} for one or more consecutive {@link #INLINE_WHITESPACE} characters. Line continuations are not part of this pattern; {@link #WHITESPACES} is the one that combines it with {@link #LINE_CONTINUATION}.
	 */
	static final Pattern INLINE_WHITESPACES = Patterns.many1(INLINE_WHITESPACE);

	/**
	 * A {@link Parser} that recognizes one or more occurrences of either {@link #INLINE_WHITESPACES} or {@link #LINE_CONTINUATION}. In effect whitespace stays "on the same line" across a line feed that is escaped by a backslash character ({@code '\'}).
	 */
	public static final Parser<Void> WHITESPACES = Scanners.pattern(INLINE_WHITESPACES.or(LINE_CONTINUATION).many1(), "whitespaces");

	/** Default token values: {@code INDENT} and {@code OUTDENT} for the no-arg constructor, {@code LF} as the line feed marker used by {@link #lexer}. */
	@Private
	enum Punctuation {
		INDENT, OUTDENT, LF
	}

	/** Token value emitted when a line is indented deeper than the enclosing level. */
	private final Object indent;

	/** Token value emitted when an indentation level is closed. */
	private final Object outdent;

	/**
	 * Creates an {@link Indentation} object whose generated indentation and outdentation tokens carry {@code indent} and {@code outdent} as their values.
	 */
	public Indentation(Object indent, Object outdent) {
		this.indent = indent;
		this.outdent = outdent;
	}

	/**
	 * Creates an {@link Indentation} object that uses {@link Punctuation#INDENT} and {@link Punctuation#OUTDENT} as its token values.
	 */
	public Indentation() {
		this(Punctuation.INDENT, Punctuation.OUTDENT);
	}

	/** A {@link Parser} that recognizes a token whose value is this object's {@code indent} value. */
	public Parser<Token> indent() {
		return token(indent);
	}

	/** A {@link Parser} that recognizes a token whose value is this object's {@code outdent} value. */
	public Parser<Token> outdent() {
		return token(outdent);
	}

	/**
	 * A {@link Parser} that combines {@code tokenizer} with a scanner for the line feed character ({@code '\n'}), runs the combination as a lexer with {@code delim} as the delimiter, and then rewrites the resulting {@link Token} list with {@link #analyzeIndentations(List, Object)} so that line feeds are replaced by {@code indent} and {@code outdent} tokens.
	 */
	public Parser<List<Token>> lexer(Parser<?> tokenizer, Parser<?> delim) {
		Parser<?> lineFeed = Scanners.isChar('\n').retn(Punctuation.LF);
		Unary<List<Token>> indentationAnalyzer = new Unary<List<Token>>() {
			public List<Token> map(List<Token> tokens) {
				return analyzeIndentations(tokens, Punctuation.LF);
			}

			@Override
			public String toString() {
				return "lexer";
			}
		};
		return Parsers.plus(tokenizer, lineFeed).lexer(delim).map(indentationAnalyzer);
	}

	/** Builds a {@link Parser} for tokens matched by {@code InternalFunctors.tokenWithSameValue(value)}. */
	private static Parser<Token> token(Object value) {
		return Parsers.token(InternalFunctors.tokenWithSameValue(value));
	}

	/**
	 * Analyzes indentation by looking at the first token after each {@code lf} token and inserting {@code indent} and {@code outdent} tokens accordingly.
	 *
	 * <p>The indentation of a line is the distance between the end of the preceding {@code lf} token (or index 0 for the first line) and the index of the line's first token. A line whose first token is itself {@code lf} is treated as having indentation 0. Tokens equal to {@code lf} are dropped from the result. After the last token, one {@code outdent} is appended for every indentation level still open except the first one.
	 *
	 * @param tokens the tokens to analyze; returned as is when empty
	 * @param lf the token value that marks a line feed
	 * @return a new list holding the non-{@code lf} tokens plus the inserted pseudo tokens
	 */
	List<Token> analyzeIndentations(List<Token> tokens, Object lf) {
		if (tokens.isEmpty()) {
			return tokens;
		}
		final int size = tokens.size();
		final List<Token> output = Lists.arrayList(size + size / 16);
		final Stack<Integer> levels = new Stack<Integer>();
		boolean atLineStart = true;
		int lineStartIndex = 0;
		for (Token current : tokens) {
			final boolean isLineFeed = Objects.equals(current.value(), lf);
			if (atLineStart) {
				// a line that starts with lf has no meaningful indentation: use 0
				final int column = isLineFeed ? 0 : current.index() - lineStartIndex;
				newLine(current, levels, column, output);
			}
			atLineStart = isLineFeed;
			if (isLineFeed) {
				// the next line begins right after this lf token
				lineStartIndex = current.index() + current.length();
			} else {
				output.add(current);
			}
		}
		final Token last = tokens.get(size - 1);
		final Token closingOutdent = pseudoToken(last.index() + last.length(), outdent);
		// close every level that is still open, except the outermost one
		for (int open = levels.size(); open > 1; open--) {
			output.add(closingOutdent);
		}
		return output;
	}

	/**
	 * Reconciles the indentation of a line that starts at {@code token} with the stack of open indentation levels.
	 *
	 * <ul>
	 * <li>Empty stack: {@code indentation} becomes the first level and nothing is emitted.</li>
	 * <li>Deeper than the top level: the level is pushed and one {@code indent} pseudo token is emitted.</li>
	 * <li>Shallower than the top level: levels are popped, emitting one {@code outdent} pseudo token per pop, until the top level is no longer deeper; if the new indentation is then deeper than the remaining top, an {@code indent} follows.</li>
	 * <li>Equal to the top level: nothing happens.</li>
	 * </ul>
	 *
	 * <p>If popping removes the last level, the method returns immediately: no {@code outdent} is emitted for that pop and {@code indentation} is not recorded, so the stack is left empty.
	 */
	private void newLine(Token token, Stack<Integer> indentations, int indentation, List<Token> result) {
		while (!indentations.isEmpty()) {
			final int enclosing = indentations.peek();
			if (enclosing == indentation) {
				// same level: nothing to do
				return;
			}
			if (enclosing < indentation) {
				// deeper than the enclosing level: open a new one
				indentations.push(indentation);
				result.add(pseudoToken(token.index(), indent));
				return;
			}
			// shallower than the enclosing level: close it and re-check against the next one
			indentations.pop();
			if (indentations.isEmpty()) {
				return;
			}
			result.add(pseudoToken(token.index(), outdent));
		}
		// only reached when the stack was empty on entry
		indentations.push(indentation);
	}

	/** Creates a zero-length {@link Token} at {@code index} carrying {@code value}. */
	private static Token pseudoToken(int index, Object value) {
		return new Token(index, 0, value);
	}
}