/*
* Copyright (C) 2007-2010 Júlio Vilmar Gesser.
* Copyright (C) 2011, 2013-2015 The JavaParser Team.
*
* This file is part of JavaParser.
*
* JavaParser can be used either under the terms of
* a) the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* b) the terms of the Apache License
*
* You should have received a copy of both licenses in LICENCE.LGPL and
* LICENCE.APACHE. Please refer to those files for details.
*
* JavaParser is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*/
package com.twosigma.beaker.javash.evaluator;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.text.translate.AggregateTranslator;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
import java.util.Deque;
import java.util.LinkedList;
/**
* Idea from https://github.com/javaparser/javaparser
*/
public class ParserUtil {

  /**
   * Translator applied by {@link #normalizeCode(String)} before comments are stripped. It wraps a
   * commons-lang {@link UnicodeUnescaper}, which decodes Unicode escape sequences.
   */
  public static final CharSequenceTranslator UNICODE_UNESCAPER = new AggregateTranslator(new UnicodeUnescaper());

  /** Lexical context the scanner in {@link #removeComments(String)} is currently in. */
  private enum State {
    /** Ordinary source text. */
    CODE,
    /** A single slash was just read in code; the next character decides whether a comment starts. */
    CAN_BE_COMMENT_START,
    /** Inside an end-of-line comment (introduced by two slashes). */
    IN_LINE_COMMENT,
    /** Inside a block comment (introduced by slash-star). */
    IN_BLOCK_COMMENT,
    /** Inside a double-quoted string literal. */
    IN_STRING,
    /** Inside a single-quoted character literal. */
    IN_CHAR
  }

  /**
   * Track the internal state of the parser, remembering the last two characters observed.
   */
  static class ParserState {

    // Holds at most two characters: the head is the older one, the tail is the most recent one.
    private final Deque<Character> prevTwoChars = new LinkedList<Character>();

    /**
     * Is the last character the one expected?
     *
     * @param expectedChar character to compare with the most recently recorded one
     * @return {@code true} if at least one character is recorded and the newest equals
     *     {@code expectedChar}
     */
    boolean isLastChar(char expectedChar) {
      return !prevTwoChars.isEmpty() && prevTwoChars.peekLast().charValue() == expectedChar;
    }

    /**
     * Is the character before the last one the same as expectedChar?
     *
     * @param expectedChar character to compare with the one recorded before the last one
     * @return {@code true} only if two characters are recorded and the older equals
     *     {@code expectedChar}
     */
    public boolean isSecondToLastChar(char expectedChar) {
      // With a single recorded character the head of the deque is the LAST character,
      // so two entries are required before the head can be the second-to-last one.
      return prevTwoChars.size() >= 2 && prevTwoChars.peekFirst().charValue() == expectedChar;
    }

    /**
     * Record a new character. It will be the last one. The character that was the last one will
     * become the second to last one.
     *
     * @param c the character just consumed
     */
    public void update(char c) {
      if (prevTwoChars.size() == 2) {
        prevTwoChars.removeFirst();
      }
      prevTwoChars.addLast(c);
    }

    /**
     * Remove all the characters observed.
     */
    public void reset() {
      prevTwoChars.clear();
    }
  }

  /**
   * Returns {@code javaCode} with end-of-line comments and block comments removed.
   *
   * <p>The text is scanned once, character by character. Comment markers that appear inside
   * string or character literals are left untouched. The line terminator that ends an
   * end-of-line comment is kept, because it is not part of the comment. Block comments are
   * removed entirely, including any line breaks they contain, and nothing is inserted in their
   * place.
   *
   * @param javaCode the source text to process; must not be {@code null}
   * @return the source text without comments
   * @throws NullPointerException if {@code javaCode} is {@code null}
   */
  public static String removeComments(String javaCode) {
    ParserState parserState = new ParserState();
    State state = State.CODE;
    // Whether at least one character of the current block comment's body has been consumed.
    boolean blockCommentHasContent = false;
    StringBuilder builder = new StringBuilder(javaCode.length());
    for (int i = 0; i < javaCode.length(); i++) {
      char c = javaCode.charAt(i);
      switch (state) {
        case CODE:
          // A slash is emitted optimistically; it is taken back if it turns out to open a comment.
          state = stateAfterCodeChar(c);
          builder.append(c);
          break;
        case CAN_BE_COMMENT_START:
          // The previous character was a '/' that has already been appended to the output.
          if (c == '/') {
            builder.setLength(builder.length() - 1);
            state = State.IN_LINE_COMMENT;
          } else if (c == '*') {
            builder.setLength(builder.length() - 1);
            state = State.IN_BLOCK_COMMENT;
            blockCommentHasContent = false;
          } else {
            // Plain division operator; c may itself open a string or character literal.
            state = stateAfterCodeChar(c);
            builder.append(c);
          }
          break;
        case IN_LINE_COMMENT:
          if (c == '\n' || c == '\r') {
            // The terminator belongs to the code, not to the comment: keep it so that
            // neighbouring lines (and tokens) are not glued together.
            builder.append(c);
            state = State.CODE;
          }
          break;
        case IN_BLOCK_COMMENT:
          // '/*/' is not a complete block comment: it opens the comment but does not close it.
          // The same three characters do close a comment that already has content, e.g.
          //   /* blah blah /*/
          if (c == '/' && parserState.isLastChar('*')
              && (blockCommentHasContent || !parserState.isSecondToLastChar('/'))) {
            state = State.CODE;
          } else {
            blockCommentHasContent = true;
          }
          break;
        case IN_STRING:
          if (!parserState.isLastChar('\\') && c == '"') {
            state = State.CODE;
          }
          builder.append(c);
          break;
        case IN_CHAR:
          if (!parserState.isLastChar('\\') && c == '\'') {
            state = State.CODE;
          }
          builder.append(c);
          break;
        default:
          throw new IllegalStateException("Unexpected");
      }
      // Two backslashes in a row inside a string or character literal form one escaped
      // backslash. Forget them, so that a quote right after them is not mistaken for an
      // escaped quote.
      boolean insideLiteral = state == State.IN_STRING || state == State.IN_CHAR;
      if (insideLiteral && c == '\\' && parserState.isLastChar('\\')) {
        parserState.reset();
      } else {
        parserState.update(c);
      }
    }
    return builder.toString();
  }

  /**
   * Decides which state follows when {@code c} is read as ordinary code.
   */
  private static State stateAfterCodeChar(char c) {
    switch (c) {
      case '/':
        return State.CAN_BE_COMMENT_START;
      case '"':
        return State.IN_STRING;
      case '\'':
        return State.IN_CHAR;
      default:
        return State.CODE;
    }
  }

  /**
   * Normalizes a snippet of code: passes it through {@link #UNICODE_UNESCAPER} and then removes
   * its comments with {@link #removeComments(String)}.
   *
   * <p>Nothing else is done: no line break is inserted after {@code ';'} and empty lines are
   * not removed.
   *
   * @param code the code to normalize; may be {@code null}
   * @return the code without comments, or the empty string if {@code code} is {@code null} or
   *     the result is blank
   */
  public static String normalizeCode(String code) {
    if (code == null) {
      return StringUtils.EMPTY;
    }
    final String normalizedCode = ParserUtil.removeComments(UNICODE_UNESCAPER.translate(code));
    return StringUtils.isNotBlank(normalizedCode) ? normalizedCode : StringUtils.EMPTY;
  }
}