/* * Copyright (C) 2007-2010 JĂșlio Vilmar Gesser. * Copyright (C) 2011, 2013-2015 The JavaParser Team. * * This file is part of JavaParser. * * JavaParser can be used either under the terms of * a) the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * b) the terms of the Apache License * * You should have received a copy of both licenses in LICENCE.LGPL and * LICENCE.APACHE. Please refer to those files for details. * * JavaParser is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. */ package com.github.javaparser.ast.comments; import java.io.*; import java.nio.charset.Charset; import java.util.*; /** * This parser cares exclusively about comments. */ public class CommentsParser { private enum State { CODE, IN_LINE_COMMENT, IN_BLOCK_COMMENT, IN_STRING, IN_CHAR } private static final int COLUMNS_PER_TAB = 4; public CommentsCollection parse(final String source) throws IOException, UnsupportedEncodingException { InputStream in = new ByteArrayInputStream(source.getBytes(Charset.defaultCharset())); return parse(in, Charset.defaultCharset().name()); } /** * Track the internal state of the parser, remembering the last characters observed. */ class ParserState { private Deque prevTwoChars = new LinkedList<Character>(); /** * Is the last character the one expected? */ boolean isLastChar(char expectedChar) { return prevTwoChars.size() >= 1 && prevTwoChars.peekLast().equals(expectedChar); } /** * Is the character before the last one the same as expectedChar? */ public boolean isSecondToLastChar(char expectedChar) { return prevTwoChars.size() >= 1 && prevTwoChars.peekFirst().equals(expectedChar); } /** * Record a new character. It will be the last one. The character that was the last one will * become the second to last one. */ public void update(char c) { if (prevTwoChars.size() == 2) { prevTwoChars.remove(); } prevTwoChars.add(c); } /** * Remove all the characters observed. */ public void reset() { while (!prevTwoChars.isEmpty()) { prevTwoChars.removeFirst(); } } } public CommentsCollection parse(final InputStream in, final String charsetName) throws IOException, UnsupportedEncodingException { boolean lastWasASlashR = false; BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName)); CommentsCollection comments = new CommentsCollection(); int r; ParserState parserState = new ParserState(); State state = State.CODE; LineComment currentLineComment = null; BlockComment currentBlockComment = null; StringBuffer currentContent = null; int currLine = 1; int currCol = 1; while ((r=br.read()) != -1){ char c = (char)r; if (c=='\r'){ lastWasASlashR = true; } else if (c=='\n'&&lastWasASlashR){ lastWasASlashR=false; continue; } else { lastWasASlashR=false; } switch (state) { case CODE: if (parserState.isLastChar('/') && c == '/') { currentLineComment = new LineComment(); currentLineComment.setBeginLine(currLine); currentLineComment.setBeginColumn(currCol - 1); state = State.IN_LINE_COMMENT; currentContent = new StringBuffer(); } else if (parserState.isLastChar('/') && c == '*') { currentBlockComment = new BlockComment(); currentBlockComment.setBeginLine(currLine); currentBlockComment.setBeginColumn(currCol - 1); state = State.IN_BLOCK_COMMENT; currentContent = new StringBuffer(); } else if (c == '"') { state = State.IN_STRING; } else if (c == '\'') { state = State.IN_CHAR; } else { // nothing to do } break; case IN_LINE_COMMENT: if (c=='\n' || c=='\r'){ currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); state = State.CODE; } else { currentContent.append(c); } break; case IN_BLOCK_COMMENT: // '/*/' is not a valid block comment: it starts the block comment but it does not close it // However this sequence can be contained inside a comment and in that case it close the comment // For example: // /* blah blah /*/ // At the previous line we had a valid block comment if (parserState.isLastChar('*') && c=='/' && (!parserState.isSecondToLastChar('/') || currentContent.length() > 0)){ // delete last character String content = currentContent.deleteCharAt(currentContent.toString().length()-1).toString(); if (content.startsWith("*")){ JavadocComment javadocComment = new JavadocComment(); javadocComment.setContent(content.substring(1)); javadocComment.setBeginLine(currentBlockComment.getBeginLine()); javadocComment.setBeginColumn(currentBlockComment.getBeginColumn()); javadocComment.setEndLine(currLine); javadocComment.setEndColumn(currCol+1); comments.addComment(javadocComment); } else { currentBlockComment.setContent(content); currentBlockComment.setEndLine(currLine); currentBlockComment.setEndColumn(currCol+1); comments.addComment(currentBlockComment); } state = State.CODE; } else { currentContent.append(c=='\r'?'\n':c); } break; case IN_STRING: if (!parserState.isLastChar('\\') && c == '"') { state = State.CODE; } break; case IN_CHAR: if (!parserState.isLastChar('\\') && c == '\'') { state = State.CODE; } break; default: throw new RuntimeException("Unexpected"); } switch (c){ case '\n': case '\r': currLine+=1; currCol = 1; break; case '\t': currCol+=COLUMNS_PER_TAB; break; default: currCol+=1; } // ok we have two slashes in a row inside a string // we want to replace them with... anything else, to not confuse // the parser if (state==State.IN_STRING && parserState.isLastChar('\\') && c == '\\') { parserState.reset(); } else { parserState.update(c); } } if (state==State.IN_LINE_COMMENT){ currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); } return comments; } }