/* * ============================================================================= * * Copyright (c) 2011-2016, The THYMELEAF team (http://www.thymeleaf.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * ============================================================================= */ package org.thymeleaf.templateparser.text; import java.io.Reader; import java.io.StringReader; import java.util.Arrays; /* * The TextParser is very similar in concept and structure to AttoParser's MarkupParser, but hugely simplified, given * text parsing does not need most of the events, configurability and conditions of markup parsing. * * Note that, instead of using AttoParser's IMarkupParser interface, the much simpler ITextHandler is used here instead. * * @author Daniel Fernandez * @since 3.0.0 * */ final class TextParser { private final BufferPool pool; private final boolean processCommentsAndLiterals; private final boolean standardDialectPresent; TextParser(final int poolSize, final int bufferSize, final boolean processCommentsAndLiterals, final boolean standardDialectPresent) { super(); this.pool = new BufferPool(poolSize, bufferSize); this.processCommentsAndLiterals = processCommentsAndLiterals; this.standardDialectPresent = standardDialectPresent; } public void parse(final String document, final ITextHandler handler) throws TextParseException { if (document == null) { throw new IllegalArgumentException("Document cannot be null"); } parse(new StringReader(document), handler); } public void parse( final Reader reader, final ITextHandler handler) throws TextParseException { if (reader == null) { throw new IllegalArgumentException("Reader cannot be null"); } if (handler == null) { throw new IllegalArgumentException("Handler cannot be null"); } ITextHandler handlerChain = handler; // The TextEventProcessorHandler will basically be in charge of controlling the stack of elements (the correct // nesting of element events). handlerChain = new EventProcessorTextHandler(handlerChain); // If comment processing is active (for JAVASCRIPT and CSS template modes), we need to look inside comments and // check if they are only wrapping elements or inlined expressions, in which case we will need to unwrap them. if (this.processCommentsAndLiterals) { handlerChain = new CommentProcessorTextHandler(this.standardDialectPresent, handlerChain); } parseDocument(reader, this.pool.poolBufferSize, handlerChain); } /* * This method receiving the buffer size with package visibility allows * testing different buffer sizes. */ void parseDocument(final Reader reader, final int suggestedBufferSize, final ITextHandler handler) throws TextParseException { final long parsingStartTimeNanos = System.nanoTime(); char[] buffer = null; try { final TextParseStatus status = new TextParseStatus(); handler.handleDocumentStart(parsingStartTimeNanos, 1, 1); int bufferSize = suggestedBufferSize; buffer = this.pool.allocateBuffer(bufferSize); int bufferContentSize = reader.read(buffer); boolean cont = (bufferContentSize != -1); status.offset = -1; status.line = 1; status.col = 1; status.inStructure = false; status.literalMarker = (char)0; while (cont) { parseBuffer(buffer, 0, bufferContentSize, handler, status); int readOffset = 0; int readLen = bufferSize; if (status.offset == 0) { if (bufferContentSize == bufferSize) { // Buffer is not big enough, double it! char[] newBuffer = null; try { bufferSize *= 2; newBuffer = this.pool.allocateBuffer(bufferSize); System.arraycopy(buffer, 0, newBuffer, 0, bufferContentSize); this.pool.releaseBuffer(buffer); buffer = newBuffer; } catch (final Exception ignored) { this.pool.releaseBuffer(newBuffer); } } // it's possible for two reads to occur in a row and 1) read less than the bufferSize and 2) // still not find the next tag/end of structure readOffset = bufferContentSize; readLen = bufferSize - readOffset; } else if (status.offset < bufferContentSize) { System.arraycopy(buffer, status.offset, buffer, 0, bufferContentSize - status.offset); readOffset = bufferContentSize - status.offset; readLen = bufferSize - readOffset; status.offset = 0; bufferContentSize = readOffset; } final int read = reader.read(buffer, readOffset, readLen); if (read != -1) { bufferContentSize = readOffset + read; } else { cont = false; } } // Iteration done, now it's time to clean up in case we still have some text to be notified int lastLine = status.line; int lastCol = status.col; final int lastStart = status.offset; final int lastLen = bufferContentSize - lastStart; if (lastLen > 0) { if (status.inStructure) { throw new TextParseException( "Incomplete structure: \"" + new String(buffer, lastStart, lastLen) + "\"", status.line, status.col); } handler.handleText(buffer, lastStart, lastLen, status.line, status.col); // As we have produced an additional text event, we need to fast-forward the // lastLine and lastCol position to include the last text structure. for (int i = lastStart; i < (lastStart + lastLen); i++) { final char c = buffer[i]; if (c == '\n') { lastLine++; lastCol = 1; } else { lastCol++; } } } final long parsingEndTimeNanos = System.nanoTime(); handler.handleDocumentEnd(parsingEndTimeNanos, (parsingEndTimeNanos - parsingStartTimeNanos), lastLine, lastCol); } catch (final TextParseException e) { throw e; } catch (final Exception e) { throw new TextParseException(e); } finally { this.pool.releaseBuffer(buffer); try { reader.close(); } catch (final Throwable ignored) { // This exception can be safely ignored } } } private void parseBuffer( final char[] buffer, final int offset, final int len, final ITextHandler handler, final TextParseStatus status) throws TextParseException { final int[] locator = new int[] {status.line, status.col}; int currentLine = locator[0]; int currentCol = locator[1]; final int maxi = offset + len; int i = offset; int current = i; char c; boolean inStructure; boolean inOpenElement = false; boolean inCloseElement = false; boolean inCommentBlock = false; boolean inCommentLine = false; boolean inLiteral = false; int pos = i; int tagStart = i; int tagEnd = i; while (i < maxi) { inStructure = (inOpenElement || inCloseElement || inCommentBlock || inCommentLine || inLiteral); if (!inStructure) { pos = TextParsingUtil.findNextStructureStartOrLiteralMarker( buffer, i, maxi, locator, this.processCommentsAndLiterals); if (pos == -1) { status.offset = current; status.line = currentLine; status.col = currentCol; status.inStructure = false; status.literalMarker = (char)0; return; } c = buffer[pos]; inOpenElement = TextParsingElementUtil.isOpenElementStart(buffer, pos, maxi); if (!inOpenElement) { inCloseElement = TextParsingElementUtil.isCloseElementStart(buffer, pos, maxi); if (!inCloseElement) { if (this.processCommentsAndLiterals) { inCommentBlock = TextParsingCommentUtil.isCommentBlockStart(buffer, pos, maxi); if (!inCommentBlock) { inCommentLine = TextParsingCommentUtil.isCommentLineStart(buffer, pos, maxi); if (!inCommentLine) { inLiteral = (c == '\'' || c == '"'); status.literalMarker = c; } } } } } inStructure = (inOpenElement || inCloseElement || inCommentBlock || inCommentLine || inLiteral); if (inStructure && !inLiteral) { // We won't advance the "structure start" pointer if this is just a literal because we want // to send literals as parts of their larger containing texts, not separately tagStart = pos; } while (!inStructure) { // We found a '[' or a '/', but it cannot be considered beginning of any known structure // Or also it could have been a character starting or ending a literal ParsingLocatorUtil.countChar(locator, c); pos = TextParsingUtil.findNextStructureStartOrLiteralMarker( buffer, pos + 1, maxi, locator, this.processCommentsAndLiterals); if (pos == -1) { status.offset = current; status.line = currentLine; status.col = currentCol; status.inStructure = false; status.literalMarker = (char)0; return; } c = buffer[pos]; inOpenElement = TextParsingElementUtil.isOpenElementStart(buffer, pos, maxi); if (!inOpenElement) { inCloseElement = TextParsingElementUtil.isCloseElementStart(buffer, pos, maxi); if (!inCloseElement) { if (this.processCommentsAndLiterals) { inCommentBlock = TextParsingCommentUtil.isCommentBlockStart(buffer, pos, maxi); if (!inCommentBlock) { inCommentLine = TextParsingCommentUtil.isCommentLineStart(buffer, pos, maxi); if (!inCommentLine) { inLiteral = (c == '\'' || c == '"'); status.literalMarker = c; } } } } } inStructure = (inOpenElement || inCloseElement || inCommentBlock || inCommentLine || inLiteral); if (inStructure && !inLiteral) { // We won't advance the "structure start" pointer if this is just a literal because we want // to send literals as parts of their larger containing texts, not separately tagStart = pos; } } if (tagStart > current) { // We avoid empty-string text events handler.handleText( buffer, current, (tagStart - current), currentLine, currentCol); } if (tagStart == pos) { // Only advance current and the line+col pointers if we have actually found something current = tagStart; currentLine = locator[0]; currentCol = locator[1]; } i = pos; } else { pos = inLiteral? TextParsingUtil.findNextLiteralEnd(buffer, i, maxi, locator, status.literalMarker) : inCommentBlock? TextParsingUtil.findNextCommentBlockEnd(buffer, i, maxi, locator) : inCommentLine? TextParsingUtil.findNextCommentLineEnd(buffer, i, maxi, locator) : TextParsingUtil.findNextStructureEndAvoidQuotes(buffer, i, maxi, locator); if (pos < 0) { // This is an unfinished structure status.offset = current; status.line = currentLine; status.col = currentCol; status.inStructure = true; status.literalMarker = (char)0; // We reset this anyway, because we will try to parse it fully again return; } if (inOpenElement) { // This is a open/standalone tag (to be determined by looking at the antepenultimate character) tagEnd = pos; if ((buffer[tagEnd - 1] == '/')) { TextParsingElementUtil. parseStandaloneElement(buffer, current, (tagEnd - current) + 1, currentLine, currentCol, handler); } else { TextParsingElementUtil. parseOpenElement(buffer, current, (tagEnd - current) + 1, currentLine, currentCol, handler); } inOpenElement = false; } else if (inCloseElement) { // This is a closing tag tagEnd = pos; TextParsingElementUtil. parseCloseElement(buffer, current, (tagEnd - current) + 1, currentLine, currentCol, handler); inCloseElement = false; } else if (inCommentBlock) { // Comment blocks will be parsed as such because they may contain 'natural' inlined expressions tagEnd = pos; TextParsingCommentUtil.parseComment(buffer, current, (tagEnd - current) + 1, currentLine, currentCol, handler); inCommentBlock = false; } else if (inCommentLine) { // Note that comment lines will not be parsed in a special way, only as mere texts, because they // cannot contain 'natural' inlined expressions (though they may contain normal inlined expressions) tagEnd = pos; handler.handleText(buffer, current, (tagEnd - current) + 1, currentLine, currentCol); inCommentLine = false; } else if (inLiteral) { // This is a literal // tagEnd is NOT set to pos, because we won't be sending any events, just cancelling the "literal" mode inLiteral = false; status.literalMarker = (char)0; } else { throw new IllegalStateException("Illegal parsing state: structure is not of a recognized type"); } // The ']', '/' or literal-delimiter char will be considered as processed too ParsingLocatorUtil.countChar(locator, buffer[pos]); if (tagEnd == pos) { // Only advance current and the line+col pointers if we have actually found something current = tagEnd + 1; currentLine = locator[0]; currentCol = locator[1]; } i = pos + 1; } } status.offset = current; status.line = currentLine; status.col = currentCol; status.inStructure = false; status.literalMarker = (char)0; } /* * This class models a pool of buffers, used to keep the amount of * large char[] buffer objects required to operate to a minimum. * * Note this pool never blocks, so if a new buffer is needed and all * are currently allocated, a new char[] object is created and returned. * */ private static final class BufferPool { private final char[][] pool; private final boolean[] allocated; private final int poolBufferSize; private BufferPool(final int poolSize, final int poolBufferSize) { super(); this.pool = new char[poolSize][]; this.allocated = new boolean[poolSize]; this.poolBufferSize = poolBufferSize; for (int i = 0; i < this.pool.length; i++) { this.pool[i] = new char[this.poolBufferSize]; } Arrays.fill(this.allocated, false); } private synchronized char[] allocateBuffer(final int bufferSize) { if (bufferSize != this.poolBufferSize) { // We will only pool buffers of the default size. If a different size is required, we just // create it without pooling. return new char[bufferSize]; } for (int i = 0; i < this.pool.length; i++) { if (!this.allocated[i]) { this.allocated[i] = true; return this.pool[i]; } } return new char[bufferSize]; } private synchronized void releaseBuffer(final char[] buffer) { if (buffer == null) { return; } if (buffer.length != this.poolBufferSize) { // This buffer cannot be part of the pool - only buffers with a specific size are contained return; } for (int i = 0; i < this.pool.length; i++) { if (this.pool[i] == buffer) { // Found it. Mark it as non-allocated this.allocated[i] = false; return; } } // The buffer wasn't part of our pool. Just return. } } }