/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.template.soy.soyparse;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.template.soy.base.internal.IdGenerator;
import com.google.template.soy.soytree.RawTextNode;
import com.google.template.soy.soytree.RawTextNode.SourceOffsets;
import com.google.template.soy.soytree.RawTextNode.SourceOffsets.Reason;
import javax.annotation.Nullable;
/**
* A helper for building raw text nodes.
*
* <p>RawText handling is complex. We need to
*
* <ul>
* <li>Perform line joining on sequences of 'basic raw text'
* <ul>
* <li>Remove a leading newline (and surrounding whitespace)
* <li>Remove a trailing newline (and surrounding whitespace)
* <li>Replace interior newlines and surrounding whitespace with a single {@code ' '} unless
* it immediately precedes a '<' or succeeds a '>' in which case we just remove it.
* </ul>
*
* <li>Calculate the 'effective' start and end locations given the stripping of leading and
* trailing whitespace
* <li>Accumulate our other 'textual' tokens which do not trigger line joining
* </ul>
*
* <p>These rules appear to be an approximation of the <a
* href="https://www.w3.org/TR/html4/struct/text.html#h-9.1">html whitespace rules</a> but it
* completely ignores interior non-newline whitespace and preserves it entirely. See the unit tests
* for examples.
*/
final class RawTextBuilder {
private final RawTextNode.SourceOffsets.Builder offsets = new RawTextNode.SourceOffsets.Builder();
private final StringBuilder buffer = new StringBuilder();
private final String fileName;
private final IdGenerator nodeIdGen;
// The index in buffer where the current sequence of basic textual content starts.
private int basicStart = -1;
// within a sequence of basic text, tracks the start of the current sequence of whitespace
private int basicStartOfWhitespace = -1;
// The ending line and column at the start of the current sequence of whitespace. May be -1 if
// the current whitespace is leading whitespace.
private int endLineAtStartOfWhitespace;
private int endColumnAtStartOfWhitespace;
// tracks whether the current sequence of whitespace contains a newline
private boolean basicHasNewline = false;
// this will be set to non {@code NONE} if the previous sequence of text added isn't a basic
// text literal. this will force us to record a new offset for the next token
private SourceOffsets.Reason discontinuityReason = Reason.NONE;
RawTextBuilder(String fileName, IdGenerator nodeIdGen) {
this.fileName = checkNotNull(fileName);
this.nodeIdGen = checkNotNull(nodeIdGen);
}
/** Append a basic token. 'Basic' tokens are text literals. */
void addBasic(Token token) {
if (basicStart == -1) {
basicStart = buffer.length();
basicStartOfWhitespace = -1;
basicHasNewline = false;
}
switch (token.kind) {
case SoyFileParserConstants.TOKEN_WS:
if (token.image.equals("\r\n") || token.image.equals("\r") || token.image.equals("\n")) {
basicHasNewline = true;
}
if (basicStartOfWhitespace == -1) {
basicStartOfWhitespace = buffer.length();
endLineAtStartOfWhitespace = offsets.endLine();
endColumnAtStartOfWhitespace = offsets.endColumn();
}
break;
case SoyFileParserConstants.TOKEN_NOT_WS:
maybeCollapseWhitespace(token.image);
break;
default:
throw new AssertionError(
SoyFileParserConstants.tokenImage[token.kind] + " is not a basic text token");
}
append(token, token.image);
}
/** Add the content for a '{literal}...{/literal}' section. */
void addLiteral(Token literalContent) {
checkArgument(literalContent.kind == SoyFileParserConstants.LITERAL_RAW_TEXT_CONTENT);
maybeFinishBasic();
// Note: the LITERAL_RAW_TEXT_CONTENT already has the correct image content (it matches the
// closing {/literal} but excludes the actual closing tag).
if (!literalContent.image.isEmpty()) {
append(literalContent, literalContent.image);
}
discontinuityReason = Reason.LITERAL;
}
/** Add the content for a 'textual' command token, like '{sp}'. */
void addTextualCommand(Token token) {
maybeFinishBasic();
// appending the empty string violates some invariants about the buffer only ever being extended
if (token.kind != SoyFileParserConstants.CMD_FULL_NIL) {
append(token, rawTextCmdToString(token));
}
discontinuityReason = Reason.COMMAND;
}
private static String rawTextCmdToString(Token token) {
switch (token.kind) {
case SoyFileParserConstants.CMD_FULL_SP:
return " ";
case SoyFileParserConstants.CMD_FULL_CR:
return "\r";
case SoyFileParserConstants.CMD_FULL_LF:
return "\n";
case SoyFileParserConstants.CMD_FULL_TAB:
return "\t";
case SoyFileParserConstants.CMD_FULL_LB:
return "{";
case SoyFileParserConstants.CMD_FULL_RB:
return "}";
default:
throw new IllegalArgumentException(
"unexpected token: " + SoyFileParserConstants.tokenImage[token.kind]);
}
}
RawTextNode build() {
maybeFinishBasic();
String text = buffer.toString();
RawTextNode.SourceOffsets sourceOffsets = offsets.build(text.length(), discontinuityReason);
return new RawTextNode(
nodeIdGen.genId(), text, sourceOffsets.getSourceLocation(fileName), sourceOffsets);
}
/** updates the location with the given tokens location. */
private void append(Token token, String content) {
if (content.isEmpty()) {
throw new IllegalStateException(
String.format(
"shouldn't append empty content: %s @ %s",
SoyFileParserConstants.tokenImage[token.kind], Tokens.createSrcLoc(fileName, token)));
}
// add a new offset if:
// - this is the first token
// - the previous token introduced a discontinuity (due to a special token, or whitespace
// joining)
// - this token doesn't directly abut the previous token (this happens when there is a comment)
boolean addOffset = false;
if (offsets.isEmpty()) {
addOffset = true;
} else if (discontinuityReason != Reason.NONE) {
addOffset = true;
} else {
// are the two tokens not adjacent? We don't actually record comments in the AST or token
// stream so this is kind of a guess, but all known cases are due to comments.
if (offsets.endLine() == token.beginLine) {
if (offsets.endColumn() + 1 != token.beginColumn) {
addOffset = true;
discontinuityReason = Reason.COMMENT;
}
} else if (offsets.endLine() + 1 == token.beginLine && token.beginColumn != 1) {
addOffset = true;
discontinuityReason = Reason.COMMENT;
}
}
if (addOffset) {
offsets.add(buffer.length(), token.beginLine, token.beginColumn, discontinuityReason);
discontinuityReason = Reason.NONE;
}
offsets.setEndLocation(token.endLine, token.endColumn);
buffer.append(content);
}
// Completes the current open basic text sequence.
private void maybeFinishBasic() {
if (basicStart != -1) {
maybeCollapseWhitespace(null);
basicStart = -1;
}
}
/**
* This method should be called at the end of a sequence of basic whitespace tokens. This is how
* we implement the line joining algorithm.
*
* @param next The next basic text token image, or null if the next token isn't a basic token.
*/
private void maybeCollapseWhitespace(@Nullable String next) {
if (basicStartOfWhitespace != -1) {
if (basicHasNewline) {
// Note: if we are replacing the whitespace we don't need to update our source location
// information. This is because
// 1. if we are stripping leading whitespace, the next token will be the start token
// - note: if there is no next token, the whole raw text node will get dropped, so we
// won't need a source location
// 2. if we are stripping trailing whitespace, the previously assigned location should be
// preserved
// 3. if we are in the middle, then our location is irrelevant
if (basicStart == basicStartOfWhitespace || next == null) {
// leading or trailing whitespace, remove it all
buffer.delete(basicStartOfWhitespace, buffer.length());
offsets.delete(basicStartOfWhitespace);
if (next == null && endColumnAtStartOfWhitespace != -1) {
// if this is trailing whitespace, then our end location will be wrong, so restore it to
// what it was when we started accumulating whitespace (assuming we had one).
offsets.setEndLocation(endLineAtStartOfWhitespace, endColumnAtStartOfWhitespace);
}
} else {
// We are in the middle, we either remove the whole segment or replace it with a single
// space character based on whether or not we appear to be butted up next to an html tag.
// This logic is definitely suspicious but it is important to maintain for compatibility
// reasons.
if (next.charAt(0) == '<' || buffer.charAt(basicStartOfWhitespace - 1) == '>') {
// we are immediately before or after an html tag.
buffer.delete(basicStartOfWhitespace, buffer.length());
offsets.delete(basicStartOfWhitespace);
} else {
// Otherwise, collapse to a single whitespace character.
buffer.replace(basicStartOfWhitespace, buffer.length(), " ");
offsets.delete(basicStartOfWhitespace);
}
}
discontinuityReason = Reason.WHITESPACE;
basicHasNewline = false;
}
basicStartOfWhitespace = -1;
}
}
}