HtmlWhiteSpaceStripper.java example

Explorer

jsilver-master
- android_external_jsilver-cm-10.2
  - src
    - com
      - google
        clearsilver
        jsilver
        DataLoader.java
        JSilver.java
        JSilverOptions.java
        TemplateRenderer.java
        adaptor
        JCs.java
        JHdf.java
        JSilverFactory.java
        LoadPathToFileCache.java
        ResourceLoaderAdaptor.java
        autoescape
        AutoEscapeContext.java
        AutoEscapeOptions.java
        EscapeMode.java
        compatibility
        ClearsilverRenderer.java
        compiler
        BaseCompiledTemplate.java
        CompilingClassLoader.java
        EscapingEvaluator.java
        ExpressionTranslator.java
        JSilverCompilationException.java
        JavaExpression.java
        JavaSourceWriter.java
        TemplateCompiler.java
        TemplateTranslator.java
        VariableTranslator.java
        data
        AbstractData.java
        ChainedData.java
        Data.java
        DataContext.java
        DataFactory.java
        DefaultData.java
        DefaultDataContext.java
        DefaultHdfParser.java
        DelegatedData.java
        HDFDataFactory.java
        LocalAndGlobalData.java
        NativeStringInternStrategy.java
        NestedMapData.java
        NewHdfParser.java
        NoOpStringInternStrategy.java
        Parser.java
        ParserFactory.java
        StringInternStrategy.java
        TypeConverter.java
        UniqueStack.java
        UnmodifiableData.java
        examples
        basic
        HelloWorld.java
        Iterate.java
        JSilverTest.java
        exceptions
        ExceptionUtil.java
        JSilverAutoEscapingException.java
        JSilverBadSyntaxException.java
        JSilverException.java
        JSilverIOException.java
        JSilverInterpreterException.java
        JSilverTemplateNotFoundException.java
        functions
        EscapingFunction.java
        Function.java
        FunctionExecutor.java
        FunctionRegistry.java
        NonEscapingFunction.java
        TextFilter.java
        bundles
        ClearSilverCompatibleFunctions.java
        CoreOperators.java
        escape
        HtmlEscapeFunction.java
        JsEscapeFunction.java
        JsValidateUnquotedLiteral.java
        NullEscapeFunction.java
        SimpleEscapingFunction.java
        StyleEscapeFunction.java
        UrlEscapeFunction.java
        html
        BaseUrlValidateFunction.java
        CssUrlValidateFunction.java
        HtmlStripFunction.java
        HtmlUrlValidateFunction.java
        TextHtmlFunction.java
        numeric
        AbsFunction.java
        MaxFunction.java
        MinFunction.java
        operators
        AddFunction.java
        AndFunction.java
        DivideFunction.java
        EqualFunction.java
        ExistsFunction.java
        GreaterFunction.java
        GreaterOrEqualFunction.java
        LessFunction.java
        LessOrEqualFunction.java
        ModuloFunction.java
        MultiplyFunction.java
        NotEqualFunction.java
        NotFunction.java
        NumericAddFunction.java
        NumericEqualFunction.java
        NumericFunction.java
        NumericNotEqualFunction.java
        OrFunction.java
        SubtractFunction.java
        string
        CrcFunction.java
        FindFunction.java
        LengthFunction.java
        SliceFunction.java
        structure
        FirstFunction.java
        LastFunction.java
        NameFunction.java
        SubcountFunction.java
        interpreter
        ExpressionEvaluator.java
        InterpretedMacro.java
        InterpretedTemplate.java
        InterpretedTemplateLoader.java
        LoadingTemplateFactory.java
        OptimizerProvider.java
        OptimizingTemplateFactory.java
        TemplateFactory.java
        TemplateInterpreter.java
        VariableLocator.java
        output
        InstanceOutputBufferProvider.java
        OutputBufferProvider.java
        ThreadLocalOutputBufferProvider.java
        precompiler
        PrecompiledTemplateLoader.java
        PrecompiledTemplateMapFileReader.java
        PrecompiledTemplateMapKey.java
        resourceloader
        BaseResourceLoader.java
        BufferedResourceLoader.java
        ClassLoaderResourceLoader.java
        ClassResourceLoader.java
        CompositeResourceLoader.java
        FileSystemResourceLoader.java
        InMemoryResourceLoader.java
        ResourceLoader.java
        syntax
        AutoEscaper.java
        DataCommandConsolidator.java
        InlineRewriter.java
        SequenceOptimizer.java
        StructuralWhitespaceStripper.java
        SyntaxTreeBuilder.java
        SyntaxTreeDumper.java
        SyntaxTreeOptimizer.java
        TemplateSyntaxTree.java
        TypeResolver.java
        VarOptimizer.java
        analysis
        Analysis.java
        AnalysisAdapter.java
        DepthFirstAdapter.java
        ReversedDepthFirstAdapter.java
        lexer
        Lexer.java
        LexerException.java
        node
        AAddExpression.java
        AAltCommand.java
        AAndExpression.java
        AAutoescapeCommand.java
        ACallCommand.java
        ACommaExpression.java
        ACommentCommand.java
        AContentTypeCommand.java
        ACsOpenPosition.java
        ADataCommand.java
        ADecNumberVariable.java
        ADecimalExpression.java
        ADefCommand.java
        ADescendVariable.java
        ADivideExpression.java
        AEachCommand.java
        AEqExpression.java
        AEscapeCommand.java
        AEvarCommand.java
        AExistsExpression.java
        AExpandVariable.java
        AFunctionExpression.java
        AGtExpression.java
        AGteExpression.java
        AHardIncludeCommand.java
        AHardLincludeCommand.java
        AHexExpression.java
        AHexNumberVariable.java
        AIfCommand.java
        AIncludeCommand.java
        AInlineCommand.java
        ALincludeCommand.java
        ALoopCommand.java
        ALoopIncCommand.java
        ALoopToCommand.java
        ALtExpression.java
        ALteExpression.java
        ALvarCommand.java
        AModuloExpression.java
        AMultipleCommand.java
        AMultiplyExpression.java
        ANameCommand.java
        ANameVariable.java
        ANeExpression.java
        ANegativeExpression.java
        ANoopCommand.java
        ANoopExpression.java
        ANotExpression.java
        ANumericAddExpression.java
        ANumericEqExpression.java
        ANumericExpression.java
        ANumericNeExpression.java
        AOptimizedMultipleCommand.java
        AOrExpression.java
        ASequenceExpression.java
        ASetCommand.java
        AStringExpression.java
        ASubtractExpression.java
        AUvarCommand.java
        AVarCommand.java
        AVariableExpression.java
        AWithCommand.java
        EOF.java
        Node.java
        PCommand.java
        PExpression.java
        PPosition.java
        PVariable.java
        Start.java
        Switch.java
        Switchable.java
        TAlt.java
        TAnd.java
        TArgWhitespace.java
        TAssignment.java
        TAutoescape.java
        TBang.java
        TBracketClose.java
        TBracketOpen.java
        TCall.java
        TComma.java
        TCommandDelimiter.java
        TComment.java
        TCommentStart.java
        TContentType.java
        TCsClose.java
        TCsOpen.java
        TData.java
        TDecNumber.java
        TDef.java
        TDollar.java
        TDot.java
        TEach.java
        TElse.java
        TElseIf.java
        TEq.java
        TEscape.java
        TEvar.java
        TGt.java
        TGte.java
        THardDelimiter.java
        THash.java
        THexNumber.java
        TIf.java
        TInclude.java
        TInline.java
        TLinclude.java
        TLoop.java
        TLt.java
        TLte.java
        TLvar.java
        TMinus.java
        TName.java
        TNe.java
        TOr.java
        TParenClose.java
        TParenOpen.java
        TPercent.java
        TPlus.java
        TQuestion.java
        TSet.java
        TSlash.java
        TStar.java
        TString.java
        TUvar.java
        TVar.java
        TWith.java
        TWord.java
        Token.java
        parser
        Parser.java
        ParserException.java
        State.java
        TokenIndex.java
        template
        DefaultRenderingContext.java
        DelegatingTemplateLoader.java
        HtmlWhiteSpaceStripper.java
        Macro.java
        RenderingContext.java
        Template.java
        TemplateLoader.java
        values
        NumberValue.java
        StringValue.java
        Value.java
        VariableValue.java
        VariantValue.java
        streamhtmlparser
        ExternalState.java
        HtmlParser.java
        HtmlParserFactory.java
        JavascriptParser.java
        JavascriptParserFactory.java
        ParseException.java
        Parser.java
        impl
        GenericParser.java
        HtmlParserImpl.java
        InternalState.java
        JavascriptParserImpl.java
        ParserStateTable.java
        StateTableTransition.java
        util
        CharacterRecorder.java
        EntityResolver.java
        HtmlUtils.java
        JavascriptTokenBuffer.java
    - org
      - clearsilver
        CS.java
        CSFileLoader.java
        CSUtil.java
        ClearsilverFactory.java
        DelegatedCs.java
        DelegatedHdf.java
        FactoryLoader.java
        HDF.java
        jni
        JNI.java
        JniClearsilverFactory.java
        JniCs.java
        JniHdf.java

/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.clearsilver.jsilver.template;

import java.io.IOException;

/**
 * HTML whitespace stripper to be used by JSilver. It removes leading and trailing whitespace,
 * replaces each run of contiguous whitespace characters with just the first character of the
 * run, and removes lines that contain nothing but whitespace.
 *
 * It does not strip whitespace inside the following elements:
 * <ul>
 * <li> PRE
 * <li> VERBATIM
 * <li> TEXTAREA
 * <li> SCRIPT
 * </ul>
 * It also strips out empty lines and leading whitespace inside HTML tags (i.e. between
 * {@code '<'} and {@code '>'}) and inside SCRIPT elements. It leaves trailing whitespace there
 * since that is more costly to remove and tends to not be common based on how templates are
 * created (they don't have trailing whitespace).
 * <p>
 * The line feed {@code '\n'} is treated separately from the other whitespace characters (space,
 * tab and carriage return) because it defines what a "line" is for leading/trailing whitespace
 * removal and for empty line removal.
 * <p>
 * Loadtests indicate that this class can strip whitespace almost as quickly as just reading
 * every character from a string (20% slower).
 * <p>
 * While not strictly compatible with the JNI Clearsilver whitestripping function, we are not
 * aware of any differences that yield functionally different HTML output. However, we encourage
 * users to verify for themselves and report any differences.
 * <p>
 * Instances keep parsing state between calls, so the characters of one document must be appended
 * in order to a single instance.
 */
public class HtmlWhiteSpaceStripper implements Appendable {

  // Text appended in place of a null CharSequence, as the Appendable contract requires.
  private static final String NULL_TEXT = "null";

  // Object to output stripped content to.
  private final Appendable out;
  // Level of whitespace stripping to perform. (Currently not used).
  // TODO: Determine what the exact differences are in levels in
  // JNI Clearsilver and see if it is worth porting it.
  private final int level;

  // Has any non-whitespace character been seen since the start of the line.
  private boolean nonWsSeen = false;
  // First character of a whitespace run that has been seen but not yet written. It is written
  // just before the next non-whitespace character and discarded if the line ends first.
  // 0 signifies no pending whitespace.
  private char pendingWs = 0;

  // We just saw the start of an HTML tag '<'.
  private boolean startHtmlTag = false;
  // Are we currently in an opening HTML tag (not "</").
  private boolean inOpenTag = false;
  // Are we currently in a closing HTML tag.
  private boolean inCloseTag = false;
  // Are we currently in an HTML tag name.
  private boolean inTagName = false;

  // Nesting depth of the whitespace-sensitive elements we are currently inside of. Each counter
  // is incremented by an opening tag, decremented by a closing tag and never drops below zero.
  private int textAreaScope = 0;
  private int preScope = 0;
  private int verbatimScope = 0;
  private int scriptScope = 0;

  // Used to hold HTML tag element name while it is being read.
  private final StringBuilder tagName = new StringBuilder(16);

  /**
   * Intermediate Appendable object that strips whitespace as it passes through characters to
   * another Appendable object.
   *
   * @param out The Appendable object to dump the stripped output to.
   */
  public HtmlWhiteSpaceStripper(Appendable out) {
    this(out, 1);
  }

  /**
   * Intermediate Appendable object that strips whitespace as it passes through characters to
   * another Appendable object.
   *
   * @param out The Appendable object to dump the stripped output to.
   * @param level Ignored for now.
   */
  public HtmlWhiteSpaceStripper(Appendable out, int level) {
    this.out = out;
    this.level = level;
  }

  /**
   * Returns the string representation of the wrapped output object.
   */
  @Override
  public String toString() {
    return out.toString();
  }

  /**
   * Strips and forwards every character of {@code csq}.
   *
   * @param csq The characters to append. If {@code null}, the four characters "null" are
   *        appended, as specified by {@link Appendable#append(CharSequence)}.
   * @return This stripper.
   * @throws IOException If the wrapped Appendable fails.
   */
  @Override
  public Appendable append(CharSequence csq) throws IOException {
    CharSequence text = (csq == null) ? NULL_TEXT : csq;
    return append(text, 0, text.length());
  }

  /**
   * Strips and forwards the characters of {@code csq} in the range {@code [start, end)}.
   *
   * @param csq The characters to append. If {@code null}, characters are taken as if it
   *        contained the four characters "null".
   * @param start Index of the first character to append.
   * @param end Index after the last character to append.
   * @return This stripper.
   * @throws IOException If the wrapped Appendable fails.
   */
  @Override
  public Appendable append(CharSequence csq, int start, int end) throws IOException {
    CharSequence text = (csq == null) ? NULL_TEXT : csq;
    for (int i = start; i < end; i++) {
      append(text.charAt(i));
    }
    return this;
  }

  /**
   * Feeds one character through the stripper, updating the tag tracking state and writing the
   * character to the output unless it is whitespace that should be removed.
   *
   * @param c The next character of the document.
   * @return This stripper.
   * @throws IOException If the wrapped Appendable fails.
   */
  @Override
  public Appendable append(char c) throws IOException {
    if (inOpenTag || inCloseTag) {
      // In an HTML tag.
      if (startHtmlTag) {
        // This is the first character in an HTML tag.
        if (c == '/') {
          // We are in a close tag. startHtmlTag stays set so that the next character is still
          // treated as the potential start of the tag name.
          inOpenTag = false;
          inCloseTag = true;
        } else {
          // This is the first non-'/' character in an HTML tag.
          startHtmlTag = false;
          if (isTagNameStartChar(c)) {
            // we have a valid tag name first char.
            inTagName = true;
            tagName.append(c);
          }
        }
      } else if (inTagName) {
        // We were last parsing the name of an HTML tag.
        if (isTagNameChar(c)) {
          tagName.append(c);
        } else {
          // The name is complete; processTagName() also clears inTagName.
          processTagName();
        }
      }
      if (c == '>') {
        // We are at the end of the tag.
        inOpenTag = inCloseTag = false;
        nonWsSeen = true;
      }
      stripLeadingWsAndEmptyLines(c);
    } else {
      // Outside of HTML tag.
      if (c == '<') {
        // Starting a new HTML tag. The '<' itself is still written by the branch below that
        // matches the current scope.
        inOpenTag = true;
        startHtmlTag = true;
      }
      if (preScope > 0 || verbatimScope > 0 || textAreaScope > 0) {
        // In an HTML element that we want to preserve whitespace in.
        out.append(c);
      } else if (scriptScope > 0) {
        // Want to remove empty lines and leading whitespace only.
        stripLeadingWsAndEmptyLines(c);
      } else {
        stripAll(c);
      }
    }

    return this;
  }

  /**
   * Writes {@code c} unless it is leading whitespace on a line or the line feed of an empty line.
   * Whitespace after the first non-whitespace character of a line is written unchanged.
   */
  private void stripLeadingWsAndEmptyLines(char c) throws IOException {
    switch (c) {
      case '\n':
        // Only lines that had content keep their line feed.
        if (nonWsSeen) {
          out.append(c);
        }
        nonWsSeen = false;
        break;
      case ' ':
      case '\t':
      case '\r':
        if (nonWsSeen) {
          out.append(c);
        }
        break;
      default:
        nonWsSeen = true;
        out.append(c);
    }
  }

  /**
   * Writes {@code c} with full stripping applied: leading and trailing whitespace and empty lines
   * are removed, and each run of whitespace is reduced to its first character.
   */
  private void stripAll(char c) throws IOException {
    // All that remains is content that is safe to remove whitespace from.
    switch (c) {
      case '\n':
        if (nonWsSeen) {
          // We don't want blank lines so we don't output linefeed unless we
          // saw non-whitespace.
          out.append(c);
        }
        // We don't want trailing whitespace.
        pendingWs = 0;
        nonWsSeen = false;
        break;
      case ' ':
      case '\t':
      case '\r':
        // Leading whitespace (nothing seen yet on this line) is omitted. Otherwise remember only
        // the first character of the run; later whitespace in the same run is dropped.
        if (nonWsSeen && pendingWs == 0) {
          pendingWs = c;
        }
        break;
      default:
        if (pendingWs != 0) {
          out.append(pendingWs);
          pendingWs = 0;
        }
        nonWsSeen = true;
        out.append(c);
    }
  }

  /**
   * Adds {@code inc} to a scope counter, clamping the result so it never goes below zero.
   */
  private int updateScope(int current, int inc) {
    int updated = current + inc;
    return updated < 0 ? 0 : updated;
  }

  /**
   * Consumes the collected tag name and, if it names one of the whitespace-sensitive elements,
   * adjusts that element's scope counter (+1 in an opening tag, -1 otherwise).
   *
   * This code assumes well-formed HTML as input with HTML elements opening and closing properly in
   * the right order.
   */
  private void processTagName() {
    inTagName = false;
    String name = tagName.toString();
    tagName.setLength(0);
    int inc = inOpenTag ? 1 : -1;
    if ("textarea".equalsIgnoreCase(name)) {
      textAreaScope = updateScope(textAreaScope, inc);
    } else if ("pre".equalsIgnoreCase(name)) {
      preScope = updateScope(preScope, inc);
    } else if ("verbatim".equalsIgnoreCase(name)) {
      verbatimScope = updateScope(verbatimScope, inc);
    } else if ("script".equalsIgnoreCase(name)) {
      scriptScope = updateScope(scriptScope, inc);
    }
  }

  // A tag name must start with an ASCII letter.
  private boolean isTagNameStartChar(char c) {
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
  }

  // From W3C HTML spec.
  private boolean isTagNameChar(char c) {
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || (c == '_')
        || (c == '-') || (c == ':') || (c == '.');
  }
}