RichTextTokenizer.java example

Explorer
WaveInCloud-master
/**
 * Copyright 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.waveprotocol.wave.model.richtext;

import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.BLOCK;
import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.BLOCK_RANGE;
import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.STYLE;
import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.TEXTUAL;

import java.util.NoSuchElementException;

/**
 * Normalizes DOM into a linear stream of tokens, used primarily for pasting.
 * This is done in order to facilitate the inspection of HTML content that is
 * pasted for specific handlers such as semantically extracting styles as
 * annotations.
 *
 */
public interface RichTextTokenizer {
  /**
   * Set of allowed tokens. Start/end tokens will never be nested.
   *
   *  LAST_NEW_LINE exists to mark the final newline in the stream. This is
   * useful when dealing with trailing inline content.
   *
   *  TODO(user): Either replace LAST_NEW_LINE with a special method that
   * queries if the current token is the last one of its kind or some extra data
   * on the NEW_LINE token.
   *
   *  TODO(user): These should be registered from the annotation handlers
   * themselves... or at least not so hardcoded.
   */
  public enum Type {
    UNORDERED_LIST_START(BLOCK_RANGE, 1),
    UNORDERED_LIST_END(BLOCK_RANGE),
    ORDERED_LIST_START(BLOCK_RANGE, 1),
    ORDERED_LIST_END(BLOCK_RANGE),

    NEW_LINE(BLOCK),
    LIST_ITEM(BLOCK),

    TEXT(TEXTUAL),

    STYLE_FONT_WEIGHT_START(STYLE),
    STYLE_FONT_WEIGHT_END(STYLE),
    STYLE_FONT_STYLE_START(STYLE),
    STYLE_FONT_STYLE_END(STYLE),
    STYLE_TEXT_DECORATION_START(STYLE),
    STYLE_TEXT_DECORATION_END(STYLE),
    STYLE_COLOR_START(STYLE),
    STYLE_COLOR_END(STYLE),
    STYLE_BG_COLOR_START(STYLE),
    STYLE_BG_COLOR_END(STYLE),
    STYLE_FONT_FAMILY_START(STYLE),
    STYLE_FONT_FAMILY_END(STYLE),
    LINK_START(STYLE),
    LINK_END(STYLE);

    public enum TypeGroup {
      BLOCK_RANGE,
      BLOCK,
      TEXTUAL,
      STYLE;
    }

    private final TypeGroup group;
    private final int indent;

    Type(TypeGroup group) {
      this(group, -1);
    }

    Type(TypeGroup group, int indent) {
      this.group = group;
      this.indent = indent;
    }

    int indent() {
      assert indent >= 0;
      return indent;
    }

    TypeGroup group() {
      return group;
    }

    boolean isStructural() {
      return group != STYLE;
    }

    boolean isBlockLevel() {
      return group == BLOCK || group == BLOCK_RANGE;
    }
    // TODO(user): Add tables, etc.
  }

  /**
   * Returns true if the tokenizer has more tokens.
   *
   * @return true if there are more tokens to be read.
   */
  boolean hasNext();

  /**
   * Move to the next token and return the current type.
   *
   * @return the token type moved to.
   * @exception NoSuchElementException iteration has no more elements.
   */
  Type next();

  /**
   * Returns the type of the current token.
   *
   * @return the current token type.
   */
  Type getCurrentType();

  /**
   * Returns the data associated with the current token. Returns null if there
   * is no data available.
   *
   * @return token data associated with the current token.
   */
  String getData();

  /**
   * Returns a copy of this tokenizer.
   */
  RichTextTokenizer copy();
}