/** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.waveprotocol.wave.model.richtext; import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.BLOCK; import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.BLOCK_RANGE; import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.STYLE; import static org.waveprotocol.wave.model.richtext.RichTextTokenizer.Type.TypeGroup.TEXTUAL; import java.util.NoSuchElementException; /** * Normalizes DOM into a linear stream of tokens, used primarily for pasting. * This is done in order to facilitate the inspection of HTML content that is * pasted for specific handlers such as semantically extracting styles as * annotations. * */ public interface RichTextTokenizer { /** * Set of allowed tokens. Start/end tokens will never be nested. * * LAST_NEW_LINE exists to mark the final newline in the stream. This is * useful when dealing with trailing inline content. * * TODO(user): Either replace LAST_NEW_LINE with a special method that * queries if the current token is the last one of its kind or some extra data * on the NEW_LINE token. * * TODO(user): These should be registered from the annotation handlers * themselves... or at least not so hardcoded. */ public enum Type { UNORDERED_LIST_START(BLOCK_RANGE, 1), UNORDERED_LIST_END(BLOCK_RANGE), ORDERED_LIST_START(BLOCK_RANGE, 1), ORDERED_LIST_END(BLOCK_RANGE), NEW_LINE(BLOCK), LIST_ITEM(BLOCK), TEXT(TEXTUAL), STYLE_FONT_WEIGHT_START(STYLE), STYLE_FONT_WEIGHT_END(STYLE), STYLE_FONT_STYLE_START(STYLE), STYLE_FONT_STYLE_END(STYLE), STYLE_TEXT_DECORATION_START(STYLE), STYLE_TEXT_DECORATION_END(STYLE), STYLE_COLOR_START(STYLE), STYLE_COLOR_END(STYLE), STYLE_BG_COLOR_START(STYLE), STYLE_BG_COLOR_END(STYLE), STYLE_FONT_FAMILY_START(STYLE), STYLE_FONT_FAMILY_END(STYLE), LINK_START(STYLE), LINK_END(STYLE); public enum TypeGroup { BLOCK_RANGE, BLOCK, TEXTUAL, STYLE; } private final TypeGroup group; private final int indent; Type(TypeGroup group) { this(group, -1); } Type(TypeGroup group, int indent) { this.group = group; this.indent = indent; } int indent() { assert indent >= 0; return indent; } TypeGroup group() { return group; } boolean isStructural() { return group != STYLE; } boolean isBlockLevel() { return group == BLOCK || group == BLOCK_RANGE; } // TODO(user): Add tables, etc. } /** * Returns true if the tokenizer has more tokens. * * @return true if there are more tokens to be read. */ boolean hasNext(); /** * Move to the next token and return the current type. * * @return the token type moved to. * @exception NoSuchElementException iteration has no more elements. */ Type next(); /** * Returns the type of the current token. * * @return the current token type. */ Type getCurrentType(); /** * Returns the data associated with the current token. Returns null if there * is no data available. * * @return token data associated with the current token. */ String getData(); /** * Returns a copy of this tokenizer. */ RichTextTokenizer copy(); }