// SpellCheckTokenizer.java example
//
// Explorer
// ali-idea-plugin-master
/*
 * Copyright 2014 Hewlett-Packard Development Company, L.P
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hp.alm.ali.idea.spellcheck;

import java.text.BreakIterator;
import java.util.NoSuchElementException;

/**
 * Splits a text into the words that are candidates for spell checking.
 *
 * <p>Word boundaries are determined by {@link BreakIterator#getWordInstance()}. Only
 * segments whose first code point is a letter are reported as tokens; whitespace,
 * punctuation, numbers and similar segments are skipped.
 *
 * <p>Usage follows the familiar tokenizer pattern:
 * <pre>
 *     SpellCheckTokenizer tokenizer = new SpellCheckTokenizer(text);
 *     while (tokenizer.hasMoreTokens()) {
 *         Token token = tokenizer.nextToken();
 *         ...
 *     }
 * </pre>
 */
public class SpellCheckTokenizer {

    private final BreakIterator wordIterator;
    private final String text;

    /** Token found by {@link #hasMoreTokens()} but not yet handed out, or {@code null}. */
    private Token token;

    /**
     * Creates a tokenizer positioned at the beginning of the given text.
     *
     * @param text the text to split into words; must not be {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public SpellCheckTokenizer(String text) {
        if (text == null) {
            // fail fast with a meaningful message instead of deep inside BreakIterator
            throw new NullPointerException("text");
        }
        this.text = text;

        wordIterator = BreakIterator.getWordInstance();
        wordIterator.setText(text);
        wordIterator.first();
    }

    /**
     * Looks ahead for the next word. A found word is buffered until it is consumed by
     * {@link #nextToken()}, so calling this method repeatedly does not skip tokens.
     *
     * @return {@code true} if another word is available, {@code false} once the end of
     *         the text has been reached
     */
    public boolean hasMoreTokens() {
        while (token == null) {
            int offset = wordIterator.current();
            if (offset >= text.length()) {
                return false;
            }
            int end = wordIterator.next();
            String word = text.substring(offset, end);
            // Inspect the first code point rather than the first char: a word that starts
            // with a supplementary-plane letter begins with a surrogate, which
            // Character.isLetter(char) never accepts.
            if (Character.isLetter(word.codePointAt(0))) {
                // only words starting with letter (no whitespace, punctuation etc)
                token = new Token(word, offset);
            }
        }
        return true;
    }

    /**
     * Returns the next word and advances past it.
     *
     * @return the next token
     * @throws NoSuchElementException if there are no more words in the text
     */
    public Token nextToken() {
        if (!hasMoreTokens()) {
            throw new NoSuchElementException();
        }
        Token current = token;
        token = null;
        return current;
    }

    /**
     * A single word together with its position in the tokenized text.
     */
    public static class Token {

        private final String word;
        private final int offset;

        Token(String word, int offset) {
            this.word = word;
            this.offset = offset;
        }

        /**
         * @return the word exactly as it appears in the text
         */
        public String getWord() {
            return word;
        }

        /**
         * @return index of the first char of the word within the tokenized text
         */
        public int getOffset() {
            return offset;
        }

        /**
         * @return length of the word in {@code char} units
         */
        public int getLength() {
            return word.length();
        }
    }
}