/*
* Copyright 2014 Hewlett-Packard Development Company, L.P
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hp.alm.ali.idea.spellcheck;
import java.text.BreakIterator;
import java.util.NoSuchElementException;
public class SpellCheckTokenizer {
private BreakIterator wordIterator;
private Token token;
private String text;
public SpellCheckTokenizer(String text) {
this.text = text;
wordIterator = BreakIterator.getWordInstance();
wordIterator.setText(text);
wordIterator.first();
}
public boolean hasMoreTokens() {
while (token == null) {
int offset = wordIterator.current();
if (offset >= text.length()) {
return false;
}
String word = text.substring(offset, wordIterator.next());
if (Character.isLetter(word.charAt(0))) {
// only words starting with letter (no whitespace, punctuation etc)
token = new Token(word, offset);
}
}
return true;
}
public Token nextToken() {
if (!hasMoreTokens()) {
throw new NoSuchElementException();
}
Token current = token;
token = null;
return current;
}
public static class Token {
private final String word;
private final int offset;
Token(String word, int offset) {
this.word = word;
this.offset = offset;
}
public String getWord() {
return word;
}
public int getOffset() {
return offset;
}
public int getLength() {
return word.length();
}
}
}