/* * JOrtho * * Copyright (C) 2005-2008 by i-net software * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. * * Created on 07.11.2005 */ package com.inet.jortho; import java.text.BreakIterator; import java.util.Locale; import javax.swing.text.AbstractDocument; import javax.swing.text.BadLocationException; import javax.swing.text.Document; import javax.swing.text.JTextComponent; import javax.swing.text.Utilities; /** * Break the text and words and search for misspelling. * @author Volker Berlin */ class Tokenizer { private final Dictionary dictionary; private final Document doc; /** end offset of current paragraph */ private int endOffset; private boolean isFirstWordInSentence; private final SpellCheckerOptions options; /** start offset of current paragraph */ private int paragraphOffset; private String phrase; private String sentence; private final BreakIterator sentences; private int startSentence, endSentence, startWord, endWord; private int wordOffset; private final LetterBasedBreakIterator words; /** * Create a tokenizer for the selected range. */ Tokenizer(final JTextComponent jText, final Dictionary dictionary, final Locale locale, final int startOffset, final int endOffset, final SpellCheckerOptions options) { this.dictionary = dictionary; doc = jText.getDocument(); this.options = options == null ? SpellChecker.getOptions() : options; sentences = BreakIterator.getSentenceInstance(locale); words = new LetterBasedBreakIterator(); paragraphOffset = startOffset; this.endOffset = endOffset; //loadSentences(); setSentencesText(); endSentence = sentences.first(); endWord = BreakIterator.DONE; } /** * Create a Tokenizer for the current paragraph * @param jText the checking JTextComponent * @param dictionary the used Dictionary * @param locale the used Locale, is needed for the word and sentence breaker * @param offset the current offset. */ Tokenizer(final JTextComponent jText, final Dictionary dictionary, final Locale locale, final int offset, final SpellCheckerOptions options) { this(jText, dictionary, locale, Utilities.getParagraphElement(jText, offset).getStartOffset(), Utilities .getParagraphElement(jText, offset).getEndOffset(), options); } /** * Create a tokenizer for the completely text document. */ Tokenizer(final JTextComponent jText, final Dictionary dictionary, final Locale locale, final SpellCheckerOptions options) { this(jText, dictionary, locale, 0, jText.getDocument().getLength(), options); } /** * Get start offset of the last misspelling in the JTextComponent. */ int getWordOffset() { return paragraphOffset + wordOffset; } /** * Was the last invalid word the first word in a sentence. * * @return true if it was the first word. */ boolean isFirstWordInSentence() { return isFirstWordInSentence; } /** * Check if the word is a web address. This means a email address or web page address. * * @param word * the word that should be check. It can not be null and can not include any whitespace. * @return true if it is a web address. */ private boolean isWebAddress(String word) { if (startWord + 2 >= sentence.length()) { return false; } final char char0 = sentence.charAt(startWord); final char char1 = sentence.charAt(startWord + 1); if (char0 == '@' && (Character.isLetter(char1) ||Character.isDigit(char1)) || startWord + 3 < sentence.length() && char0 == ':' && char1 == '/' && sentence.charAt(startWord + 2) == '/') { while (startWord < endWord) { final String next = sentence.substring(startWord, endWord).trim(); if (next.length() > 0) { word += next; startWord = endWord; endWord = words.next(); } else { break; } } return true; } return false; } /** * Loads the sentences of the current paragraph. */ private void loadSentences() { setSentencesText(); startSentence = sentences.first(); endSentence = sentences.next(); nextSentence(); } /** * Get the next misspelling word. If not found then it return null. */ String nextInvalidWord() { isFirstWordInSentence = false; while (true) { if (endWord == BreakIterator.DONE) { startSentence = endSentence; endSentence = sentences.next(); if (endSentence == BreakIterator.DONE) { if (!nextParagraph()) { return null; } } else { nextSentence(); } } while (endWord != BreakIterator.DONE) { final String word = sentence.substring(startWord, endWord).trim(); wordOffset = startSentence + startWord; startWord = endWord; endWord = words.next(); //only words with 2 or more characters are checked if (word.length() > 1 && Character.isLetter(word.charAt(0))) { boolean exist = dictionary.exist(word); if (!exist && !options.isCaseSensitive()) { exist = dictionary.exist(Utils.getInvertedCapitalizion(word)); } else if (!exist && (isFirstWordInSentence || options.getIgnoreCapitalization()) && Character.isUpperCase(word.charAt(0))) { // Uppercase check on starting of sentence final String capitalizeWord = word.substring(0, 1).toLowerCase() + word.substring(1); exist = dictionary.exist(capitalizeWord); } if (!exist && options.isIgnoreAllCapsWords() && Utils.isAllCapitalized(word)) { exist = true; } if (!exist && options.isIgnoreWordsWithNumbers() && Utils.isIncludeNumbers(word)) { exist = true; } if (!exist && !isWebAddress(word)) { return word; } isFirstWordInSentence = false; } } } } /** * Initialize the variables for the next paragraph. * @return true, if there is a next paragraph */ private boolean nextParagraph() { if (doc instanceof AbstractDocument) { paragraphOffset = ((AbstractDocument) doc).getParagraphElement(paragraphOffset).getEndOffset(); if (paragraphOffset >= endOffset) { return false; } } else { return false; } loadSentences(); return true; } /** * Load the next Sentence in the word breaker. */ private void nextSentence() { sentence = phrase.substring(startSentence, endSentence); words.setText(sentence); startWord = words.first(); endWord = words.next(); isFirstWordInSentence = true; } /** * Call sentences.setText( String ) based on the current value of paragraphOffset. */ private void setSentencesText() { int end = endOffset; if (doc instanceof AbstractDocument) { end = ((AbstractDocument) doc).getParagraphElement(paragraphOffset).getEndOffset(); } try { phrase = doc.getText(paragraphOffset, end - paragraphOffset); } catch (final BadLocationException e) { e.printStackTrace(); } sentences.setText(phrase); } /** * Update the text after a word was replaced. The changes in the text should be only after the current word offset. */ void updatePhrase() { endOffset = doc.getLength(); setSentencesText(); endSentence = sentences.following(startSentence); sentence = phrase.substring(startSentence, endSentence); words.setText(sentence); startWord = words.following(wordOffset); endWord = words.next(); } }