/*
* JOrtho
*
* Copyright (C) 2005-2008 by i-net software
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*
* Created on 07.11.2005
*/
package com.inet.jortho;
import java.text.BreakIterator;
import java.util.Locale;
import javax.swing.text.AbstractDocument;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.JTextComponent;
import javax.swing.text.Utilities;
/**
 * Breaks the text of a document into paragraphs, sentences and words and searches for misspelled words.
 * The words are looked up in a {@link Dictionary}; the position of the last reported word can be
 * requested with {@link #getWordOffset()}.
 * @author Volker Berlin
 */
class Tokenizer {

    /** the document of the checked text component */
    private final Document doc;

    /** the options that control the case handling of the check */
    private final SpellCheckerOptions options;

    /** start offset of the current paragraph in the document */
    private int paragraphOffset;

    /** end offset of the range to check in the document; no paragraph starting at or behind it is checked */
    private int endOffset;

    /** the text that is currently loaded in the sentence breaker, it starts at paragraphOffset */
    private String phrase;

    private final Dictionary dictionary;

    /** breaks the phrase into sentences */
    private final BreakIterator sentences;

    /** startSentence and endSentence are offsets in phrase; startWord and endWord are offsets in sentence */
    private int startSentence, endSentence, startWord, endWord;

    /** the text of the current sentence, a part of phrase */
    private String sentence;

    /** breaks the sentence into words */
    private final BreakIterator words;

    /** offset of the last examined word relative to the start of phrase */
    private int wordOffset;

    private boolean isFirstWordInSentence;

    /**
     * Create a tokenizer for the completely text document.
     * @param jText the checking JTextComponent
     * @param dictionary the used Dictionary
     * @param locale the used Locale, is needed for the word and sentence breaker
     * @param options the used options, if null then the options from SpellChecker.getOptions() are used
     */
    Tokenizer( JTextComponent jText, Dictionary dictionary, Locale locale, SpellCheckerOptions options ) {
        this( jText, dictionary, locale, 0, jText.getDocument().getLength(), options );
    }

    /**
     * Create a Tokenizer for the current paragraph
     * @param jText the checking JTextComponent
     * @param dictionary the used Dictionary
     * @param locale the used Locale, is needed for the word and sentence breaker
     * @param offset the current offset.
     * @param options the used options, if null then the options from SpellChecker.getOptions() are used
     */
    Tokenizer( JTextComponent jText, Dictionary dictionary, Locale locale, int offset, SpellCheckerOptions options ) {
        this( jText, dictionary, locale, Utilities.getParagraphElement( jText, offset ).getStartOffset(),
                        Utilities.getParagraphElement( jText, offset ).getEndOffset(), options );
    }

    /**
     * Create a tokenizer for the selected range.
     * @param jText the checking JTextComponent
     * @param dictionary the used Dictionary
     * @param locale the used Locale, is needed for the word and sentence breaker
     * @param startOffset the offset in the document where the check starts
     * @param endOffset the offset in the document where the check ends
     * @param options the used options, if null then the options from SpellChecker.getOptions() are used
     */
    Tokenizer( JTextComponent jText, Dictionary dictionary, Locale locale, int startOffset, int endOffset, SpellCheckerOptions options ) {
        this.dictionary = dictionary;
        doc = jText.getDocument();
        this.options = options == null ? SpellChecker.getOptions() : options;
        sentences = BreakIterator.getSentenceInstance( locale );
        words = BreakIterator.getWordInstance( locale );
        paragraphOffset = startOffset;
        this.endOffset = endOffset;

        // Only the text is loaded here. The first sentence is loaded lazily with the first call
        // of nextInvalidWord() because endWord is DONE.
        setSentencesText();
        endSentence = sentences.first();
        endWord = BreakIterator.DONE;
    }

    /**
     * Get the next misspelling word. If not found then it return null.
     * Only tokens with 2 or more characters that start with a letter are checked. If the options are
     * not case sensitive then an unknown word is also searched with inverted capitalization. Else an
     * unknown word with an upper case first letter on the start of a sentence is also searched with
     * a lower case first letter.
     * @return the next word that is not in the dictionary or null if the end of the range was reached
     */
    String nextInvalidWord() {
        isFirstWordInSentence = false;
        while( true ) {
            if( endWord == BreakIterator.DONE ) {
                // the current sentence is finished, continue with the next sentence or paragraph
                startSentence = endSentence;
                endSentence = sentences.next();
                if( endSentence == BreakIterator.DONE ) {
                    if( !nextParagraph() ) {
                        return null;
                    }
                } else {
                    nextSentence();
                }
            }
            while( endWord != BreakIterator.DONE ) {
                String word = sentence.substring( startWord, endWord ).trim();
                wordOffset = startSentence + startWord;
                startWord = endWord;
                endWord = words.next();

                //only words with 2 or more characters are checked
                if( word.length() > 1 && Character.isLetter( word.charAt( 0 ) ) ) {
                    boolean exist = dictionary.exist( word );
                    if( !exist && !options.isCaseSensitive() ) {
                        exist = dictionary.exist( Utils.getInvertedCapitalizion( word ) );
                    } else if( isFirstWordInSentence && !exist && Character.isUpperCase( word.charAt( 0 ) ) ) {
                        // Uppercase check on starting of sentence
                        String lowerWord = word.substring( 0, 1 ).toLowerCase() + word.substring( 1 );
                        exist = dictionary.exist( lowerWord );
                    }
                    if( !exist ) {
                        // isFirstWordInSentence is not changed here, that the caller can request it
                        return word;
                    }
                    isFirstWordInSentence = false;
                }
            }
        }
    }

    /**
     * Was the last invalid word the first word in a sentence.
     * @return true if it was the first word.
     */
    boolean isFirstWordInSentence() {
        return isFirstWordInSentence;
    }

    /**
     * Initialize the variables for the next paragraph.
     * @return true, if there is a next paragraph
     */
    private boolean nextParagraph() {
        if( !( doc instanceof AbstractDocument ) ) {
            // without paragraph elements the completely range was already loaded as one phrase
            return false;
        }
        int nextOffset = ( (AbstractDocument)doc ).getParagraphElement( paragraphOffset ).getEndOffset();
        // Without an advance there is no further paragraph. This check prevents an endless loop
        // if the offsets do not match the document anymore.
        boolean advanced = nextOffset > paragraphOffset;
        paragraphOffset = nextOffset;
        if( !advanced || paragraphOffset >= endOffset ) {
            return false;
        }
        loadSentences();
        return true;
    }

    /**
     * Loads the sentences of the current paragraph.
     */
    private void loadSentences() {
        setSentencesText();
        startSentence = sentences.first();
        endSentence = sentences.next();
        if( endSentence == BreakIterator.DONE ) {
            // the paragraph has no text, nextInvalidWord() continue with the next paragraph
            clearSentence();
        } else {
            nextSentence();
        }
    }

    /**
     * Call sentences.setText( String ) based on the current value of paragraphOffset.
     * If the document is an AbstractDocument then the text ends with the end of the paragraph
     * else with endOffset. If the text can not be read then an empty text is used.
     */
    private void setSentencesText() {
        int end = endOffset;
        if( doc instanceof AbstractDocument ) {
            end = ( (AbstractDocument)doc ).getParagraphElement( paragraphOffset ).getEndOffset();
        }
        try {
            phrase = doc.getText( paragraphOffset, end - paragraphOffset );
        } catch( BadLocationException e ) {
            e.printStackTrace();
            // Do not use a null or the text of the previous paragraph. The offsets would not match.
            phrase = "";
        }
        sentences.setText( phrase );
    }

    /**
     * Load the next Sentence in the word breaker.
     */
    private void nextSentence() {
        sentence = phrase.substring( startSentence, endSentence );
        words.setText( sentence );
        startWord = words.first();
        endWord = words.next();
        isFirstWordInSentence = true;
    }

    /**
     * Set an empty sentence without words. The next call of nextInvalidWord() continue
     * with the next sentence or paragraph.
     */
    private void clearSentence() {
        sentence = "";
        words.setText( sentence );
        startWord = BreakIterator.DONE;
        endWord = BreakIterator.DONE;
    }

    /**
     * Get start offset of the last misspelling in the JTextComponent.
     * @return the offset in the document
     */
    int getWordOffset() {
        return paragraphOffset + wordOffset;
    }

    /**
     * Update the text after a word was replaced. The changes in the text should be only after the current word offset.
     * The check continue with the first word boundary behind the current word offset.
     */
    void updatePhrase() {
        endOffset = doc.getLength();

        setSentencesText();

        boolean inRange = startSentence >= 0 && startSentence <= phrase.length();
        endSentence = inRange ? sentences.following( startSentence ) : BreakIterator.DONE;
        if( endSentence == BreakIterator.DONE ) {
            // There is no text behind the start of the sentence. The position of the breaker is
            // set to the end that the next call of sentences.next() return DONE.
            sentences.last();
            clearSentence();
            return;
        }
        sentence = phrase.substring( startSentence, endSentence );

        words.setText( sentence );
        // wordOffset is relative to the phrase but the word breaker works on the sentence
        int offsetInSentence = Math.min( wordOffset - startSentence, sentence.length() );
        startWord = words.following( offsetInSentence );
        // after a DONE the position of the breaker must not be used anymore
        endWord = startWord == BreakIterator.DONE ? BreakIterator.DONE : words.next();
    }
}