/******************************************************************************* * Copyright (c) 2000, 2008 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package com.aptana.internal.ui.text.spelling; import java.text.BreakIterator; import java.util.LinkedList; import java.util.Locale; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.IRegion; import org.eclipse.jface.text.TextUtilities; import com.aptana.internal.ui.text.spelling.engine.DefaultSpellChecker; import com.aptana.internal.ui.text.spelling.engine.ISpellCheckIterator; /** * Iterator to spell check javadoc comment regions. * * @since 3.0 */ public class SpellCheckIterator implements ISpellCheckIterator { /** The content of the region */ protected final String fContent; /** The line delimiter */ private final String fDelimiter; /** The last token */ protected String fLastToken = null; /** The next break */ protected int fNext = 1; /** The offset of the region */ protected final int fOffset; /** The predecessor break */ private int fPredecessor; /** The previous break */ protected int fPrevious = 0; /** The sentence breaks */ private final LinkedList fSentenceBreaks = new LinkedList(); /** Does the current word start a sentence? */ private boolean fStartsSentence = false; /** The successor break */ protected int fSuccessor; /** The word iterator */ private final BreakIterator fWordIterator; private boolean fIsIgnoringSingleLetters; /** * Creates a new spell check iterator. * * @param document * the document containing the specified partition * @param region * the region to spell check * @param locale * the locale to use for spell checking */ public SpellCheckIterator(IDocument document, IRegion region, Locale locale) { this(document, region, locale, BreakIterator.getWordInstance(locale)); } /** * Creates a new spell check iterator. * * @param document * the document containing the specified partition * @param region * the region to spell check * @param locale * the locale to use for spell checking * @param breakIterator * the break-iterator */ public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) { this.fOffset = region.getOffset(); this.fWordIterator = breakIterator; this.fDelimiter = TextUtilities.getDefaultLineDelimiter(document); String content; try { content = document.get(region.getOffset(), region.getLength()); // if (content.startsWith(NLSElement.TAG_PREFIX)) //content= ""; //$NON-NLS-1$ } catch (final Exception exception) { content = ""; //$NON-NLS-1$ } this.fContent = content; this.fWordIterator.setText(content); this.fPredecessor = this.fWordIterator.first(); this.fSuccessor = this.fWordIterator.next(); final BreakIterator iterator = BreakIterator .getSentenceInstance(locale); iterator.setText(content); int offset = iterator.current(); while (offset != BreakIterator.DONE) { this.fSentenceBreaks.add(new Integer(offset)); offset = iterator.next(); } } /* * @seecom.onpositive.internal.ui.text.spelling.engine.ISpellCheckIterator# * setIgnoreSingleLetters(boolean) * * @since 3.3 */ public void setIgnoreSingleLetters(boolean state) { this.fIsIgnoringSingleLetters = state; } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin() */ public final int getBegin() { return this.fPrevious + this.fOffset; } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd() */ public final int getEnd() { return this.fNext + this.fOffset - 1; } /* * @see java.util.Iterator#hasNext() */ public final boolean hasNext() { return this.fSuccessor != BreakIterator.DONE; } /** * Does the specified token consist of at least one letter and digits only? * * @param begin * the begin index * @param end * the end index * @return <code>true</code> iff the token consists of digits and at least * one letter only, <code>false</code> otherwise */ protected final boolean isAlphaNumeric(final int begin, final int end) { char character = 0; boolean letter = false; for (int index = begin; index < end; index++) { character = this.fContent.charAt(index); if (Character.isLetter(character)) { letter = true; } if (!Character.isLetterOrDigit(character)) { return false; } } return letter; } /** * Checks the last token against the given tags? * * @param tags * the tags to check * @return <code>true</code> iff the last token is in the given array */ protected final boolean isToken(final String[] tags) { return this.isToken(this.fLastToken, tags); } /** * Checks the given token against the given tags? * * @param token * the token to check * @param tags * the tags to check * @return <code>true</code> iff the last token is in the given array * @since 3.3 */ protected final boolean isToken(final String token, final String[] tags) { if (token != null) { for (int index = 0; index < tags.length; index++) { if (token.equals(tags[index])) { return true; } } } return false; } /** * Is the current token a single letter token surrounded by non-whitespace * characters? * * @param begin * the begin index * @return <code>true</code> iff the token is a single letter token, * <code>false</code> otherwise */ protected final boolean isSingleLetter(final int begin) { if (!Character.isLetter(this.fContent.charAt(begin))) { return false; } if ((begin > 0) && !Character.isWhitespace(this.fContent.charAt(begin - 1))) { return false; } if ((begin < this.fContent.length() - 1) && !Character.isWhitespace(this.fContent.charAt(begin + 1))) { return false; } return true; } /** * Does the specified token look like an URL? * * @param begin * the begin index * @return <code>true</code> iff this token look like an URL, * <code>false</code> otherwise */ protected final boolean isUrlToken(final int begin) { for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) { if (this.fContent.startsWith( DefaultSpellChecker.URL_PREFIXES[index], begin)) { return true; } } return false; } /** * Does the specified token consist of whitespace only? * * @param begin * the begin index * @param end * the end index * @return <code>true</code> iff the token consists of whitespace only, * <code>false</code> otherwise */ protected final boolean isWhitespace(final int begin, final int end) { for (int index = begin; index < end; index++) { if (!Character.isWhitespace(this.fContent.charAt(index))) { return false; } } return true; } /* * @see java.util.Iterator#next() */ public Object next() { String token = this.nextToken(); while ((token == null) && (this.fSuccessor != BreakIterator.DONE)) { token = this.nextToken(); } this.fLastToken = token; return token; } /** * Advances the end index to the next word break. */ protected final void nextBreak() { this.fNext = this.fSuccessor; this.fPredecessor = this.fSuccessor; this.fSuccessor = this.fWordIterator.next(); } /** * Returns the next sentence break. * * @return the next sentence break */ protected final int nextSentence() { return ((Integer) this.fSentenceBreaks.getFirst()).intValue(); } /** * Determines the next token to be spell checked. * * @return the next token to be spell checked, or <code>null</code> iff the * next token is not a candidate for spell checking. */ protected String nextToken() { String token = null; this.fPrevious = this.fPredecessor; this.fStartsSentence = false; this.nextBreak(); boolean update = false; if (this.fNext - this.fPrevious > 0) { if ((this.fSuccessor != BreakIterator.DONE) && (this.fContent.charAt(this.fPrevious) == IJavaDocTagConstants.JAVADOC_TAG_PREFIX)) { this.nextBreak(); if (Character .isLetter(this.fContent.charAt(this.fPrevious + 1))) { update = true; token = this.fContent.substring(this.fPrevious, this.fNext); } else { this.fPredecessor = this.fNext; } } else if ((this.fSuccessor != BreakIterator.DONE) && (this.fContent.charAt(this.fPrevious) == IHtmlTagConstants.HTML_TAG_PREFIX) && (Character.isLetter(this.fContent.charAt(this.fNext)) || (this.fContent .charAt(this.fNext) == '/'))) { if (this.fContent.startsWith( IHtmlTagConstants.HTML_CLOSE_PREFIX, this.fPrevious)) { this.nextBreak(); } this.nextBreak(); if ((this.fSuccessor != BreakIterator.DONE) && (this.fContent.charAt(this.fNext) == IHtmlTagConstants.HTML_TAG_POSTFIX)) { this.nextBreak(); if (this.fSuccessor != BreakIterator.DONE) { update = true; token = this.fContent.substring(this.fPrevious, this.fNext); } } } else if ((this.fSuccessor != BreakIterator.DONE) && (this.fContent.charAt(this.fPrevious) == IHtmlTagConstants.HTML_ENTITY_START) && (Character.isLetter(this.fContent.charAt(this.fNext)))) { this.nextBreak(); if ((this.fSuccessor != BreakIterator.DONE) && (this.fContent.charAt(this.fNext) == IHtmlTagConstants.HTML_ENTITY_END)) { this.nextBreak(); if (this.isToken(this.fContent.substring(this.fPrevious, this.fNext), IHtmlTagConstants.HTML_ENTITY_CODES)) { this.skipTokens(this.fPrevious, IHtmlTagConstants.HTML_ENTITY_END); update = true; } else { token = this.fContent.substring(this.fPrevious, this.fNext); } } else { token = this.fContent.substring(this.fPrevious, this.fNext); } update = true; } else if (!this.isWhitespace(this.fPrevious, this.fNext) && this.isAlphaNumeric(this.fPrevious, this.fNext)) { if (this.isUrlToken(this.fPrevious)) { this.skipTokens(this.fPrevious, ' '); } else if (this .isToken(IJavaDocTagConstants.JAVADOC_PARAM_TAGS)) { this.fLastToken = null; } else if (this .isToken(IJavaDocTagConstants.JAVADOC_REFERENCE_TAGS)) { this.fLastToken = null; this.skipTokens(this.fPrevious, this.fDelimiter.charAt(0)); } else if ((this.fNext - this.fPrevious > 1) || (this.isSingleLetter(this.fPrevious) && !this.fIsIgnoringSingleLetters)) { token = this.fContent.substring(this.fPrevious, this.fNext); } update = true; } } if (update && (this.fSentenceBreaks.size() > 0)) { if (this.fPrevious >= this.nextSentence()) { while ((this.fSentenceBreaks.size() > 0) && (this.fPrevious >= this.nextSentence())) { this.fSentenceBreaks.removeFirst(); } this.fStartsSentence = (this.fLastToken == null) || (token != null); } } return token; } /* * @see java.util.Iterator#remove() */ public final void remove() { throw new UnsupportedOperationException(); } /** * Skip the tokens until the stop character is reached. * * @param begin * the begin index * @param stop * the stop character */ protected final void skipTokens(final int begin, final char stop) { int end = begin; while ((end < this.fContent.length()) && (this.fContent.charAt(end) != stop)) { end++; } if (end < this.fContent.length()) { this.fNext = end; this.fPredecessor = this.fNext; this.fSuccessor = this.fWordIterator.following(this.fNext); } else { this.fSuccessor = BreakIterator.DONE; } } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence() */ public final boolean startsSentence() { return this.fStartsSentence; } }