/*
* $Id$
*
* Copyright (c) 2010 by the TeXlapse Team.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package net.sourceforge.texlipse.spelling;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.swabunga.spell.event.AbstractWordFinder;
import com.swabunga.spell.event.Word;
import com.swabunga.spell.event.WordNotFoundException;
/**
 * Finds the non TeX words in a given text. Command names are skipped, and so
 * are the arguments of some special commands like \ref, \label, \begin,...
 * whose arguments are identifiers rather than natural-language text.
 *
 * @author Boris von Loesch
 *
 */
public class TexlipseWordFinder extends AbstractWordFinder {

    /** Optional whitespace followed by one non-empty <code>{...}</code> argument. */
    private final static Pattern MAND_ARG = Pattern.compile("\\A\\s*\\{[^\\}]+\\}");

    /** Like {@link #MAND_ARG}, but an optional non-empty <code>[...]</code> argument may come first. */
    private final static Pattern OPT_MAND_ARG = Pattern.compile("\\A\\s*(\\[[^\\]]+\\])?\\s*\\{[^\\}]+\\}");

    /** Commands skipped together with an argument matching {@link #OPT_MAND_ARG}. */
    private final static String[] OPT_MAND_ARG_COMMANDS = {
        "\\documentclass",
        "\\usepackage",
        "\\cite",
    };

    /** Commands skipped together with an argument matching {@link #MAND_ARG}. */
    private final static String[] MAND_ARG_COMMANDS = {
        "\\newcounter",
        "\\setcounter",
        "\\addtocounter",
        "\\value",
        "\\arabic",
        "\\stepcounter",
        "\\newenvironment",
        "\\renewenvironment",
        "\\ref",
        "\\vref",
        "\\eqref",
        "\\pageref",
        "\\label",
        "\\tag",
        // Environment names.
        "\\begin",
        "\\end",
    };

    /** Whether everything from a '%' up to the end of the line is skipped. */
    private boolean ignoreComments = true;

    /** Stored by {@link #setIgnoreMath(boolean)}; math skipping itself is currently disabled. */
    private boolean ignoreMath = true;

    public TexlipseWordFinder(String st) {
        super(st);
    }

    public TexlipseWordFinder() {
        super();
    }

    /**
     * This method scans the text from the end of the last word, and returns a
     * new Word object corresponding to the next word.
     *
     * @return the next word.
     * @throws WordNotFoundException search string contains no more words.
     */
    @Override
    public Word next() {
        if (!hasNext()) {
            throw new WordNotFoundException("No more words found.");
        }
        currentWord.copy(nextWord);
        setSentenceIterator(currentWord);

        int i = currentWord.getEnd();
        boolean started = false;
        boolean finished = false;
        while (i < text.length() && !finished) {
            if (isWordChar(i)) {
                if (!started) {
                    nextWord.setStart(i);
                    started = true;
                }
                i++;
                continue;
            }
            if (started) {
                // First non-word character after the word: the word is complete.
                nextWord.setText(text.substring(nextWord.getStart(), i));
                finished = true;
                break;
            }
            // Between words: jump over ignorable TeX constructs, or else
            // step over this single non-word character.
            int j = skipIgnored(i);
            i = (j != i) ? j : i + 1;
        }

        if (!started) {
            // Nothing but ignorable text was left.
            nextWord = null;
        } else if (!finished) {
            // The word runs up to the very end of the text.
            nextWord.setText(text.substring(nextWord.getStart(), i));
        }
        return currentWord;
    }

    /**
     * Define if comments contents are ignored during spell checking
     * @param ignore an indication if comments content is to be ignored
     */
    public void setIgnoreComments(boolean ignore) {
        ignoreComments = ignore;
    }

    /**
     * Define if math content should be ignored during spell checking. The
     * value is stored, but it has no effect at the moment because the math
     * skipping code is disabled (see the FIXME in the scanning code).
     * @param ignore an indication if math content is to be ignored
     */
    public void setIgnoreMath(boolean ignore) {
        ignoreMath = ignore;
    }

    /**
     * Ignores a command string
     * @param index position in the text where the command is expected to start
     * @param command The command with leading backslash
     * @param p Regexp pattern for the command arguments
     * @return index of the last character matched by <code>p</code> if the
     *         text at <code>index</code> starts with <code>command</code> and
     *         <code>p</code> is found in the text following it; otherwise
     *         <code>index</code> unchanged
     */
    public int ignore(int index, String command, Pattern p) {
        //Is this the right command
        if (!text.startsWith(command, index)) {
            return index;
        }
        // Restrict the matcher to the text behind the command instead of
        // copying that tail; with the default anchoring bounds \A matches
        // at the start of the region.
        Matcher m = p.matcher(text);
        m.region(index + command.length(), text.length());
        if (m.find()) {
            return m.end() - 1;
        }
        return index;
    }

    /**
     * Skips the ignorable construct (comment or command) starting at the
     * given position, if there is one.
     * Ignores should be in order of importance and then specificity.
     *
     * @param index position of a non-word character
     * @return the position to continue scanning at, or <code>index</code>
     *         if nothing was skipped
     */
    private int skipIgnored(int index) {
        int j = index;
        if (ignoreComments) {
            j = ignore(j, '%', '\n');
        }
        if (ignoreMath) {
            //FIXME: Is not working correctly when parsing just a single line
            //j = ignore(j, '$', '$');
        }
        if (j < text.length() && text.charAt(j) == '\\') {
            j = skipCommand(j);
        }
        return j;
    }

    /**
     * Skips the command starting with the backslash at the given position.
     *
     * @param index position of a backslash in the text
     * @return the position to continue scanning at
     */
    private int skipCommand(int index) {
        int escaped = index + 1;
        if (escaped < text.length() && !Character.isLetterOrDigit(text.charAt(escaped))) {
            // Control symbol such as \%, \$, \{ or \\: skip the escaped
            // character together with the backslash. If only the backslash
            // were skipped, the escaped character would be looked at again on
            // its own, so an escaped percent sign would be taken for the
            // start of a comment and the second backslash of a line break
            // for the start of a command name.
            return escaped + 1;
        }
        // Ignore certain command parameters. No command in the tables is a
        // prefix of another one, so at most one of them can match here.
        for (String command : OPT_MAND_ARG_COMMANDS) {
            int j = ignore(index, command, OPT_MAND_ARG);
            if (j != index) {
                return j;
            }
        }
        for (String command : MAND_ARG_COMMANDS) {
            int j = ignore(index, command, MAND_ARG);
            if (j != index) {
                return j;
            }
        }
        // Ignore commands.
        return ignore(index, '\\');
    }
}