package org.fandev.findUsages;
import com.intellij.lang.cacheBuilder.WordsScanner;
import com.intellij.lang.cacheBuilder.WordOccurrence;
import com.intellij.lexer.Lexer;
import com.intellij.util.Processor;
import com.intellij.psi.tree.IElementType;
import org.fandev.lang.fan.FanTokenTypes;
import org.fandev.lang.fan.FanParsingLexer;
/**
* Date: Sep 18, 2009
* Time: 12:23:07 AM
*
* @author Dror Bereznitsky
*/
public class FanWordsScanner implements WordsScanner
{
private Lexer myLexer;
public FanWordsScanner()
{
myLexer = new FanParsingLexer();
}
public void processWords(final CharSequence fileText, final Processor<WordOccurrence> processor) {
myLexer.start(fileText, 0, fileText.length(),0);
WordOccurrence occurrence = null; // shared occurrence
while (myLexer.getTokenType() != null) {
final IElementType type = myLexer.getTokenType();
if (type == FanTokenTypes.IDENTIFIER || FanTokenTypes.FAN_SYS_TYPE == type) {
if (occurrence == null){
occurrence = new WordOccurrence(fileText,myLexer.getTokenStart(),myLexer.getTokenEnd(), WordOccurrence.Kind.CODE);
} else {
occurrence.init(fileText,myLexer.getTokenStart(),myLexer.getTokenEnd(), WordOccurrence.Kind.CODE);
}
if (!processor.process(occurrence)) {
return;
}
}
else if (FanTokenTypes.COMMENTS.contains(type)) {
if (!stripWords(processor, fileText,myLexer.getTokenStart(),myLexer.getTokenEnd(), WordOccurrence.Kind.COMMENTS, occurrence)) {
return;
}
}
else if (FanTokenTypes.STRING_LITERALS.contains(type)) {
if (!stripWords(processor, fileText, myLexer.getTokenStart(),myLexer.getTokenEnd(),WordOccurrence.Kind.LITERALS, occurrence)) {
return;
}
if (type == FanTokenTypes.STRING_LITERAL) {
if (!stripWords(processor, fileText, myLexer.getTokenStart(),myLexer.getTokenEnd(),WordOccurrence.Kind.CODE, occurrence)) {
return;
}
}
}
myLexer.advance();
}
}
private static boolean stripWords(final Processor<WordOccurrence> processor,
final CharSequence tokenText,
int from,
int to,
final WordOccurrence.Kind kind,
WordOccurrence occurrence) {
// This code seems strange but it is more effective as Character.isJavaIdentifier_xxx_ is quite costly operation due to unicode
int index = from;
ScanWordsLoop:
while (true) {
while (true) {
if (index == to) {
break ScanWordsLoop;
}
char c = tokenText.charAt(index);
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ||
(Character.isJavaIdentifierStart(c) && c != '$')) {
break;
}
index++;
}
int index1 = index;
while (true) {
index++;
if (index == to) {
break;
}
char c = tokenText.charAt(index);
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
continue;
}
if (!Character.isJavaIdentifierPart(c) || c == '$') {
break;
}
}
if (occurrence == null) {
occurrence = new WordOccurrence(tokenText,index1, index, kind);
} else {
occurrence.init(tokenText,index1, index, kind);
}
if (!processor.process(occurrence)) {
return false;
}
}
return true;
}
}