/**
* The following piece of code was written by Erik Linstead
* when he was a PhD candidate at UC Irvine.
* This code implements functionality to test whether each Java token
* presented to it is one of the Java keywords or
* stopwords, to see if it needs to be excluded.
*/
/**
* This class is used to extract keywords from source code
* comments or method/class/variable names.
*
* @author Erik Linstead (elinstea@ics.uci.edu)
*/
//package edu.uci.ics.mondego.codeindexer;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Pattern;
//import edu.uci.ics.mondego.common.gof.Singleton;
//import edu.uci.ics.mondego.common.gof.SingletonManager;
/**
 * Splits free text and Java identifiers into candidate keyword tokens.
 *
 * <p>Comments are split on whitespace. Code fragments (method, class or
 * variable names, optionally with a parenthesised argument list) are split on
 * parentheses and commas, then on the separators {@code _}, {@code $} and
 * {@code .}, and finally on camel-case boundaries. Empty tokens are never
 * returned.
 *
 * <p>The class is a lazily created singleton; {@link #getInstance()} is not
 * synchronized.
 */
public class KeywordExtractor { //implements Singleton {

    /** One or more whitespace characters; separates raw tokens. */
    private static final Pattern WHITESPACE = Pattern.compile("[\\s]+");

    /** A single {@code _}, {@code $} or {@code .}; separates the parts of an identifier. */
    private static final Pattern IDENTIFIER_SEPARATOR = Pattern.compile("[_$.]");

    //=================================================================================

    private static KeywordExtractor instance = null;

    /**
     * Returns the shared extractor, creating it on first use.
     *
     * @return the single {@code KeywordExtractor} instance
     */
    public static KeywordExtractor getInstance() {
        if (instance == null) {
            instance = new KeywordExtractor();
            //SingletonManager.getInstance().register(instance);
        }
        return instance;
    }

    /**
     * this method is for testing
     */
    /* public void destroyInstance() {
    SingletonManager.getInstance().unregister(instance);
    instance = null;
    }*/

    //=================================================================================

    /** Private so that instances are only obtained through {@link #getInstance()}. */
    private KeywordExtractor() {
        super();
    }

    /**
     * Splits a comment into its whitespace-separated words.
     *
     * @param comment the comment text; may be {@code null}
     * @return the non-empty words in order of appearance; an empty list when
     *         {@code comment} is {@code null}, empty or only whitespace
     */
    public ArrayList<String> processComment(String comment) {
        ArrayList<String> result = new ArrayList<String>();
        if (comment == null) {
            return result;
        }
        addNonEmpty(result, WHITESPACE.split(comment));
        return result;
    }

    /**
     * Breaks a code fragment such as {@code getUserName} or {@code foo(a,b)}
     * into its constituent words.
     *
     * @param codeFragment the identifier or method prototype; may be {@code null}
     * @return the non-empty words in order of appearance; an empty list when
     *         {@code codeFragment} is {@code null} or yields no words
     */
    public ArrayList<String> processCode(String codeFragment) {
        ArrayList<String> result = new ArrayList<String>();
        if (codeFragment == null || codeFragment.equals("")) {
            return result;
        }
        String prototype = stripMethodSyntax(codeFragment);
        addNonEmpty(result, WHITESPACE.split(prototype));

        ArrayList<String> firstCut = removeUnderScore(result);
        ArrayList<String> caseCut = parseCase(firstCut);
        //replace this with better rules later
        /* if(caseCut.size()==1 &&
        !(codeFragment.
        compareToIgnoreCase((String)caseCut.get(0))==0)) {
        caseCut.add(codeFragment);
        } else if (caseCut.size()>1){
        caseCut.add(codeFragment);
        } */
        return caseCut;
    }

    /**
     * Extracts the words from the names of every method reported by
     * {@link Class#getMethods()} for the given class.
     *
     * @param c the class to inspect; must not be {@code null}
     * @return the concatenated results of {@link #processCode(String)} for
     *         each method name, in the order the methods are reported
     */
    public ArrayList<String> processJavaClass(Class<?> c) {
        ArrayList<String> result = new ArrayList<String>();
        for (Method method : c.getMethods()) {
            result.addAll(processCode(method.getName()));
        }
        return result;
    }

    /** Appends every non-empty element of {@code pieces} to {@code target}. */
    private static void addNonEmpty(ArrayList<String> target, String[] pieces) {
        for (String piece : pieces) {
            if (piece.length() > 0) {
                target.add(piece);
            }
        }
    }

    /**
     * Splits each token on {@code _}, {@code $} and {@code .}, trimming the
     * pieces and dropping those that end up empty (e.g. from a leading
     * underscore).
     */
    private ArrayList<String> removeUnderScore(ArrayList<String> tokens) {
        ArrayList<String> result = new ArrayList<String>();
        for (String token : tokens) {
            for (String word : IDENTIFIER_SEPARATOR.split(token)) {
                String trimmed = word.trim();
                if (trimmed.length() > 0) {
                    result.add(trimmed);
                }
            }
        }
        return result;
    }

    /**
     * Removes all whitespace from {@code s}, then turns every parenthesis and
     * comma into a single space so the pieces can be split on whitespace.
     */
    private String stripMethodSyntax(String s) {
        String result = WHITESPACE.matcher(s).replaceAll("");
        result = result.replace('(', ' ');
        result = result.replace(')', ' ');
        result = result.replace(',', ' ');
        return result;
    }

    /**
     * Splits each token at camel-case boundaries. A new word starts at a
     * letter that follows a non-letter, or at an upper-case letter that
     * follows a lower-case letter. Runs of upper-case letters are kept
     * together (e.g. {@code HTMLParser} is not split).
     */
    private ArrayList<String> parseCase(ArrayList<String> tokens) {
        ArrayList<String> result = new ArrayList<String>();
        for (String token : tokens) {
            int length = token.length();
            if (length == 0) {
                // Nothing to emit; an empty token must not become a keyword.
                continue;
            }
            int wordStart = 0;
            for (int i = 1; i < length; ++i) {
                if (startsNewWord(token.charAt(i - 1), token.charAt(i))) {
                    result.add(token.substring(wordStart, i));
                    wordStart = i;
                }
            }
            result.add(token.substring(wordStart));
        }
        return result;
    }

    /**
     * Tells whether {@code current} begins a new word given the character
     * {@code previous} immediately before it.
     */
    private static boolean startsNewWord(char previous, char current) {
        if (!Character.isLetter(current)) {
            return false;
        }
        if (!Character.isLetter(previous)) {
            return true;
        }
        return Character.isUpperCase(current) && Character.isLowerCase(previous);
    }
}