package ch.unibe.scg.cc.javaFrontend;
import java.util.ArrayList;
import java.util.Collection;
import java.util.regex.Pattern;
import ch.unibe.scg.cc.Protos.Function;
import ch.unibe.scg.cc.Tokenizer;
import ch.unibe.scg.cc.lines.StringOfLinesFactory;
import dk.brics.automaton.AutomatonMatcher;
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;
/**
 * Splits the text of a Java source file into {@link Function} segments.
 *
 * <p>Segment boundaries are the positions matched by {@link #splitterRegex}:
 * signature-like text ending in an opening curly bracket, or a line that
 * mentions {@code class} or {@code interface}. Matches that look like control
 * flow statements ({@code switch}, {@code while}, {@code if}, {@code for}) are
 * not treated as boundaries.
 */
class JavaTokenizer implements Tokenizer {
	private static final long serialVersionUID = 1L;

	// TODO: the last function in a class always catches one closing curly
	// bracket ("}") too much
	/**
	 * Boundary pattern in dk.brics.automaton syntax. First alternative:
	 * {@code name(params) ... {}. Second alternative: a line containing
	 * {@code class} or {@code interface} that is not directly preceded by a dot
	 * (presumably to skip {@code Foo.class} literals -- TODO confirm). Either
	 * way the match is extended to the end of its line.
	 */
	static final String splitterRegex =
			"([a-zA-Z \\[\\]<>,]*\\([a-zA-Z \\[\\]<>,]*\\)[a-zA-Z \\[\\]<>,]*\\{|([^\n]*[^.]|\\n)(class|interface)[^\n]*)[^\n]*";

	/** Recognizes control flow statements that the splitter mistakes for method headers. */
	private final Pattern wrongMethodKeywords = Pattern.compile("\\b(switch|while|if|for)\\b\\s*\\(");

	/** Compiled form of {@link #splitterRegex}. */
	private final RunAutomaton splitter = new RunAutomaton(new RegExp(splitterRegex).toAutomaton());

	// Prevent subclassing.
	JavaTokenizer() {}

	/**
	 * Cuts {@code file} into consecutive segments, each starting at the
	 * beginning of a line on which an accepted boundary match was found.
	 *
	 * <p>The base line of a segment is the running total of newline counts
	 * reported by {@code StringOfLinesFactory.countOccurrences} for all
	 * preceding segments. A segment is only emitted inside the loop once that
	 * total is greater than zero, which is how the leading package/import
	 * section is dropped. The remainder of the file after the last boundary is
	 * always emitted, so the result is never empty.
	 *
	 * <p>NOTE(review): because the skip rule is based on the line counter, a
	 * segment that ends before any newline has been consumed (e.g. a
	 * declaration on the very first line) is dropped as well -- confirm that
	 * this is intended.
	 *
	 * @param file complete text of a Java source file
	 * @return the segments in file order
	 */
	@Override
	public Iterable<Function> tokenize(String file) {
		int currentLineNumber = 0;
		int lastStart = 0;
		Collection<Function> ret = new ArrayList<>();

		AutomatonMatcher m = splitter.newMatcher(file);
		while (m.find()) {
			// The splitter also fires on "if (...) {" and friends. Skip those so
			// that the statement stays inside the segment currently being built.
			if (wrongMethodKeywords.matcher(m.group()).find()) {
				continue;
			}

			// TODO: hack - can be thrown away with the new regex-engine
			// ensure that start is at the beginning of a line
			int start = lineStart(file, m.start());

			String currentFunctionString = file.substring(lastStart, start);
			// don't save first match (package & import statements)
			if (currentLineNumber > 0) {
				ret.add(newFunction(currentLineNumber, currentFunctionString));
			}
			lastStart = start;
			currentLineNumber += StringOfLinesFactory.countOccurrences(currentFunctionString, '\n');
		}

		// Everything after the last accepted boundary forms the final segment.
		ret.add(newFunction(currentLineNumber, file.substring(lastStart)));
		return ret;
	}

	/**
	 * Returns the offset of the first character of the line containing
	 * {@code offset}. Searches backwards in place instead of copying the
	 * prefix of {@code text}, so the cost does not grow with the file size.
	 */
	private static int lineStart(String text, int offset) {
		// lastIndexOf yields -1 when there is no earlier newline (including
		// offset == 0), which maps to line start 0.
		return text.lastIndexOf('\n', offset - 1) + 1;
	}

	/** Builds a {@link Function} with the given base line and contents. */
	private static Function newFunction(int baseLine, String contents) {
		return Function.newBuilder().setBaseLine(baseLine).setContents(contents).build();
	}
}