package de.fuberlin.bii.tokenmatcher;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;
import de.fuberlin.bii.regextodfaconverter.MinimalDfa;
import de.fuberlin.bii.regextodfaconverter.NfaToDfaConverter;
import de.fuberlin.bii.regextodfaconverter.fsm.FiniteStateMachine;
import de.fuberlin.bii.regextodfaconverter.fsm.State;
import de.fuberlin.bii.tokenmatcher.StatePayload;
import de.fuberlin.bii.tokenmatcher.Token;
import de.fuberlin.bii.tokenmatcher.TokenType;
import de.fuberlin.bii.tokenmatcher.Tokenizer;
import de.fuberlin.bii.tokenmatcher.attributes.ParseStringAttribute;
import de.fuberlin.bii.tokenmatcher.attributes.StringAttribute;
import de.fuberlin.bii.bufferedreader.*;
/**
 * Test class for the {@link Tokenizer} class.
 *
 * <p>Besides the test itself, this class provides factory methods that build
 * small hand-written automata used as tokenizer input.
 *
 * @author Johannes Dahlke
 */
public class TokenizerTest {

    /**
     * Test of getNextToken method, of class Tokenizer.
     *
     * <p>Builds the union of the relational-operator and comment automata,
     * converts it to a DFA and tokenizes the sample source file. Every token
     * that is returned is compared, in order, with the expected token list.
     * The test additionally requires that at least one
     * {@code LexemIdentificationException} is raised while reading the file;
     * such an exception does not abort the scan.
     *
     * @throws Exception if setting up the automaton or the reader fails, or
     *         if the tokenizer raises an exception other than
     *         {@code LexemIdentificationException}
     */
    @SuppressWarnings("static-method")
    @Test
    public void testGetNextToken() throws Exception {
        String sourceFilename = "tests/resources/de/fuberlin/bii/source/tokenmatcher/testrelop.fun";

        FiniteStateMachine<Character, StatePayload> fsm = generateRelopFSM();
        fsm.union(generateCommentFSM());
        NfaToDfaConverter<Character, StatePayload> nfaToDfaConverter =
                new NfaToDfaConverter<Character, StatePayload>();
        fsm = nfaToDfaConverter.convertToDfa(fsm);

        LexemeReader lexemeReader = new BufferedLexemeReader(sourceFilename);
        Tokenizer tokenizer = new Tokenizer(lexemeReader,
                new MinimalDfa<Character, StatePayload>(fsm));

        String[] tokensToFind = { "<OP, LE>", "<OP, LT>", "<OP, NE>",
                "<OP, LT>", "<OP, NE>", "<OP, LT>", "<OP, NE>", "<OP, LE>",
                "<OP, LT>", "<OP, LT>", "<EOF, null>" };

        int i = 0;
        Token currentToken = null;
        boolean expectedWarningOccur = false;
        do {
            try {
                currentToken = tokenizer.getNextToken();
                String tokenString = "<" + currentToken.getType() + ", "
                        + (de.fuberlin.bii.utils.Test.isAssigned(currentToken.getAttribute())
                                ? currentToken.getAttribute().toString()
                                : "null") + ">";
                // Report surplus tokens as a test failure instead of running
                // into an ArrayIndexOutOfBoundsException below.
                if (i >= tokensToFind.length) {
                    Assert.fail("Tokenizer returned more tokens than expected; first surplus token: "
                            + tokenString);
                }
                Assert.assertEquals(tokensToFind[i], tokenString);
                System.out.println(tokenString);
                i++;
            } catch (LexemIdentificationException li) {
                // Expected for the sample file: remember it and keep scanning.
                expectedWarningOccur = true;
            }
        } while (!Token.isEofToken(currentToken));

        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(tokensToFind.length, i);
        Assert.assertTrue(expectedWarningOccur);
    }

    /**
     * Creates an automaton for words that represent valid numbers.
     *
     * @return a finite state machine that recognizes the words
     *         (1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*
     * @throws IllegalStateException if the automaton cannot be built
     */
    public static FiniteStateMachine<Character, TokenType> generateNumberFSM() {
        FiniteStateMachine<Character, TokenType> fsm = new FiniteStateMachine<Character, TokenType>();
        try {
            State<Character, TokenType> start = fsm.getCurrentState();
            State<Character, TokenType> number = new State<Character, TokenType>(TokenType.INT, true);

            // The leading digit must not be zero ...
            for (char digit = '1'; digit <= '9'; digit++) {
                fsm.addTransition(start, number, digit);
            }
            // ... every following digit may be.
            for (char digit = '0'; digit <= '9'; digit++) {
                fsm.addTransition(number, number, digit);
            }
        } catch (Exception e) {
            // Fail loudly: a half-built automaton would only cause confusing
            // follow-up failures in the tests that use it.
            throw new IllegalStateException("Could not build the number automaton", e);
        }
        return fsm;
    }

    /**
     * Creates an automaton for words consisting of ASCII letters.
     *
     * @return a finite state machine that recognizes the words
     *         (a|b|c|...|z|A|B|C|...|Z)(a|b|c|...|z|A|B|C|...|Z)*
     * @throws IllegalStateException if the automaton cannot be built
     */
    public static FiniteStateMachine<Character, StatePayload> generateWordFSM() {
        FiniteStateMachine<Character, StatePayload> fsm = new FiniteStateMachine<Character, StatePayload>();
        try {
            State<Character, StatePayload> start = fsm.getCurrentState();
            State<Character, StatePayload> word = new State<Character, StatePayload>(
                    new de.fuberlin.bii.regextodfaconverter.fsm.StatePayload(
                            "ID", new ParseStringAttribute()),
                    true);

            ArrayList<Character> letters = new ArrayList<Character>();
            for (char c = 'a'; c <= 'z'; c++) {
                letters.add(c);
            }
            for (char c = 'A'; c <= 'Z'; c++) {
                letters.add(c);
            }

            // First letter: start -> word; further letters: word -> word.
            for (Character letter : letters) {
                fsm.addTransition(start, word, letter);
            }
            for (Character letter : letters) {
                fsm.addTransition(word, word, letter);
            }
        } catch (Exception e) {
            throw new IllegalStateException("Could not build the word automaton", e);
        }
        return fsm;
    }

    /**
     * Creates an automaton that recognizes the delimiters of block and line
     * comments.
     *
     * <p>Recognized words and the attribute attached to their "COMMENT" payload:
     * <ul>
     * <li><code>//</code> and <code>--</code>: LINE</li>
     * <li><code>/&#42;</code> and <code>{-</code>: BLOCK_BEGIN</li>
     * <li><code>&#42;/</code> and <code>-}</code>: BLOCK_END</li>
     * </ul>
     *
     * <p>The start state gets two separate transitions for '-' (one per
     * delimiter starting with that character), so the result is
     * nondeterministic as built here.
     *
     * @return a finite state machine that recognizes the six comment
     *         delimiters listed above
     * @throws IllegalStateException if the automaton cannot be built
     */
    public static FiniteStateMachine<Character, StatePayload> generateCommentFSM() {
        FiniteStateMachine<Character, StatePayload> fsm = new FiniteStateMachine<Character, StatePayload>();
        try {
            State<Character, StatePayload> start = fsm.getCurrentState();

            // "//" and "/*" share the state reached after the first '/'.
            State<Character, StatePayload> afterSlash = new State<Character, StatePayload>();
            State<Character, StatePayload> lineComment = acceptingState("COMMENT", "LINE");
            State<Character, StatePayload> blockBegin = acceptingState("COMMENT", "BLOCK_BEGIN");
            fsm.addTransition(start, afterSlash, '/');
            fsm.addTransition(afterSlash, lineComment, '/');
            fsm.addTransition(afterSlash, blockBegin, '*');

            // The remaining delimiters each get their own two-step path.
            addTwoCharDelimiter(fsm, start, '*', '/', "BLOCK_END");
            addTwoCharDelimiter(fsm, start, '{', '-', "BLOCK_BEGIN");
            addTwoCharDelimiter(fsm, start, '-', '}', "BLOCK_END");
            addTwoCharDelimiter(fsm, start, '-', '-', "LINE");
        } catch (Exception e) {
            throw new IllegalStateException("Could not build the comment automaton", e);
        }
        return fsm;
    }

    /**
     * Creates an automaton for relational operators.
     *
     * @return a finite state machine that recognizes the words (&lt;|&lt;=|&lt;&gt;)
     * @throws IllegalStateException if the automaton cannot be built
     */
    public static FiniteStateMachine<Character, StatePayload> generateRelopFSM() {
        FiniteStateMachine<Character, StatePayload> fsm = new FiniteStateMachine<Character, StatePayload>();
        try {
            State<Character, StatePayload> start = fsm.getCurrentState();
            State<Character, StatePayload> lessThan = acceptingState("OP", "LT");
            State<Character, StatePayload> lessOrEqual = acceptingState("OP", "LE");
            State<Character, StatePayload> notEqual = acceptingState("OP", "NE");

            fsm.addTransition(start, lessThan, '<');
            fsm.addTransition(lessThan, lessOrEqual, '=');
            fsm.addTransition(lessThan, notEqual, '>');
        } catch (Exception e) {
            throw new IllegalStateException("Could not build the relational operator automaton", e);
        }
        return fsm;
    }

    /**
     * Creates a state whose payload carries the given token type and a
     * {@link StringAttribute} with the given value. The state is created with
     * the flag {@code true}, as for every recognizing state in this class.
     *
     * <p>The third payload constructor argument is 0 for all such states in
     * this class; its meaning is defined by the payload class and is not
     * visible here.
     *
     * @param tokenType the token type stored in the payload, e.g. "OP"
     * @param attributeValue the value of the string attribute, e.g. "LT"
     * @return the newly created state
     * @throws Exception declared broadly because the exceptions of the fsm
     *         constructors are not visible here; callers wrap it
     */
    private static State<Character, StatePayload> acceptingState(String tokenType, String attributeValue)
            throws Exception {
        return new State<Character, StatePayload>(
                new de.fuberlin.bii.regextodfaconverter.fsm.StatePayload(
                        tokenType, new StringAttribute(attributeValue), 0),
                true);
    }

    /**
     * Adds the path {@code start --first--> (new state) --second--> (new
     * "COMMENT" state)} to the given automaton.
     *
     * @param fsm the automaton to extend
     * @param start the state the path starts in
     * @param first the first character of the delimiter
     * @param second the second character of the delimiter
     * @param kind the string attribute value of the final state, e.g. "LINE"
     * @throws Exception if a state or transition cannot be created
     */
    private static void addTwoCharDelimiter(FiniteStateMachine<Character, StatePayload> fsm,
            State<Character, StatePayload> start, char first, char second, String kind) throws Exception {
        State<Character, StatePayload> afterFirst = new State<Character, StatePayload>();
        State<Character, StatePayload> delimiter = acceptingState("COMMENT", kind);
        fsm.addTransition(start, afterFirst, first);
        fsm.addTransition(afterFirst, delimiter, second);
    }
}