package hit.core;
import java.util.*;
public class Lexical{
public void Lexical_Analysis(String source){
cur_index = 0;
cur_row = 1;
Token_List.clear();
Lexical_Error_List.clear();
char cur_char;
while((cur_char = getNext(source))!=0){
if(cur_char==' ' || cur_char=='\t'){//空格或者制表符忽略
}else if(cur_char=='\n' || cur_char=='\r'){//回车换行
cur_row++;
}else if((cur_char<='Z' && cur_char>='A')||(cur_char<='z' && cur_char>='a')){//标示符关键字
String temp_string = cur_char+"";
while((cur_char = getNext(source))!=0){
if((cur_char<='Z' && cur_char>='A')||(cur_char<='z' && cur_char>='a')||(cur_char<='9' && cur_char>='0')||cur_char=='_'){
temp_string += cur_char;
}else{
cur_index--;
break;
}
}
Token temp_token = new Token();
if(Key_Word_List.contains(temp_string)){
temp_token.code = Token_Code_List.indexOf(temp_string);
temp_token.value = temp_string;
temp_token.row_number = cur_row;
temp_token.true_value = null;
}else{
temp_token.code = Token_Code_List.indexOf("id");
temp_token.value = temp_string;
temp_token.row_number = cur_row;
if(Sign_List_Temp.contains(temp_string)){
temp_token.true_value = Sign_List_Temp.indexOf(temp_string)+"";
//Sign_List_Temp.add(temp_string);
}else{
temp_token.true_value = Sign_List_Temp.size()+"";
Sign_List_Temp.add(temp_string);
Sign_List.add(temp_token);
}
}
Token_List.add(temp_token);
}else if(cur_char<='9' && cur_char>='0'){//整型或浮点型常量
String temp_string = cur_char+"";
while((cur_char = getNext(source))!=0){
if(cur_char<='9' && cur_char>='0'){
temp_string += cur_char;
}else{
//cur_index--;
break;
}
}
if(cur_char=='.'){
temp_string += cur_char;
while((cur_char = getNext(source))!=0){
if(cur_char<='9' && cur_char>='0'){
temp_string += cur_char;
}else{
cur_index--;
break;
}
}
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("const real");
temp_token.value = temp_string;
temp_token.row_number = cur_row;
temp_token.true_value = temp_string;
Token_List.add(temp_token);
}else{
//if(cur_char!=0){
cur_index--;
//}
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("const int");
temp_token.value = temp_string;
temp_token.row_number = cur_row;
temp_token.true_value = temp_string;
Token_List.add(temp_token);
}
}else if(cur_char=='\"'){//文本字符串
String temp_string="";
while((cur_char = getNext(source))!=0){
if(cur_char=='\\'){
cur_char = getNext(source);
if(!(Escape_Character_List.contains(cur_char+""))){
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 3;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
temp_string += cur_char;
}else{
temp_string += Escape_Character_Temp[Escape_Character_List.indexOf(cur_char+"")];
}
}else if(cur_char == '\"'){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("text");
temp_token.value = temp_string;
temp_token.row_number = cur_row;
temp_token.true_value = temp_string;
Token_List.add(temp_token);
break;
}else{
temp_string += cur_char;
}
}
if(cur_char!='\"'){
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 4;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
}
}else if(cur_char=='\''){ //字符常量
String temp_string = "";
cur_char = getNext(source);
if(cur_char=='\\'){
cur_char = getNext(source);
if(!(Escape_Character_List.contains(cur_char+""))){
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 3;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
temp_string += cur_char;
}else{
temp_string += Escape_Character_Temp[Escape_Character_List.indexOf(cur_char+"")];
}
}else if(cur_char!='\'' && cur_char!='\"' && cur_char!='\\'){
temp_string += cur_char;
}else{
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 1;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
}
cur_char = getNext(source);
if(cur_char=='\''){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("character");
temp_token.value = temp_string;
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 1;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
cur_index--;
}
}else if(cur_char=='/'){ //除号、注释
cur_char = getNext(source);
if(cur_char=='/'){//C++型注释
while((cur_char = getNext(source))!=0){
if(cur_char=='\n' || cur_char=='\r'){
cur_row++;
break;
}
}
}else if(cur_char=='*'){//C型注释
while((cur_char = getNext(source))!=0){
if(cur_char=='*'){
cur_char = getNext(source);
if(cur_char=='/'){
break;
}else{
cur_index--;
}
}
}
if(cur_char!='/'){
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 0;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
}
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("/");
temp_token.value = "/";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(cur_char=='<'){
cur_char = getNext(source);
if(cur_char=='='){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("<=");
temp_token.value = "<=";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else if(cur_char=='>'){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("<>");
temp_token.value = "<>";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("<");
temp_token.value = "<";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(cur_char=='>'){
cur_char = getNext(source);
if(cur_char=='='){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf(">=");
temp_token.value = ">=";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf(">");
temp_token.value = ">";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(cur_char=='='){
cur_char = getNext(source);
if(cur_char=='='){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("==");
temp_token.value = "==";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("=");
temp_token.value = "=";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(cur_char=='+'){
cur_char = getNext(source);
if(cur_char=='+'){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("++");
temp_token.value = "++";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("+");
temp_token.value = "+";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(cur_char=='-'){
cur_char = getNext(source);
if(cur_char=='-'){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("--");
temp_token.value = "--";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
cur_index--;
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf("-");
temp_token.value = "-";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}
}else if(Token_Code_List.contains(cur_char+"")){
Token temp_token = new Token();
temp_token.code = Token_Code_List.indexOf(cur_char+"");
temp_token.value = cur_char+"";
temp_token.row_number = cur_row;
temp_token.true_value = null;
Token_List.add(temp_token);
}else{
Lexical_Error temp_error = new Lexical_Error();
temp_error.code = 2;
temp_error.row_number = cur_row;
Lexical_Error_List.add(temp_error);
}
}
}
public static char getNext(String source){//取字符串下一个
if(cur_index >= source.length()){
cur_index++;
return 0;
}else{
char c = source.charAt(cur_index++);
return c;
}
}
private static int cur_index;//字符串下标位置
private static int cur_row;//当前行数
public ArrayList<Token> Token_List = new ArrayList<>();//词素识别表
public ArrayList<Lexical_Error> Lexical_Error_List = new ArrayList<>();
public ArrayList<Token> Sign_List = new ArrayList<>();//符号表
public ArrayList<String> Sign_List_Temp = new ArrayList<>();
public static String[] Token_Code = {"if","else","for","do","while","return",
"int","float","char","double","boolean","void","true","false","include","string","<",">","=","<=",">=","<>","==",
"*","\\","+","-","/",";","!","character","text","id","const int",
"const real","(",")","{","}","&","|","~",".","#","++","--","%",","};//token种别码表
public static String[] Key_Word = {"if","else","for","do","while","return",
"int","float","char","double","boolean","void","true","false","include","string"};//关键字表
public static String[] Lexical_Error_State = {"注释未封闭","不合法的字符常量","非法字符","非法的转义字符","字符串未封闭"};//词法错误表
public static String[] Escape_Character = {"\'","\\","\"","r","n","t","b","f"};//转义字符表
public static char[] Escape_Character_Temp = {'\'','\\','\"','\r','\n','\t','\b','\f'};
private final List<String> Token_Code_List = Arrays.asList(Token_Code);
private final List<String> Key_Word_List = Arrays.asList(Key_Word);
private final List<String> Lexical_Error_State_List = Arrays.asList(Lexical_Error_State);
private final List<String> Escape_Character_List = Arrays.asList(Escape_Character);
}