package de.fuberlin.projectF.CodeGenerator; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import de.fuberlin.projectF.CodeGenerator.model.Token; import de.fuberlin.projectF.CodeGenerator.model.TokenType; // Diese Klasse ist für das Einlesen des llvm codes zuständig. Sie Parst die // ganze Datei oder den ganzen String (je nach implementierung) und erstellt einen // Tokenstream der dann über den CodeGenerator dem Translator zur Übersetzung // übergeben wird public abstract class Lexer { ArrayList<Token> tokenStream; HashMap<String,ArrayList<String>> renameCandidate; Debuginfo debug; // Diese Methoden sind in den Unterklassen implementiert public abstract int close(); public abstract Token getNextToken(); public Lexer(Debuginfo debug) { this.debug = debug; } protected ArrayList<Token> getTokenStream() { tokenStream = new ArrayList<Token>(); renameCandidate = new HashMap<String,ArrayList<String>> (); Token tok; while ((tok = getNextToken()).getType() != TokenType.EOF) { tokenStream.add(tok); } debug.println("\n\tPostprocessing of token stream -->"); postprocessing(); debug.println("\t<-- end of postprocessing"); return tokenStream; } //Vorverarbeitung einer LLVM-Code-Zeile protected String[] splitInformation(String line) { line = line.trim(); String[] tmpSplitLine; String[] splitLine; line = line.replace("(", " ( "); line = line.replace(")", " ) "); line = line.replace("[", " [ "); line = line.replace("]", " ] "); line = line.replace("{", " { "); line = line.replace("}", " } "); line = line.replace(",", " , "); line = line.replace(":", " : "); line = line.replace("\t", " "); int p1 = line.lastIndexOf('('); int p2 = line.indexOf(')', p1); if(line.indexOf(')',p2+1) != -1) { p2 = line.indexOf(')', p2+1); } line = replaceBetween(line, p1, p2, ' ', (char) 1); p1 = line.lastIndexOf(" [ ") + 1; p2 = p1; while(p1 != 0) { p2 = line.indexOf(" ] ",p1 + 1); line = replaceBetween(line, p1, p2, ' ', (char) 1); p1 = line.lastIndexOf(" [ ") + 1; } p1 = line.lastIndexOf('{'); p2 = line.indexOf('}', p1); line = replaceBetween(line, p1, p2, ' ', (char) 1); p1 = line.indexOf('"'); p2 = line.indexOf('"', p1 + 1); line = replaceBetween(line, p1, p2, ' ', (char) 1); tmpSplitLine = line.split(" "); //Filtern von nicht relevanten "wörtern" int count = 0; for (int i = 0; i < tmpSplitLine.length; i++) { if (tmpSplitLine[i].isEmpty()) { } else if (tmpSplitLine[i].contentEquals(",")) { } else if (tmpSplitLine[i].contentEquals("*")) { } else if (tmpSplitLine[i].contentEquals("...")) { } else if (tmpSplitLine[i].contentEquals("(")) { } else if (tmpSplitLine[i].contentEquals(")")) { } else if (tmpSplitLine[i].contentEquals("inbounds")) { } else if (tmpSplitLine[i].contentEquals("nounwind")) { } else if (tmpSplitLine[i].contentEquals("nsw")) { } else if (tmpSplitLine[i].contentEquals("tail")) { } else if (tmpSplitLine[i].contentEquals("noreturn")) { } else if (tmpSplitLine[i].contentEquals("private")) { } else if (tmpSplitLine[i].contentEquals("unnamed_addr")) { } else if (tmpSplitLine[i].contentEquals("constant")) { } else if (tmpSplitLine[i].contentEquals("align")) { break; } else { count++; } } splitLine = new String[count]; count = 0; for (int i = 0; i < tmpSplitLine.length; i++) { if (tmpSplitLine[i].isEmpty()) { } else if (tmpSplitLine[i].contentEquals(",")) { } else if (tmpSplitLine[i].contentEquals("*")) { } else if (tmpSplitLine[i].contentEquals("...")) { } else if (tmpSplitLine[i].contentEquals("(")) { } else if (tmpSplitLine[i].contentEquals(")")) { } else if (tmpSplitLine[i].contentEquals("inbounds")) { } else if (tmpSplitLine[i].contentEquals("nounwind")) { } else if (tmpSplitLine[i].contentEquals("nsw")) { } else if (tmpSplitLine[i].contentEquals("tail")) { } else if (tmpSplitLine[i].contentEquals("noreturn")) { } else if (tmpSplitLine[i].contentEquals("private")) { } else if (tmpSplitLine[i].contentEquals("unnamed_addr")) { } else if (tmpSplitLine[i].contentEquals("constant")) { } else if (tmpSplitLine[i].contentEquals("align")) { break; } else { splitLine[count++] = new String(tmpSplitLine[i]); } } return splitLine; } //Diese Funktion ersetzt bestimmte Zeichen in einem bestimmten Bereich des Strings //Mit einem anderen Zeichen private String replaceBetween(String line, int startpoint, int endpoint, char oldChar, char newChar) { if (startpoint == -1) return line; if (endpoint == -1) return line; String tmpLine1, tmpLine2; tmpLine1 = line.substring(startpoint, endpoint + 1); tmpLine2 = line.substring(endpoint + 1); line = line.substring(0, startpoint); line = line.concat(tmpLine1.replace(oldChar, newChar)); line = line.concat(tmpLine2); return line; } //Erstellen eines Token aus einer LLVM-Code-Zeile protected Token fillToken(String[] line) { Token newToken = new Token(); // Definitionen (bei Methodendeklarationen) if (line[0].contentEquals("define")) { debug.println("\t\tFound function definition"); newToken.setType(TokenType.Definition); newToken.setTarget(line[2]); newToken.setTypeTarget(line[1]); fillParameter(newToken, line[3].replace((char) 1, ' ')); } // Declaration (bei externer Methodendeklarationen) else if (line[0].contentEquals("declare")) { debug.println("\t\tFound extern function declaration"); newToken.setType(TokenType.Declare); newToken.setTarget(line[2]); } // Wertzuweisungen else if (line[0].contentEquals("store")) { if(line[2].startsWith("c\"")) { //inline Strings debug.println("\t\tFound inline string definition"); newToken.setType(TokenType.String); newToken.setOp1(line[2].substring(1)); newToken.setTarget("@_str" + line[4].substring(1)); renameCandidate.put(newToken.getTarget(),new ArrayList<String>()); renameCandidate.get(newToken.getTarget()).add(line[4]); } else { debug.println("\t\tFound Assignment"); newToken.setType(TokenType.Assignment); newToken.setTarget(line[4]); newToken.setTypeTarget(line[3].replace((char) 1, ' ')); if(line[1].equals("double")) if(line[2].charAt(0) == '%') newToken.setOp1(line[2]); else newToken.setOp1(transformInIEEE(line[2])); else newToken.setOp1(line[2]); newToken.setTypeOp1(line[1].replace((char) 1, ' ')); } } //Aufruf einer Funktion else if (line[0].contentEquals("call")) { debug.print("\t\tFound call of "); newToken.setTypeTarget(line[1]); int i; for(i = 0; i < line.length; i++) if(line[i].charAt(0) == '@') break; debug.println(line[i]); newToken.setType(TokenType.Call); newToken.setOp1(line[i]); if(line[i].equals("@printf")) { int j; for(j = i; j < line.length; j++) if(line[j].charAt(0) == '(') break; fillParameter(newToken, line[j].replace((char) 1, ' ')); if(line[j].indexOf(')') != line[j].lastIndexOf(')')) { newToken.removeParameters(1); newToken.removeParameters(1); } } else { int j; for(j = i; j < line.length; j++) if(line[j].charAt(0) == '(') break; //lesen der Parameter fillParameter(newToken, line[j].replace((char) 1, ' ')); } } //Sprung befehl else if (line[0].contentEquals("br")) { debug.println("\t\tFound branch "); newToken.setType(TokenType.Branch); if(line[1].equals("label")) { newToken.setOp2(line[2]); newToken.setTypeOp2(line[1]); } else if(line[3].equals("label")) { newToken.setTarget(line[2]); newToken.setOp1(line[4]); newToken.setTypeOp1(line[3]); newToken.setOp2(line[6]); newToken.setTypeOp2(line[5]); } newToken.setTarget(line[2]); } // Return anweisungen else if (line[0].contentEquals("ret")) { debug.println("\t\tFound return "); newToken.setType(TokenType.Return); newToken.setTypeOp1(line[1]); if (line.length > 2) { newToken.setOp1(line[2]); } } // Ende einer Definition else if (line[0].contentEquals("}")) { debug.println("\t\tFound end of current function "); newToken.setType(TokenType.DefinitionEnd); } //Label gefunden else if (line[1].contentEquals(":")) { debug.println("\t\tFound label " + line[0]); newToken.setType(TokenType.Label); newToken.setTarget(line[0]); } else if (line[1].contentEquals("=")) { // Typ-Definition (STRUCT, RECORD) if (line[2].contentEquals("type")) { debug.println("\t\tFound a new type definition "); newToken.setType(TokenType.TypeDefinition); newToken.setTarget(line[0]); fillParameter(newToken, line[3].replace((char) 1, ' ')); } //globaler String else if (line[0].startsWith("@.str")) { debug.println("\t\tFound a global string definition "); newToken.setType(TokenType.String); newToken.setTarget(line[0]); newToken.setTypeTarget(line[2].replace((char) 1, ' ')); newToken.setOp1(line[3].substring(1).replace((char) 1, ' ') .replace(" ( ", "(") .replace(" ) ", ")") .replace(" [ ", "[") .replace(" ] ", "]") .replace(" { ", "{") .replace(" } ", "}") .replace(" , ", ",") .replace(" : ", ":")); newToken.setOp2("" + line[3].length()); } // Expression Int else if (line[2].contentEquals("add") || line[2].contentEquals("sub") || line[2].contentEquals("mul") || line[2].contentEquals("sdiv") || line[2].contentEquals("or") || line[2].contentEquals("and") || line[2].contentEquals("xor") || line[2].contentEquals("shl") || line[2].contentEquals("ashr") || line[2].contentEquals("lshr")){ debug.println("\t\tFound an integer expression"); newToken.setType(TokenType.ExpressionInt); newToken.setTarget(line[0]); newToken.setTypeTarget(line[2]); newToken.setOp1(line[4]); newToken.setOp2(line[5]); } // Expression Double else if (line[2].contentEquals("fadd") || line[2].contentEquals("fsub") || line[2].contentEquals("fmul") || line[2].contentEquals("fdiv")) { debug.println("\t\tFound a double expression"); newToken.setType(TokenType.ExpressionDouble); newToken.setTarget(line[0]); newToken.setTypeTarget(line[2]); if(line[4].contains("e")) line[4] = transformInIEEE(line[4]); newToken.setOp1(line[4]); if(line[5].contains("e")) line[5] = transformInIEEE(line[5]); newToken.setOp2(line[5]); } //Cast von Integer zu Double else if (line[2].contentEquals("sitofp") || line[2].contentEquals("fptosi")) { debug.println("\t\tFound cast from integer to double"); newToken.setType(TokenType.Cast); newToken.setTarget(line[0]); newToken.setTypeTarget(line[6]); newToken.setOp1(line[4]); newToken.setTypeOp1(line[3]); } //Pointer auf ein Array, Record, oder String else if (line[2].contentEquals("getelementptr")) { debug.println("\t\tFound a pointer declaration"); newToken.setType(TokenType.Getelementptr); newToken.setTarget(line[0]); newToken.setTypeTarget(line[3].replace((char) 1, ' ')); newToken.setOp1(line[4]); newToken.setOp2(line[8]); newToken.setTypeOp2(line[7]); } // Wert aus Speicher lesen else if (line[2].contentEquals("load")) { debug.println("\t\tFound load"); newToken.setType(TokenType.Load); newToken.setTarget(line[0]); newToken.setOp1(line[4]); newToken.setTypeTarget(line[3]); } // Speicher Allocierungen else if (line[2].contentEquals("alloca")) { debug.println("\t\tFound an allocation"); newToken.setType(TokenType.Allocation); newToken.setTarget(line[0]); newToken.setTypeTarget(line[3].replace((char) 1, ' ')); //Aufruf einer Funktion mit Rückgabewert } else if (line[2].contentEquals("call")) { debug.print("\t\tFound a call to "); newToken.setTarget(line[0]); newToken.setTypeTarget(line[3]); int i; for(i = 0; i < line.length; i++) if(line[i].charAt(0) == '@') break; debug.println(line[i]); newToken.setType(TokenType.Call); newToken.setOp1(line[i]); //printf call if(line[i].equals("@printf")) { int j; for(j = i; j < line.length; j++) if(line[j].charAt(0) == '(') break; fillParameter(newToken, line[j].replace((char) 1, ' ')); if(line[j].indexOf(')') != line[j].lastIndexOf(')')) { newToken.removeParameters(1); newToken.removeParameters(1); } } else { int j; for(j = i; j < line.length; j++) if(line[j].charAt(0) == '(') break; //Parsen der Parameter fillParameter(newToken, line[j].replace((char) 1, ' ')); } } //Integer Compare Befehl else if (line[2].contentEquals("icmp")) { debug.println("\t\tFound a comparism"); newToken.setType(TokenType.CompareInteger); newToken.setTarget(line[0]); newToken.setTypeTarget(line[3]); newToken.setOp1(line[5]); newToken.setTypeOp1(line[4]); newToken.setOp2(line[6]); //Compare Befehl für Double werte } else if (line[2].contentEquals("fcmp")) { debug.println("\t\tFound a comparism"); newToken.setType(TokenType.CompareDouble); newToken.setTarget(line[0]); newToken.setTypeTarget(line[3]); newToken.setOp1(line[5]); newToken.setTypeOp1(line[4]); newToken.setOp2(line[6]); //Unbekannte LLVM-Code-Zeile } else { debug.println("\t\tToken is undefined"); newToken.setType(TokenType.Undefined); } } else { debug.println("\t\tToken is undefined"); newToken.setType(TokenType.Undefined); } return newToken; } //Diese Funktion konvertiert die Darstellung von Double Werten aus der LLVM-Code-Datei, //in das für den Prozessor übliche Format IEEE754 private String transformInIEEE(String string) { String[] sString = string.split("e"); double result = Double.parseDouble(sString[0]); if(sString[1].charAt(0) == '-') { sString[1] = sString[1].substring(1); for(int i = Integer.parseInt(sString[1]); i > 0; i--) { result = result / 10; } } else { sString[1] = sString[1].substring(1); for(int i = Integer.parseInt(sString[1]); i > 0; i--) { result = result * 10; } } long tmp = Double.doubleToLongBits(result); String tmp2 = Long.toHexString(tmp); return new String("0x" + tmp2); } //Parst die Parameter die in llvm entwerde in {}, () oder [] enthalten sind //und packt diese in den Token private void fillParameter(Token newToken, String line) { line = line.replace('(', ' '); line = line.replace(')', ' '); line = line.replace('{', ' '); line = line.replace('}', ' '); line = line.trim(); if(line.isEmpty()) return; String[] pair = line.split(","); for(int i = 0; i < pair.length; i++) { pair[i] = pair[i].trim(); int p1 = pair[i].indexOf('['); int p2 = pair[i].indexOf(']'); pair[i] = replaceBetween(pair[i], p1, p2, ' ', (char) 1); } for(String p : pair) { String[] pairValue = p.split(" "); pairValue[0] = pairValue[0].replace((char)1, ' '); if(pairValue.length > 2) newToken.addParameter(pairValue[2], pairValue[0]); else if(pairValue.length > 1) { if(!pairValue[1].startsWith("%") && pairValue[1].contains("e")) pairValue[1] = transformInIEEE(pairValue[1]); newToken.addParameter(pairValue[1], pairValue[0]); } else newToken.addParameter("", pairValue[0]); } } //Nachverarbeitung des erzeugten Tokenstreams private void postprocessing() { //inline strings umbenennen for (Map.Entry<String, ArrayList<String>> entry : renameCandidate.entrySet()) { String key = entry.getKey(); // umbenennene for(int i = 0; i < tokenStream.size(); i++) if(tokenStream.get(i).getType() == TokenType.Getelementptr) for(int k = 0; k < entry.getValue().size(); k++) if(tokenStream.get(i).getOp1().equals(entry.getValue().get(k))) tokenStream.get(i).setOp1(key); } //sprünge handhaben HashMap<String,HashMap<Integer, Integer>> contexts = new HashMap<String,HashMap<Integer, Integer>>(); String currentContext = ""; int currentBlock = 0; int var = 0; //in Blöcke unterteilen und größte Variable im Block ermitteln for(int i = 0; i < tokenStream.size(); i++) { Token tok = tokenStream.get(i); if(tok.getType() == TokenType.Definition) { currentContext = tok.getTarget(); currentBlock = 0; var = 0; debug.println("\t\tnew Context: " + currentContext); contexts.put(currentContext, new HashMap<Integer, Integer>()); debug.println("\t\t\tadd Block: " + currentBlock + " highest variable: " + var); contexts.get(currentContext).put(currentBlock, var); } else if(tok.getType() == TokenType.Branch || tok.getType() == TokenType.Return) { currentBlock = ++var; Token token = new Token(); token.setType(TokenType.Label); token.setTarget(String.valueOf(var)); debug.println("\t\t\tinsert Label \"" + token.getTarget() + "\" in token token stream"); tokenStream.add(i+1, token); debug.println("\t\t\tadd Block: " + currentBlock + " highest variable: " + var); contexts.get(currentContext).put(currentBlock, var); } else if(tok.getType() == TokenType.Label) { } else { if(!tok.getTarget().isEmpty() && tok.getTarget().length() > 1) { try { int tmp_var = Integer.valueOf(tok.getTarget().substring(1)); if(tmp_var > var) { var = tmp_var; debug.println("\t\t\tchange Block: " + currentBlock + " highest variable: " + var); contexts.get(currentContext).put(currentBlock, var); } } catch(NumberFormatException e) { } } } } } }