/** * <copyright> * * Copyright (c) 2005, 2006, 2007, 2008 Springsite BV (The Netherlands) and others * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Martin Taal * </copyright> * * $Id: AnnotationTokenizer.java,v 1.11 2009/03/03 15:33:23 mtaal Exp $ */ package org.eclipse.emf.teneo.annotations.parser; import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.eclipse.emf.ecore.ENamedElement; import org.eclipse.emf.teneo.annotations.StoreAnnotationsException; /** * Tokenizes a java annotation. The main tokens are: - TypeName - Identifier - * Value - Array * * For example the following java annotation * * @GenericGenerator(name="hibseq", strategy = "hilo", parameters = { * @Parameter(name="table", value = "hilo_table"), * @Parameter(name="column", value="the_hilo_column")} ) * * Here GenericGenerator is a TypeName, name and * strategy are Identifiers and "hilo_table" is a * value, the array is the part between the {}. * * There is a special case where the typename is * actually a list of values, e.g. SecondaryTables. * These are treated as a special type of TypeName * which is translated into a ComplexNode with * isList=true. This is currently only supported at * the top level. * * @author <a href="mailto:mtaal at elver.org">Martin Taal</a> */ class AnnotationTokenizer { /** Log it */ private final static Log log = LogFactory.getLog(AnnotationTokenizer.class); /** Special Tokens */ static final int T_EOF = 4096; private static final int T_EOL = 8192; private static final int T_UNKNOWN = 16384; /** * Annotation tokens */ static final int T_TYPENAME = 2; static final int T_IDENTIFIER = 4; static final int T_ARRAYSTART = 8; static final int T_ARRAYEND = 16; static final int T_VALUE = 32; static final int T_IS = 64; static final int T_CONTENTSTART = 128; static final int T_CONTENTEND = 256; static final int T_COMMA = 512; /** Data */ private char[] data; /** Length */ private int length; /** Points to the start of the current token */ private int tokBeg; /** Ponts to the end of the current token. */ private int tokEnd; /** The last returned token */ private int currentToken = T_EOF - 1; private HashMap<Integer, String> constantToName = new HashMap<Integer, String>(); /** * Constructor */ AnnotationTokenizer(ENamedElement eNamedElement, String source) { setSource(source.toCharArray()); constantToName.put(2, "Annotation"); constantToName.put(4, "Attribute Name"); constantToName.put(8, "Array Start ({)"); constantToName.put(16, "Array End (})"); constantToName.put(32, "Value (e.g. String, int)"); constantToName.put(64, "= character"); constantToName.put(128, "Annotation content start ('(')"); constantToName.put(256, "Annotation content end (')')"); constantToName.put(512, "Comma (,)"); constantToName.put(1024, "Carriage Return"); constantToName.put(2048, "Line Feed"); constantToName.put(4096, "EOF"); constantToName.put(8192, "EOL"); constantToName.put(16384, "Unknown"); } public String getCurrentTokenName() { final String name = constantToName.get(currentToken); if (name == null) { throw new StoreAnnotationsException("Illegal token " + currentToken); } return name; } public String getTokenNames(long tokens) { final StringBuffer sb = new StringBuffer(); for (Integer key : constantToName.keySet()) { if ((tokens & key.intValue()) > 0) { if (sb.length() > 0) { sb.append(", "); } sb.append(constantToName.get(key)); } } return sb.toString(); } /** * Sets the source to be tokenized form a character array. */ private void setSource(char[] iSource) { length = iSource.length; // Append three null-characters as sentinel since three // look-ahead characters are required (e.g. for the '>>>=' token). data = new char[length + 3]; System.arraycopy(iSource, 0, data, 0, length); data[length] = 0; // Append the sentinel characters. data[length + 1] = 0; data[length + 2] = 0; tokBeg = 0; tokEnd = 0; log.debug(dump()); } /** * Returns the next token. */ final int nextToken() { currentToken = getNextToken(); return currentToken; } /** Return the curren token */ final int getCurrentToken() { return currentToken; } /** * Returns the next token. */ final int getNextToken() { int lCur = tokEnd; Loop: for (;;) { char lChar = data[lCur]; // Grab next character. switch (lChar) { case ' ': // Skip leading whitespace! case '\n': // new line case '\r': // Carriage Return. case '\f': // Line Feed. case '\t': { lCur++; continue Loop; // --> Keep on skipping leading whitespace! } case 0: // End of buffer. { if (lCur == length) // Guard against embedded nulls in the // Source. { // EOBuf may only occur at the first non whitespace char. return T_EOF; // --> End of file. } throw new AnnotationParserException( "Char is 0 but end not reached " + lCur + " " + length); } // TYPENAME case '@': { ++lCur; // get rid of the @ tokBeg = lCur; // Save starting point of current lexeme. do { lChar = data[++lCur]; } while (lChar == '-' || lChar == '_' || lChar == '/' || lChar == '@' || ('0' <= lChar && lChar <= '9') || lChar == ':' || ('a' <= lChar && lChar <= 'z') || ('A' <= lChar && lChar <= 'Z')); tokEnd = lCur; // Save endpoint of current lexeme. return T_TYPENAME; // --> Identifier. } // VALUE with double quotes case '"': { // after the dollar the identifier part needs to be found tokBeg = lCur; // Save starting point of current lexeme. do { lChar = data[++lCur]; } while (lChar == ',' || lChar == '-' || lChar == '.' || lChar == ' ' || lChar == '_' || lChar == '/' || lChar == '`' || lChar == '@' || lChar == ':' || lChar == '=' || lChar == '(' || lChar == ')' || lChar == '{' || lChar == '}' || lChar == '\'' || lChar == '#' || lChar == '&' || lChar == '<' || lChar == '>' || lChar == '$' || lChar == ';' || lChar == '%' || lChar == '*' || lChar == '\'' || ('0' <= lChar && lChar <= '9') || ('a' <= lChar && lChar <= 'z') || lChar == '?' || ('A' <= lChar && lChar <= 'Z')); if (lChar != '"') { final AnnotationParserException e = new AnnotationParserException( "Value not closed with double quote, see the _ for the location " + getErrorText()); tokEnd = lCur + 1; // prevent infinite looping throw e; } tokEnd = lCur + 1; return T_VALUE; } case '(': { tokBeg = lCur; tokEnd = lCur + 1; return T_CONTENTSTART; } case ')': { tokBeg = lCur; tokEnd = lCur + 1; return T_CONTENTEND; } case '{': { tokBeg = lCur; tokEnd = lCur + 1; return T_ARRAYSTART; } case '}': { tokBeg = lCur; tokEnd = lCur + 1; return T_ARRAYEND; } case ',': { tokBeg = lCur; tokEnd = lCur + 1; return T_COMMA; } case '=': { tokBeg = lCur; tokEnd = lCur + 1; return T_IS; } default: // the rest must be identifiers { // after the dollar the identifier part needs to be found tokBeg = lCur; // Save starting point of current lexeme. do { lChar = data[++lCur]; } while (lChar == '.' || lChar == '-' || lChar == '_' || lChar == '/' || lChar == '@' || ('0' <= lChar && lChar <= '9') || ('a' <= lChar && lChar <= 'z') || ('A' <= lChar && lChar <= 'Z')); tokEnd = lCur; // Save endpoint of current lexeme. return T_IDENTIFIER; // --> Identifier. } } } } /** * Returns the current lexeme. */ final String getLexeme() { return new String(data, tokBeg, tokEnd - tokBeg); } /** * Returns an error version of the query with a _ at the error location. */ final String getErrorText() { // final StringBuffer result = new StringBuffer(); // result.append("E Element: " + eNamedElement.getName() + "\n"); // result.append("Begin: " + tokBeg + "\n"); // result.append("End: " + tokEnd + "\n"); // result.append("Length: " + data.length + "\n"); // result.append("first part: " + new String(data, 0, tokEnd) + "\n"); // result.append("Last part: " + new String(data, tokEnd, data.length - // tokEnd - 2) + "\n"); return new String(data, 0, tokEnd) + "_" + new String(data, tokEnd, data.length - tokEnd - 2) + "\nCurrent lexeme: " + getLexeme(); } /** * Dumps the tokens. */ final String dump() { final StringBuffer result = new StringBuffer(); int oldTokBeg = tokBeg; int oldTokEnd = tokEnd; int oldCurrentToken = currentToken; // Reset pointers. tokBeg = 0; tokEnd = 0; boolean lFinished = false; int lTok = 0; while (!lFinished) { try { lTok = nextToken(); if (lTok != T_EOL) // Don't log End-of-line tokens. { result.append("Tok: " + lTok + ": '" + getLexeme() + "'\n"); } } catch (AnnotationParserException e) { result.append("Tok: " + T_UNKNOWN + ": " + getLexeme() + "'"); throw e; } lFinished = lTok == T_EOF; } // Restore state. tokBeg = oldTokBeg; tokEnd = oldTokEnd; currentToken = oldCurrentToken; return result.toString(); } }