/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/boca/com.ibm.adtech.boca.jastor/src/com/ibm/adtech/boca/jastor/JavaIdentifierEncoder.java,v $
* Created by:
* Created on: 01/23/2007
* Revision: $Id: JavaIdentifierEncoder.java 172 2007-07-31 14:22:23Z mroy $
*
* Contributors:
* IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.rdf.jastor;
/**
* Encodes arbitrary strings into valid Java identifiers. A matching decode is provided to show that the encoding is reversible.
*
* @author Joe Betz
* @author Ben Szekely (<a href="mailto:bhszekel@us.ibm.com">bhszekel@us.ibm.com</a>)
*/
public class JavaIdentifierEncoder {
    /** Character that opens every escape sequence in an encoded identifier. */
    static final char ESCAPE_CHAR = '_';

    /**
     * Marker written after {@link #ESCAPE_CHAR} when the first character of the input is a valid identifier part but not a valid identifier start (for example a digit). {@link #decode(String)} drops the
     * two-character prefix and keeps the character that follows.
     */
    private static final char BLANK_CHAR = '$';

    /** Character that closes a named or numeric escape sequence. */
    static final char ESCAPE_CLOSE_CHAR = '_';

    /**
     * Words that {@link #encode(String)} must not return unchanged: Java reserved words and literals, plus a few type names that this encoder also treats as reserved.
     */
    static final String[] keywords = new String[] { "abstract", "default", "if", "private", "this", "boolean", "do", "implements", "protected", "throw", "break", "double", "import", "public", "throws", "byte", "else", "instanceof", "return", "transient", "case", "extends", "int", "short", "try", "catch", "final", "interface", "static", "void", "char", "finally", "long", "strictfp", "volatile", "class", "float", "native", "super", "while", "const", "for", "new", "switch", "continue", "goto",
            "package", "synchronized", "Resource", "Statement", "Thing", "Property",
            // Reserved words and literals that were missing from the list; none of them is a legal identifier.
            "assert", "enum", "true", "false", "null" };

    /**
     * Readable escape names for common punctuation, as {symbol, name} pairs. Names must be unique, otherwise {@link #decode(String)} cannot tell the symbols apart.
     */
    static final String[][] prettyMapping = new String[][] { { ".", "DOT" }, { ",", "COMMA" }, { "+", "PLUS" }, { "-", "DASH" }, { "\"", "QUOTE" }, { "'", "TICK" }, { "~", "TILDE" }, { "`", "BACKTICK" }, { "=", "EQUALS" }, { "/", "SLASH" }, { "\\", "BACKSLASH" }, { "<", "LTHAN" }, { ">", "GTHAN" }, { "[", "STRTBLK" }, { "]", "ENDBLK" }, { "{", "STRTBRKT" },
            // "}" used to share the name "LTHAN" with "<", which made it decode to "<".
            { "}", "ENDBRKT" }, { "?", "QMARK" }, { "!", "BANG" }, { "@", "AT" }, { "#", "POUND" }, { "$", "DOLLARSIGN" }, { "%", "PERCENT" }, { "^", "CAP" },
            { "&", "AND" }, { "*", "ASTRISK" }, { "(", "STRTPAREN" }, { ")", "ENDPAREN" }, { "|", "BAR" } };

    /**
     * Determine if identifier is a keyword
     *
     * @param identifier
     *            string to check
     * @return true if identifier is one of the entries of {@link #keywords}
     */
    public static boolean isKeyword(String identifier) {
        for (String keyword : keywords) {
            if (keyword.equals(identifier)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encode a string to its encoded version.
     * <p>
     * Each {@link #ESCAPE_CHAR} in the input is doubled. A first character that is a valid identifier part but not a valid identifier start is prefixed with {@link #ESCAPE_CHAR} and the blank marker. Any other
     * character that is not a valid identifier part is replaced by {@link #encode(char)}. If the result is a keyword it is prefixed with a single {@link #ESCAPE_CHAR}.
     *
     * @param anyString
     *            string to encode
     * @return encoded string
     */
    public static String encode(String anyString) {
        StringBuilder javaIdentifier = new StringBuilder();
        for (int i = 0; i < anyString.length(); i++) {
            char c = anyString.charAt(i);
            if (c == ESCAPE_CHAR) {
                // A literal escape character is written twice so decode can tell it from an escape sequence.
                javaIdentifier.append(ESCAPE_CHAR).append(ESCAPE_CHAR);
            } else if (i == 0) {
                if (Character.isJavaIdentifierStart(c)) {
                    javaIdentifier.append(c);
                } else if (Character.isJavaIdentifierPart(c)) {
                    // Legal inside an identifier but not as its first character: shield it with the blank marker.
                    javaIdentifier.append(ESCAPE_CHAR).append(BLANK_CHAR).append(c);
                } else {
                    javaIdentifier.append(encode(c));
                }
            } else if (Character.isJavaIdentifierPart(c)) {
                javaIdentifier.append(c);
            } else {
                javaIdentifier.append(encode(c));
            }
        }
        String identifier = javaIdentifier.toString();
        if (isKeyword(identifier)) {
            identifier = "" + ESCAPE_CHAR + identifier;
        }
        return identifier;
    }

    /**
     * Encodes a single char as an escape sequence: the pretty name from {@link #prettyMapping} if the char has one, otherwise its decimal char value, in both cases wrapped in {@link #ESCAPE_CHAR} and
     * {@link #ESCAPE_CLOSE_CHAR}.
     *
     * @param c
     *            char to encode
     * @return encoded char string
     */
    public static String encode(char c) {
        for (String[] mapping : prettyMapping) {
            if (mapping[0].charAt(0) == c) {
                return "" + ESCAPE_CHAR + mapping[1] + ESCAPE_CLOSE_CHAR;
            }
        }
        return "" + ESCAPE_CHAR + (int) c + ESCAPE_CLOSE_CHAR;
    }

    /**
     * Decode a string to its decoded version. This reverses {@link #encode(String)}.
     *
     * @param javaIdentifier
     *            string to decode
     * @return decoded string
     * @throws Exception
     *             if the input contains a dangling escape character, an unterminated or non-numeric numeric escape, a numeric escape outside the char range, or an unknown escape name
     */
    //FIXEXCEPTIONS:Should throw a more precise exception
    public static String decode(String javaIdentifier) throws Exception {
        int length = javaIdentifier.length();
        // A keyword is encoded as ESCAPE_CHAR + keyword, so that form is recognized before any escape parsing.
        if (length > 0 && javaIdentifier.charAt(0) == ESCAPE_CHAR) {
            String identifier = javaIdentifier.substring(1);
            if (isKeyword(identifier)) {
                return identifier;
            }
        }
        StringBuilder string = new StringBuilder();
        int i = 0;
        while (i < length) {
            char c = javaIdentifier.charAt(i);
            if (c != ESCAPE_CHAR) {
                string.append(c);
                i++;
                continue;
            }
            int start = i + 1; // index of the first character after the opening escape char
            if (start >= length) {
                throw new Exception("Dangling escape character at end of: " + javaIdentifier);
            }
            char next = javaIdentifier.charAt(start);
            if (next == ESCAPE_CHAR) {
                // Doubled escape char stands for one literal escape char.
                string.append(ESCAPE_CHAR);
                i = start + 1;
            } else if (next == BLANK_CHAR) {
                // Blank marker: nothing to emit, the shielded character follows as a plain character.
                i = start + 1;
            } else {
                int close = javaIdentifier.indexOf(ESCAPE_CLOSE_CHAR, start);
                if (close < 0) {
                    throw new Exception("Impossible to decode starting at: " + javaIdentifier.substring(start));
                }
                if (Character.isDigit(next)) {
                    string.append(decodeNumericEscape(javaIdentifier, start, close));
                } else {
                    String prettyName = javaIdentifier.substring(start, close);
                    String symbol = lookupPrettyName(prettyName);
                    if (symbol == null) {
                        throw new Exception("No decoding found for: " + prettyName);
                    }
                    string.append(symbol);
                }
                i = close + 1;
            }
        }
        return string.toString();
    }

    /** Converts the decimal digits in {@code [start, close)} to the char they denote, rejecting non-digits and values outside the char range. */
    private static char decodeNumericEscape(String javaIdentifier, int start, int close) throws Exception {
        for (int j = start; j < close; j++) {
            if (!Character.isDigit(javaIdentifier.charAt(j))) {
                throw new Exception("Escape started with number but was not purely a number: " + javaIdentifier.substring(start));
            }
        }
        String digits = javaIdentifier.substring(start, close);
        int val;
        try {
            val = Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            throw new Exception("Numeric escape is out of range: " + digits, e);
        }
        if (val > Character.MAX_VALUE) {
            throw new Exception("Numeric escape is out of range: " + digits);
        }
        return (char) val;
    }

    /** Returns the symbol whose pretty name equals {@code prettyName}, or {@code null} if there is none. */
    private static String lookupPrettyName(String prettyName) {
        for (String[] mapping : prettyMapping) {
            if (mapping[1].equals(prettyName)) {
                return mapping[0];
            }
        }
        return null;
    }
}