package org.openprovenance.prov.model;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.text.translate.AggregateTranslator;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
import org.apache.commons.lang3.text.translate.LookupTranslator;
import org.apache.commons.lang3.text.translate.UnicodeEscaper;
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
public class QualifiedNameUtils {
public static final CharSequenceTranslator ESCAPE_PROV_LOCAL_NAME =
new AggregateTranslator(
new LookupTranslator(
new String[][] {
{"=", "\\="},
{"'", "\\'"},
{"(", "\\("},
{")", "\\)"},
{",", "\\,"},
// {"-", "\\-"}, Should not be escaped since - is accepted by PN_CHARS
{":", "\\:"},
{";", "\\;"},
{"[", "\\["},
{"]", "\\]"},
//{".", "\\."}, Should not be escaped since - is accepted by PN_CHARS, except for the last one
// {"%", "\\%"}, // This is not in PROV-N but is required for <percent> production
// {"<", "%3C"},
// {">", "%3E"},
}),
//new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
UnicodeEscaper.outsideOf(32, 0xFFFF) // 0x7f
);
public static final CharSequenceTranslator UNESCAPE_PROV_LOCAL_NAME =
new AggregateTranslator(
new LookupTranslator(
new String[][] {
{"\\=", "="},
{"\\'", "'"},
{"\\(", "("},
{"\\)", ")"},
{"\\,", ","},
{"\\-", "-"}, // - is a PN_CHARS_ESC
{"\\:", ":"},
{"\\;", ";"},
{"\\[", "["},
{"\\]", "]"},
{"\\.", "."},
// {"\\%", "%"}, // This is not in PROV-N but is required for <percent> production
// {"%3C", "<"},
// {"%3E", ">"},
}),
new UnicodeUnescaper()
);
public static final CharSequenceTranslator ESCAPE_UNICODE =
new AggregateTranslator(
UnicodeEscaper.outsideOf(32, 0xFF)
);
public String escapeProvLocalName(String localName) {
if ("-".equals(localName)) return "\\-";
String tmp=ESCAPE_PROV_LOCAL_NAME.translate(localName);
int len = tmp.length();
if (len>0 && tmp.charAt(len-1)=='.') {
return (tmp.substring(0,len-1)) + "\\.";
} else {
return tmp;
}
}
public String unescapeProvLocalName(String localName) {
return UNESCAPE_PROV_LOCAL_NAME.translate(localName);
}
public static final String INNER_ESCAPE="_";
public static final String START_ESCAPE="_";
public static final CharSequenceTranslator ESCAPE_TO_XML_QNAME_LOCAL_NAME =
new AggregateTranslator(
new LookupTranslator(
new String[][] {
{"=", INNER_ESCAPE + "3D"},
{"'", INNER_ESCAPE + "27"},
{"(", INNER_ESCAPE + "28"},
{")", INNER_ESCAPE + "29"},
{",", INNER_ESCAPE + "2C"},
{INNER_ESCAPE, INNER_ESCAPE + INNER_ESCAPE},
{":", INNER_ESCAPE + "3A"},
{";", INNER_ESCAPE + "3B"},
{"[", INNER_ESCAPE + "5B"},
{"]", INNER_ESCAPE + "5D"},
{"/", INNER_ESCAPE + "2F"},
{"\\", INNER_ESCAPE + "5C"},
{"?", INNER_ESCAPE + "3F"},
{"@", INNER_ESCAPE + "40"},
{"~", INNER_ESCAPE + "7E"},
{"&", INNER_ESCAPE + "26"},
{"+", INNER_ESCAPE + "2B"},
{"*", INNER_ESCAPE + "2A"},
{"#", INNER_ESCAPE + "23"},
{"$", INNER_ESCAPE + "24"},
{"!", INNER_ESCAPE + "21"},
}),
//StringEscapeUtils.ESCAPE_XML10
JavaUnicodeEscaper.outsideOf(32, 0xFFFF) // 0x7f
);
public static final CharSequenceTranslator UNESCAPE_FROM_XML_QNAME_LOCAL_NAME =
new AggregateTranslator(
new LookupTranslator(
new String[][] {
{INNER_ESCAPE + "3D", "="},
{INNER_ESCAPE + "27", "'"},
{INNER_ESCAPE + "28", "("},
{INNER_ESCAPE + "29", ")"},
{INNER_ESCAPE + "2C", ","},
{INNER_ESCAPE + INNER_ESCAPE, INNER_ESCAPE},
{INNER_ESCAPE + "00", ""},
{INNER_ESCAPE + "3A", ":"},
{INNER_ESCAPE + "3B", ";"},
{INNER_ESCAPE + "5B", "["},
{INNER_ESCAPE + "5D", "]"},
{INNER_ESCAPE + "2F", "/"},
{INNER_ESCAPE + "5C", "\\"},
{INNER_ESCAPE + "3F", "?"},
{INNER_ESCAPE + "40", "@"},
{INNER_ESCAPE + "7E", "~"},
{INNER_ESCAPE + "26", "&"},
{INNER_ESCAPE + "2B", "+"},
{INNER_ESCAPE + "2A", "*"},
{INNER_ESCAPE + "23", "#"},
{INNER_ESCAPE + "24", "$"},
{INNER_ESCAPE + "21", "!"},
}),
new UnicodeUnescaper()
);
public static final CharSequenceTranslator UNESCAPE_UNICODE =
new AggregateTranslator(new UnicodeUnescaper());
public static final boolean isNCNameStartCharToEscape (char c) { // what about unicode?
return Character.isLowerCase(c)||Character.isUpperCase(c);
}
public String escapeToXsdLocalName(String localName) {
if ("".equals(localName)) return START_ESCAPE;
String s=ESCAPE_TO_XML_QNAME_LOCAL_NAME.translate(localName);
if (!isNCNameStartCharToEscape(s.charAt(0))) return START_ESCAPE + s;
return s;
}
public String unescapeFromXsdLocalName(String localName) {
if (START_ESCAPE.equals(localName)) {
return "";
}
if (localName.charAt(0)==START_ESCAPE.charAt(0)) {
localName=localName.substring(1);
}
String s=UNESCAPE_FROM_XML_QNAME_LOCAL_NAME.translate(localName);
return s;
}
public String escapeUnicode(String name) {
return ESCAPE_UNICODE.translate(name);
}
public String unescapeUnicode(String name) {
return UNESCAPE_UNICODE.translate(name);
}
static final String PN_CHARS_U="[A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD]"; // [#x10000-#xEFFFF]
static final String PN_CHARS="(" + PN_CHARS_U + "|[0-9\\-\\u00B7\\u0300-\\u036F\\u203F-\\u2040])";
static final String PN_CHARS_ESC="((\\\\)([\\=\\'\\(\\)\\,\\-\\:\\;\\[\\]\\.]))";
static final String HEX="[0-9A-Fa-f]";
static final String PERCENT="(%(" + HEX + ")(" + HEX +"))";
static final String PN_CHARS_OTHERS="(([/@~&\\+\\*\\?#$!])|" + PN_CHARS_ESC + "|" + PERCENT + ")";
static final String PN_LOCAL= "(((" + PN_CHARS_U + ")|([0-9])|(" + PN_CHARS_OTHERS + ")))"
+ "((((" + PN_CHARS + ")|(\\.)|(" + PN_CHARS_OTHERS + ")))*"
+ "(((" + PN_CHARS + ")|(" + PN_CHARS_OTHERS + "))))?";
final Pattern pat=Pattern.compile(QualifiedNameUtils.PN_LOCAL);
public boolean patternExactMatch (String input) {
if ("".equals(input)) return true;
Matcher match=pat.matcher(input);
if (match.find()) {
//System.out.println("found " + input.substring(match.start(),match.end()));
return match.start()==0 && match.end()==input.length();
} else {
return false;
}
}
/*
static final String XML_NameStartChar="[:A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD]"; // [#x10000-#xEFFFF]
static final String XML_NameChar="(" + XML_NameStartChar + "|[\\-\\.0-9\\u00B7\\u0300-\\u036F\\u203F-\\u2040])";
static final String XML_Name=XML_NameStartChar + "(" + XML_NameChar + ")*";
static final String NC_NameChar=XML_NameChar + "[^:]";
static final String NC_NameStartChar=XML_NameStartChar + "[^:]";
static final String NC_Name="("+NC_NameStartChar + "((" + NC_NameChar + ")*))";*/
static final String XML_NameStartChar="[A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD]"; // [#x10000-#xEFFFF]
static final String XML_NameChar="(" + XML_NameStartChar + "|[\\-\\.0-9\\u00B7\\u0300-\\u036F\\u203F-\\u2040])";
static final String XML_Name=XML_NameStartChar + "(" + XML_NameChar + ")*";
static final String NC_NameChar=XML_NameChar;
static final String NC_NameStartChar=XML_NameStartChar;
static final String NC_Name=NC_NameStartChar + "(" + NC_NameChar + ")*";
final Pattern NC_pat=Pattern.compile(QualifiedNameUtils.NC_Name);
public boolean is_NC_Name (String input) {
if ("".equals(input)) return false;
Matcher match=NC_pat.matcher(input);
if (match.matches()) {
//System.out.println("found " + input.substring(match.start(),match.end()));
return match.start()==0 && match.end()==input.length();
} else {
return false;
}
}
}