package org.ccnx.ccn.protocol; import static org.ccnx.ccn.profiles.CommandMarker.COMMAND_MARKER_NONCE; import java.math.BigInteger; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.util.Random; import org.bouncycastle.util.Arrays; import org.ccnx.ccn.impl.support.DataUtils; import org.ccnx.ccn.protocol.ContentName.ComponentProvider; /** * Wrapper class to store immutable name components. */ public class Component implements ComponentProvider { byte[] component; protected Component(byte[] comp) { component = comp; } /** * Create a component from a native string. * @param text native text string. */ public Component(String text) { component = parseNative(text); } public byte[] getComponent() { return component; } /** * Parse native string component: just UTF-8 encode * For full names in native strings only "/" is special * but for an individual component we will even allow that. * This method intentionally throws no declared exceptions * so you can be confident in encoding any native Java String * TODO make this use Java string escaping rules? * @param name Component as native Java string */ public static byte[] parseNative(String name) { // Handle exception s around missing UTF-8 return DataUtils.getBytesFromUTF8String(name); } /** * Indicates an attempt to parse a .. component. */ public static class DotDot extends Exception { // Need to strip off a component private static final long serialVersionUID = 4667513234636853164L; } /** * Parse the URI Generic Syntax of RFC 3986. * Including handling percent encoding of sequences that are not legal character * encodings in any character set. This method is the inverse of * printComponent() and for any input sequence of bytes it must be the case * that parseComponent(printComponent(input)) == input. Note that the inverse * is NOT true printComponent(parseComponent(input)) != input in general. * * @see fromURI(String) * * Note in particular that this method interprets sequences of more than * two dots ('.') as representing an empty component or dot component value * as encoded by componentPrint. That is, the component value will be * the value obtained by removing three dots. * @param name a single component of a name, URI encoded * @return a name component */ public static byte[] parseURI(String name) throws DotDot, URISyntaxException { byte[] decodedName = null; boolean alldots = true; // does this component contain only dots after unescaping? boolean quitEarly = false; ByteBuffer result = ByteBuffer.allocate(name.length()); for (int i = 0; i < name.length() && !quitEarly; i++) { char ch = name.charAt(i); switch (ch) { case '%': // This is a byte string %xy where xy are hex digits // Since the input string must be compatible with the output // of componentPrint(), we may convert the character values directly. if (name.length()-1 < i+2) { throw new URISyntaxException(name, "malformed %xy byte representation: too short", i); } int b1 = Character.digit(name.charAt(++i), 16); // consume x int b2 = Character.digit(name.charAt(++i), 16); // consume y if (b1 < 0 || b2 < 0) throw new URISyntaxException(name, "malformed %xy byte representation: not legal hex number: " + name.substring(i-2, i+1), i-2); result.put((byte)((b1 * 16) + b2)); break; // Note in C lib case 0 is handled like the two general delimiters below that terminate processing // but that case should never arise in Java which uses real unicode characters. case '/': case '?': case '#': quitEarly = true; // early exit from containing loop break; case ':': case '[': case ']': case '@': case '!': case '$': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case ';': case '=': // Permit unescaped reserved characters result.put((byte)ch); break; default: if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || ch == '-' || ch == '.' || ch == '_' || ch == '~') { // This character remains the same result.put((byte)ch); } else { throw new URISyntaxException(name, "Illegal characters in URI", i); } break; } if (!quitEarly && result.get(result.position()-1) != '.') { alldots = false; } } result.flip(); if (alldots) { if (result.limit() <= 1) { return null; } else if (result.limit() == 2) { throw new DotDot(); } else { // Remove the three '.' extra result.limit(result.limit()-3); } } decodedName = new byte[result.limit()]; System.arraycopy(result.array(), 0, decodedName, 0, result.limit()); return decodedName; } public static String hexPrint(byte [] bs) { if (null == bs) return new String(); BigInteger bi = new BigInteger(1,bs); return bi.toString(16); } public static String printNative(byte[] bs) { // Native string print is the one place where we can just use // Java native platform decoding. Note that this is not // necessarily invertible, since there may be byte sequences // that do not correspond to any legal native character encoding // that may be converted to e.g. Unicode "Replacement Character" U+FFFD. return new String(bs); } public static String printURI(byte [] bs) { return printURI(bs, 0, bs.length); } static final char HEX_DIGITS[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; /** * Print bytes in the URI Generic Syntax of RFC 3986 * including byte sequences that are not legal character * encodings in any character set and byte sequences that have special * meaning for URI resolution per RFC 3986. This is designed to match * the C library URI encoding. * * This method must be invertible by parseComponent() so * for any input sequence of bytes it must be the case * that parseComponent(printComponent(input)) == input. * * All bytes that are unreserved characters per RFC 3986 are left unescaped. * Other bytes are percent encoded. * * Empty path components and path components "." and ".." have special * meaning for relative URI resolution per RFC 3986. To guarantee * these component variations are preserved and recovered exactly when * the URI is parsed by parseComponent() we use a convention that * components that are empty or consist entirely of '.' characters will * have "..." appended. This is intended to be consistent with the CCN C * library handling of URI representation of names. * @param bs input byte array. * @return */ public static String printURI(byte[] bs, int offset, int length) { int i; if (null == bs || bs.length == 0) { // Empty component represented by three '.' return "..."; } // To get enough control over the encoding, we use // our own loop and NOT simply new String(bs) (or java.net.URLEncoder) because // the String constructor will decode illegal UTF-8 sub-sequences // with Unicode "Replacement Character" U+FFFD. We could use a CharsetDecoder // to detect the illegal UTF-8 sub-sequences and handle them separately, // except that this is almost certainly less efficient and some versions of Java // have bugs that prevent flagging illegal overlong UTF-8 encodings (CVE-2008-2938). // Also, it is much easier to verify what this is doing and compare to the C library implementation. // // Initial allocation is based on the documented behavior of StringBuilder's buffer // expansion algorithm being 2+2*length if expansion is required. StringBuilder result = new StringBuilder((1 + 3 * bs.length) / 2); for (i = 0; i < bs.length && bs[i] == '.'; i++) { continue; } if (i == bs.length) { // all dots result.append("..."); } for (i = 0; i < bs.length; i++) { char ch = (char) bs[i]; if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || ch == '-' || ch == '.' || ch == '_' || ch == '~') result.append(ch); else { result.append('%'); result.append(HEX_DIGITS[(ch >> 4) & 0xF]); result.append(HEX_DIGITS[ch & 0xF]); } } return result.toString(); } private static Random random = new Random(); /** * Generates a random nonce component (with a nonce CommandMarker header). * Can be used in ContentName constructors where a nonce is required. * Note: the nonce component generated will be different every time this * is used. */ public static final ComponentProvider NONCE = new ComponentProvider() { public byte[] getComponent() { byte [] nonce = new byte[8]; random.nextBytes(nonce); return COMMAND_MARKER_NONCE.addBinaryData(nonce); } }; private static byte[] emptyComponent = new byte[]{ }; /** * This object generates an empty component (length = 0). */ public static final ComponentProvider EMPTY = new ComponentProvider() { public byte[] getComponent() { return emptyComponent; } }; @Override public boolean equals(Object obj) { if (obj instanceof byte[]) return Arrays.areEqual( (byte[])obj, getComponent() ); if (obj instanceof ComponentProvider) return Arrays.areEqual( ((ComponentProvider)obj).getComponent(), getComponent() ); if (obj instanceof String) return Arrays.areEqual( ((String)obj).getBytes(), getComponent() ); return super.equals(obj); } @Override public int hashCode() { return Arrays.hashCode(getComponent()); } @Override public String toString() { return printURI(component); } }