// Component.java example
//
// Explorer
// alien-ofelia-conet-ccnx-master
package org.ccnx.ccn.protocol;

import static org.ccnx.ccn.profiles.CommandMarker.COMMAND_MARKER_NONCE;

import java.math.BigInteger;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.Random;

import org.bouncycastle.util.Arrays;
import org.ccnx.ccn.impl.support.DataUtils;
import org.ccnx.ccn.protocol.ContentName.ComponentProvider;

/**
 * Wrapper class to store immutable name components.
 * <p>
 * A component is an opaque byte string. This class also hosts the static
 * helpers that convert between component bytes and their textual forms:
 * native Java strings ({@link #parseNative(String)} /
 * {@link #printNative(byte[])}) and the URI encoding shared with the CCN C
 * library ({@link #parseURI(String)} / {@link #printURI(byte[])}).
 * <p>
 * Note: the byte array is stored and returned without copying, so the
 * immutability of a component relies on callers not modifying the array they
 * pass in or get back.
 */
public class Component implements ComponentProvider {

	/**
	 * Charset used when decoding component bytes to a native string, matching
	 * the UTF-8 encoding that {@link #parseNative(String)} documents.
	 * Referenced by its fully qualified name so no extra import is required.
	 */
	private static final java.nio.charset.Charset UTF8 = java.nio.charset.Charset.forName("UTF-8");

	byte[] component;
	
	/**
	 * Wrap an existing byte array as a component. The array is not copied.
	 * @param comp the raw component bytes.
	 */
	protected Component(byte[] comp) {
		component = comp;
	}

	/**
	 * Create a component from a native string.
	 * @param text native text string; encoded with {@link #parseNative(String)}.
	 */
	public Component(String text) {
		component = parseNative(text);
	}

	/**
	 * @return the raw bytes of this component (the internal array, not a copy).
	 */
	public byte[] getComponent() {
		return component;
	}

	/**
	 * Parse native string component: just UTF-8 encode.
	 * For full names in native strings only "/" is special
	 * but for an individual component we will even allow that.
	 * This method intentionally throws no declared exceptions
	 * so you can be confident in encoding any native Java String.
	 * TODO make this use Java string escaping rules?
	 * @param name Component as native Java string
	 * @return the encoded bytes as produced by {@code DataUtils.getBytesFromUTF8String}
	 */
	public static byte[] parseNative(String name) {
		// DataUtils handles exceptions around missing UTF-8 support.
		return DataUtils.getBytesFromUTF8String(name);
	}

	/**
	 * Indicates an attempt to parse a .. component.
	 */
	public static class DotDot extends Exception { // Need to strip off a component
		private static final long serialVersionUID = 4667513234636853164L;
	}

	/**
	 * Parse the URI Generic Syntax of RFC 3986.
	 * Including handling percent encoding of sequences that are not legal character
	 * encodings in any character set.  This method is the inverse of 
	 * {@link #printURI(byte[])} and for any input sequence of bytes it must be the case
	 * that parseURI(printURI(input)) equals input.  Note that the inverse
	 * is NOT true: printURI(parseURI(input)) != input in general.
	 * <p>
	 * Parsing stops silently at the first general delimiter ('/', '?' or '#').
	 * Unreserved characters and the remaining reserved characters are copied
	 * as-is, "%xy" sequences (x, y ASCII hex digits) are decoded to one byte,
	 * and anything else is rejected.
	 * <p>
	 * Note in particular that this method interprets sequences of more than
	 * two dots ('.') as representing an empty component or dot component value
	 * as encoded by printURI.  That is, the component value will be 
	 * the value obtained by removing three dots.
	 * 
	 * @param name a single component of a name, URI encoded
	 * @return a name component, or null when the decoded component is empty
	 *         or a single dot (both are no-ops in a URI path)
	 * @throws DotDot if the decoded component is exactly ".."
	 * @throws URISyntaxException if a percent escape is truncated or not
	 *         hexadecimal, or the input contains a character that is not legal
	 *         in a URI component
	 */
	public static byte[] parseURI(String name) throws DotDot, URISyntaxException {
		final int inputLength = name.length();
		// Every input character yields at most one output byte.
		byte[] buffer = new byte[inputLength];
		int count = 0;
		boolean alldots = true; // does this component contain only dots after unescaping?

		for (int i = 0; i < inputLength; i++) {
			char ch = name.charAt(i);
			// Note in C lib case 0 is handled like these general delimiters that terminate processing 
			// but that case should never arise in Java which uses real unicode characters.
			if (ch == '/' || ch == '?' || ch == '#') {
				break;
			}
			byte decoded;
			if (ch == '%') {
				// This is a byte string %xy where xy are hex digits.
				// Since the input string must be compatible with the output
				// of printURI(), we may convert the character values directly.
				if (inputLength - 1 < i + 2) {
					throw new URISyntaxException(name, "malformed %xy byte representation: too short", i);
				}
				int b1 = hexValue(name.charAt(++i)); // consume x
				int b2 = hexValue(name.charAt(++i)); // consume y
				if (b1 < 0 || b2 < 0)
					throw new URISyntaxException(name, "malformed %xy byte representation: not legal hex number: " + name.substring(i-2, i+1), i-2);
				decoded = (byte)((b1 * 16) + b2);
			} else if (isUnreserved(ch) || isPermittedReserved(ch)) {
				// This character remains the same
				decoded = (byte)ch;
			} else {
				throw new URISyntaxException(name, "Illegal characters in URI", i);
			}
			buffer[count++] = decoded;
			if (decoded != '.') {
				alldots = false;
			}
		}

		if (alldots) {
			if (count <= 1) {
				// Empty component or "." : nothing to add to the name
				return null;
			} else if (count == 2) {
				throw new DotDot();
			} else {
				// Remove the three '.' extra
				count -= 3;
			}
		}
		byte[] decodedName = new byte[count];
		System.arraycopy(buffer, 0, decodedName, 0, count);
		return decodedName;
	}

	/**
	 * Value of an ASCII hexadecimal digit. Unlike Character.digit this
	 * rejects non-ASCII Unicode digits, which are not valid in a URI escape.
	 * @return 0-15, or -1 if ch is not one of 0-9, a-f, A-F
	 */
	private static int hexValue(char ch) {
		if ('0' <= ch && ch <= '9')
			return ch - '0';
		if ('a' <= ch && ch <= 'f')
			return ch - 'a' + 10;
		if ('A' <= ch && ch <= 'F')
			return ch - 'A' + 10;
		return -1;
	}

	/**
	 * True for the RFC 3986 "unreserved" characters: ASCII letters, digits,
	 * '-', '.', '_' and '~'. These are never percent encoded.
	 */
	private static boolean isUnreserved(char ch) {
		return ('a' <= ch && ch <= 'z') ||
				('A' <= ch && ch <= 'Z') ||
				('0' <= ch && ch <= '9') ||
				ch == '-' || ch == '.' || ch == '_' || ch == '~';
	}

	/**
	 * True for the reserved characters that parseURI accepts unescaped
	 * (everything reserved except the terminating delimiters '/', '?', '#').
	 */
	private static boolean isPermittedReserved(char ch) {
		switch (ch) {
		case ':': case '[': case ']': case '@':
		case '!': case '$': case '&': case '\'': case '(': case ')':
		case '*': case '+': case ',': case ';': case '=':
			return true;
		default:
			return false;
		}
	}

	/**
	 * Print bytes as an unsigned hexadecimal number.
	 * Because the bytes are treated as one big integer, leading zero bytes and
	 * a leading zero nibble are not printed, and an empty array prints as "0".
	 * @param bs input byte array, may be null.
	 * @return lower-case hex digits, or the empty string if bs is null.
	 */
	public static String hexPrint(byte [] bs) {
		if (null == bs)
			return new String();
	
		BigInteger bi = new BigInteger(1,bs);
		return bi.toString(16);
	}

	/**
	 * Decode component bytes as a native Java string, the counterpart of
	 * {@link #parseNative(String)}.
	 * @param bs input byte array, must not be null.
	 * @return the bytes decoded as UTF-8.
	 */
	public static String printNative(byte[] bs) {
		// Decode as UTF-8 (not the platform default charset) so that this
		// matches the encoding used by parseNative().  Note that this is not 
		// necessarily invertible, since there may be byte sequences 
		// that do not correspond to any legal character encoding;
		// those are converted to Unicode "Replacement Character" U+FFFD.
		return UTF8.decode(ByteBuffer.wrap(bs)).toString();
	}

	/**
	 * Print a whole byte array in URI form.
	 * @param bs input byte array; null is treated like an empty component.
	 * @return the URI encoding, see {@link #printURI(byte[], int, int)}.
	 */
	public static String printURI(byte [] bs) {
		return printURI(bs, 0, (null == bs) ? 0 : bs.length);
	}

	static final char HEX_DIGITS[] = {
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
	};

	/**
	 * Print bytes in the URI Generic Syntax of RFC 3986 
	 * including byte sequences that are not legal character
	 * encodings in any character set and byte sequences that have special 
	 * meaning for URI resolution per RFC 3986.  This is designed to match
	 * the C library URI encoding.
	 * 
	 * This method must be invertible by parseURI() so 
	 * for any input sequence of bytes it must be the case
	 * that parseURI(printURI(input)) equals input.
	 * 
	 * All bytes that are unreserved characters per RFC 3986 are left unescaped.
	 * Other bytes are percent encoded.
	 * 
	 * Empty path components and path components "." and ".." have special 
	 * meaning for relative URI resolution per RFC 3986.  To guarantee 
	 * these component variations are preserved and recovered exactly when
	 * the URI is parsed by parseURI() we use a convention that 
	 * components that are empty or consist entirely of '.' characters will 
	 * have "..." appended.  This is intended to be consistent with the CCN C 
	 * library handling of URI representation of names.
	 * @param bs input byte array; null or empty yields "...".
	 * @param offset index of the first byte of the component within bs.
	 * @param length number of bytes of bs that make up the component.
	 * @return the URI encoding of bs[offset .. offset+length-1].
	 * @throws IndexOutOfBoundsException if bs is non-empty and offset/length
	 *         do not describe a range inside it.
	 */
	public static String printURI(byte[] bs, int offset, int length) {
		if (null == bs || bs.length == 0) {
			// Empty component represented by three '.'
			return "...";
		}
		// Written to avoid int overflow in offset + length.
		if (offset < 0 || length < 0 || offset > bs.length - length) {
			throw new IndexOutOfBoundsException(
					"offset " + offset + ", length " + length + ", array length " + bs.length);
		}
		final int end = offset + length;
		// To get enough control over the encoding, we use 
		// our own loop and NOT simply new String(bs) (or java.net.URLEncoder) because
		// the String constructor will decode illegal UTF-8 sub-sequences
		// with Unicode "Replacement Character" U+FFFD.  We could use a CharsetDecoder
		// to detect the illegal UTF-8 sub-sequences and handle them separately,
		// except that this is almost certainly less efficient and some versions of Java 
		// have bugs that prevent flagging illegal overlong UTF-8 encodings (CVE-2008-2938).
		// Also, it is much easier to verify what this is doing and compare to the C library implementation.
		//
		// Initial allocation is based on the documented behavior of StringBuilder's buffer
		// expansion algorithm being 2+2*length if expansion is required.
		StringBuilder result = new StringBuilder((1 + 3 * length) / 2);
		int i = offset;
		while (i < end && bs[i] == '.') {
			i++;
		}
		if (i == end) {
			// all dots (or an empty range): mark with three extra dots
			result.append("...");
		}
		for (i = offset; i < end; i++) {
			// Mask to 0..255 so bytes >= 0x80 are never mistaken for ASCII.
			int value = bs[i] & 0xFF;
			if (isUnreserved((char) value))
				result.append((char) value);
			else {
				result.append('%');
				result.append(HEX_DIGITS[value >> 4]);
				result.append(HEX_DIGITS[value & 0xF]);
			}
		}
		return result.toString();
	}

	private static final Random random = new Random();
	/**
	 * Generates a random nonce component (with a nonce CommandMarker header).
	 * Can be used in ContentName constructors where a nonce is required.
	 * Note: the nonce component generated will be different every time this
	 * is used. The 8 nonce bytes come from java.util.Random, so they are
	 * not suitable where unpredictability is a security requirement.
	 */
	public static final ComponentProvider NONCE = new ComponentProvider() {
		public byte[] getComponent() {
			byte [] nonce = new byte[8];
			random.nextBytes(nonce);
			return COMMAND_MARKER_NONCE.addBinaryData(nonce);
		}
	};

	private static byte[] emptyComponent = new byte[]{ };
	/**
	 * This object generates an empty component (length = 0).
	 */
	public static final ComponentProvider EMPTY = new ComponentProvider() {
		public byte[] getComponent() {
			return emptyComponent;
		}
	};

	/**
	 * Compares the bytes of this component with another representation of a
	 * component: a raw byte array, any ComponentProvider, or a native String
	 * (encoded with {@link #parseNative(String)}, i.e. the same way the
	 * String constructor encodes it). Note that this convenience makes the
	 * relation asymmetric: a byte[] or String is never equal to a Component
	 * from its own point of view.
	 */
	@Override
	public boolean equals(Object obj) {
		if (obj instanceof byte[])
			return Arrays.areEqual( (byte[])obj, getComponent() );
		if (obj instanceof ComponentProvider)
			return Arrays.areEqual( ((ComponentProvider)obj).getComponent(), getComponent() );
		if (obj instanceof String)
			return Arrays.areEqual( parseNative((String)obj), getComponent() );
		return super.equals(obj);
	}

	/**
	 * Hash of the component bytes, consistent with the byte-wise equals.
	 */
	@Override
	public int hashCode() {
		return Arrays.hashCode(getComponent());
	}

	/**
	 * @return the URI encoding of this component, see {@link #printURI(byte[])}.
	 */
	@Override
	public String toString() {
		return printURI(component);
	}
}