// Component.java example
//
// Explorer
// ccnx-master
/*
 * Part of the CCNx Java Library.
 *
 * Copyright (C) 2008-2013 Palo Alto Research Center, Inc.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. You should have received
 * a copy of the GNU Lesser General Public License along with this library;
 * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.ccnx.ccn.protocol;

import static org.ccnx.ccn.profiles.CommandMarker.COMMAND_MARKER_NONCE;

import java.math.BigInteger;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.Random;

import org.bouncycastle.util.Arrays;
import org.ccnx.ccn.impl.support.DataUtils;
import org.ccnx.ccn.protocol.ContentName.ComponentProvider;

/**
 * Wrapper class to store immutable name components.
 */
public class Component implements ComponentProvider {

	/**
	 * Charset used by {@link #printNative(byte[])} so that it decodes with the
	 * same encoding {@link #parseNative(String)} is documented to encode with.
	 */
	private static final java.nio.charset.Charset UTF8 = java.nio.charset.Charset.forName("UTF-8");

	/**
	 * The raw bytes of this name component. The array is stored and handed out
	 * by reference (no defensive copies are made), so callers must not modify it.
	 */
	byte[] component;

	/**
	 * Wrap an existing byte array as a component.
	 * The array is stored by reference, not copied.
	 * @param comp the component bytes
	 */
	protected Component(byte[] comp) {
		this.component = comp;
	}

	/**
	 * Create a component from a native string.
	 * The text is converted with {@link #parseNative(String)}.
	 * @param text native text string.
	 */
	public Component(String text) {
		this.component = parseNative(text);
	}

	/**
	 * @return the stored component bytes (the internal array itself, not a copy)
	 */
	@Override
	public byte[] getComponent() {
		return this.component;
	}

	/**
	 * Parse native string component: just UTF-8 encode
	 * For full names in native strings only "/" is special
	 * but for an individual component we will even allow that.
	 * This method intentionally throws no declared exceptions
	 * so you can be confident in encoding any native Java String
	 * TODO make this use Java string escaping rules?
	 * @param name Component as native Java string
	 * @return the bytes produced by DataUtils.getBytesFromUTF8String(name)
	 */
	public static byte[] parseNative(String name) {
		// Any handling of a missing UTF-8 charset is delegated to DataUtils.
		return DataUtils.getBytesFromUTF8String(name);
	}

	/**
	 * Indicates an attempt to parse a .. component.
	 * Thrown by {@link #parseURI(String)}; the caller needs to strip off a component.
	 */
	public static class DotDot extends Exception {
		private static final long serialVersionUID = 4667513234636853164L;
	}

	/**
	 * Tells whether a character must be escaped when printed in a URI.
	 * @param ch character to test
	 * @return false for the RFC 3986 unreserved characters (ASCII letters,
	 * ASCII digits, '-', '.', '_' and '~'), true for everything else
	 */
	private static boolean uriReserved(char ch) {
		boolean unreserved =
			('a' <= ch && ch <= 'z') ||
			('A' <= ch && ch <= 'Z') ||
			('0' <= ch && ch <= '9') ||
			ch == '-' || ch == '.' || ch == '_' || ch == '~';
		return !unreserved;
	}

	/**
	 * Parse the URI Generic Syntax of RFC 3986.
	 * Including handling percent encoding of sequences that are not legal character
	 * encodings in any character set.  This method is the inverse of
	 * printURI() and for any input sequence of bytes it must be the case
	 * that parseURI(printURI(input)) == input.  Note that the inverse
	 * is NOT true printURI(parseURI(input)) != input in general.
	 * <p>
	 * Three escape forms are understood: "%xy" for a single byte, and "=" which
	 * switches to hex mode so that every following pair of unreserved characters
	 * is read as one byte "xy". Parsing stops silently at the first '/', '?' or '#'.
	 * </p>
	 * <p>
	 * Note in particular that this method interprets sequences of more than
	 * two dots ('.') as representing an empty component or dot component value
	 * as encoded by printURI.  That is, the component value will be
	 * the value obtained by removing three dots.
	 * </p>
	 * See also: fromURI(String), which is declared outside this class.
	 *
	 * @param name a single component of a name, URI encoded
	 * @return the decoded component bytes, or null when the decoded component is
	 * empty or a single "." (nothing to add to a name)
	 * @throws DotDot if the decoded component is exactly ".."
	 * @throws URISyntaxException if an escape is truncated, contains characters
	 * that are not hex digits, or the input contains a character that must be escaped
	 */
	public static byte[] parseURI(String name) throws DotDot, URISyntaxException {
		final int len = name.length();
		boolean alldots = true; // does this component contain only dots after unescaping?
		boolean quitEarly = false;
		boolean hexEncoding = false;
		int b1, b2;

		// Every byte written consumes at least one input character, so the
		// input length is a safe upper bound on the output size.
		ByteBuffer result = ByteBuffer.allocate(len);
		for (int i = 0; i < len && !quitEarly; i++) {
			char ch = name.charAt(i);
			switch (ch) {
			case '%':
				// This is a byte string %xy where xy are hex digits
				// Since the input string must be compatible with the output
				// of printURI(), we may convert the character values directly.
				if (len - 1 < i + 2) {
					throw new URISyntaxException(name, "malformed %xy byte representation: too short", i);
				}
				b1 = Character.digit(name.charAt(++i), 16); // consume x
				b2 = Character.digit(name.charAt(++i), 16); // consume y
				if (b1 < 0 || b2 < 0)
					throw new URISyntaxException(name, "malformed %xy byte representation: not legal hex number: " + name.substring(i - 2, i + 1), i - 2);
				result.put((byte) ((b1 * 16) + b2));
				break;
				// Note in C lib case 0 is handled like the two general delimiters below that terminate processing
				// but that case should never arise in Java which uses real unicode characters.
			case '/':
			case '?':
			case '#':
				quitEarly = true; // early exit from containing loop
				break;
			case '=':
				// Need at least one "xy" pair after '=', and an even number of
				// characters remaining so that they can all be paired up.
				if (len - 1 < i + 2 || ((len - i) & 1) == 0) {
					throw new URISyntaxException(name, "malformed =xy byte representation: too short", i);
				}
				hexEncoding = true;
				break;
			case ':': case '[': case ']': case '@':
			case '!': case '$': case '&': case '\'': case '(': case ')':
			case '*': case '+': case ',': case ';':
				// Permit unescaped reserved characters
				result.put((byte) ch);
				break;
			default:
				if (uriReserved(ch))
					throw new URISyntaxException(name, "Illegal characters in URI", i);

				if (hexEncoding) {
					// The parity check done at '=' can be defeated by single
					// characters consumed in the cases above (e.g. "=:012"), so
					// make sure the second digit exists before reading it rather
					// than letting charAt() throw an undeclared runtime exception.
					if (i + 1 >= len) {
						throw new URISyntaxException(name, "malformed =xy byte representation: too short", i);
					}
					b1 = Character.digit(ch, 16); // consume x
					b2 = Character.digit(name.charAt(++i), 16); // consume y
					if (b1 < 0 || b2 < 0)
						throw new URISyntaxException(name, "malformed =xy byte representation: not legal hex number: " + name.substring(i - 1, i + 1), i - 1);
					result.put((byte) ((b1 * 16) + b2));
				} else {
					// This character remains the same
					result.put((byte) ch);
				}
				break;
			}
			// Track whether everything decoded so far is a '.' by looking at the
			// most recently written byte.
			if (!quitEarly && result.position() > 0 && result.get(result.position() - 1) != '.') {
				alldots = false;
			}
		}
		result.flip();
		if (alldots) {
			if (result.limit() <= 1) {
				// empty or "." : no component
				return null;
			} else if (result.limit() == 2) {
				throw new DotDot();
			} else {
				// Remove the three '.' extra
				result.limit(result.limit() - 3);
			}
		}
		byte[] decodedName = new byte[result.limit()];
		result.get(decodedName);
		return decodedName;
	}

	/**
	 * Print bytes as a lower-case hexadecimal number.
	 * <p>
	 * The bytes are interpreted as one unsigned big-endian integer, so leading
	 * zero digits are not printed and an empty array prints as "0".
	 * NOTE(review): this makes the output lossy for arrays that start with zero
	 * bytes; kept as-is because callers may depend on the format.
	 * </p>
	 * @param bs bytes to print, may be null
	 * @return hexadecimal text, or the empty string when bs is null
	 */
	public static String hexPrint(byte [] bs) {
		if (null == bs)
			return "";

		BigInteger bi = new BigInteger(1, bs);
		return bi.toString(16);
	}

	/**
	 * Decode bytes as a native Java string using UTF-8, the encoding
	 * {@link #parseNative(String)} is documented to use.
	 * <p>
	 * Note that this is not necessarily invertible, since there may be byte
	 * sequences that do not correspond to any legal character encoding and
	 * that may be converted to e.g. Unicode "Replacement Character" U+FFFD.
	 * </p>
	 * @param bs bytes to decode
	 * @return the decoded string
	 */
	public static String printNative(byte[] bs) {
		// An explicit charset is used instead of the platform default so the
		// result does not depend on the JVM's file.encoding setting.
		return new String(bs, UTF8);
	}

	/**
	 * Internal flag signalling the use of the old-style percent-encoding,
	 * or the new mixed-style using percent-encoding and strings of hexadecimal digits.
	 *
	 */
	static enum URIEscape {
		/** Use RFC-3986 S2.1 percent-encoding for unprintable characters in the component name. */
		PERCENT,
		/** Use mixed-form of percent-encoding and '='{digits} encoding for unprintable characters in the component name. */
		MIXED
	}

	/** Upper-case hexadecimal digits, indexed by nibble value. */
	static final char HEX_DIGITS[] = {
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
	};


	/**
	 * Print a whole byte array as a URI-encoded component using the mixed escaping style.
	 * @param bs component bytes; null or empty prints as "..."
	 * @return the URI representation of the component
	 */
	public static String printURI(byte [] bs) {
		// Guard the length lookup so that null is printed as an empty component
		// instead of raising a NullPointerException.
		return printURI(bs, 0, (null == bs) ? 0 : bs.length, URIEscape.MIXED);
	}

	/**
	 * Print a slice of a byte array as a URI-encoded component using the mixed escaping style.
	 * @param bs array holding the component bytes; null or empty prints as "..."
	 * @param offset index of the first byte of the component
	 * @param length number of bytes in the component
	 * @return the URI representation of the component
	 * @throws IndexOutOfBoundsException if offset and length do not describe a range inside bs
	 */
	public static String printURI(byte [] bs, int offset, int length) {
		return printURI(bs, offset, length, URIEscape.MIXED);
	}

	/**
	 * Print bytes in the URI Generic Syntax of RFC 3986
	 * including byte sequences that are not legal character
	 * encodings in any character set and byte sequences that have special
	 * meaning for URI resolution per RFC 3986.  This is designed to match
	 * the C library URI encoding.
	 * <p>
	 * This method must be invertible by parseURI() so
	 * for any input sequence of bytes it must be the case
	 * that parseURI(printURI(input)) == input.
	 * </p>
	 * <p>
	 * All bytes that are unreserved characters per RFC 3986 are left unescaped.
	 * With {@link URIEscape#PERCENT} every other byte is percent encoded. With
	 * {@link URIEscape#MIXED} an escaped byte that is followed by another byte
	 * needing escape is introduced by '=' instead, and from there on all
	 * remaining bytes are written as bare pairs of hex digits.
	 * </p>
	 * <p>
	 * Empty path components and path components "." and ".." have special
	 * meaning for relative URI resolution per RFC 3986.  To guarantee
	 * these component variations are preserved and recovered exactly when
	 * the URI is parsed by parseURI() we use a convention that
	 * components that are empty or consist entirely of '.' characters will
	 * have "..." added.  This is intended to be consistent with the CCN C
	 * library handling of URI representation of names.
	 * </p>
	 * @param bs input byte array.
	 * @param offset index of the first byte to print
	 * @param length number of bytes to print
	 * @param escape escaping style to use
	 * @return the URI representation of bytes offset .. offset+length-1 of bs
	 * @throws IndexOutOfBoundsException if offset and length do not describe a range inside bs
	 */
	private static String printURI(byte[] bs, int offset, int length, URIEscape escape) {
		if (null == bs || bs.length == 0) {
			// Empty component represented by three '.'
			return "...";
		}
		if (offset < 0 || length < 0 || offset > bs.length - length) {
			throw new IndexOutOfBoundsException(
					"offset " + offset + ", length " + length + ", array length " + bs.length);
		}
		if (length == 0) {
			// Empty slice: also an empty component
			return "...";
		}
		final int end = offset + length;
		boolean hexEncoding = false;

		// To get enough control over the encoding, we use
		// our own loop and NOT simply new String(bs) (or java.net.URLEncoder) because
		// the String constructor will decode illegal UTF-8 sub-sequences
		// with Unicode "Replacement Character" U+FFFD.  We could use a CharsetDecoder
		// to detect the illegal UTF-8 sub-sequences and handle them separately,
		// except that this is almost certainly less efficient and some versions of Java
		// have bugs that prevent flagging illegal overlong UTF-8 encodings (CVE-2008-2938).
		// Also, it is much easier to verify what this is doing and compare to the C library implementation.
		//
		// Initial allocation is based on the documented behavior of StringBuilder's buffer
		// expansion algorithm being 2+2*length if expansion is required.
		StringBuilder result = new StringBuilder((1 + 3 * length) / 2);

		int firstNonDot = offset;
		while (firstNonDot < end && bs[firstNonDot] == '.') {
			firstNonDot++;
		}
		if (firstNonDot == end) {
			// all dots
			result.append("...");
		}
		// components starting in either %00 (segments) or %FD (\375, versions) should
		// be displayed as hex encoded regardless of whether the next byte is
		// a printable character.  Should match the corresponding code in the C library.
		if (escape == URIEscape.MIXED && (bs[offset] == (byte) 0x00 || bs[offset] == (byte) 0xFD)) {
			hexEncoding = true;
			result.append("=");
		}
		// If the option of limiting escaping to percent disappears this
		// branch of the if can also disappear.
		if (escape == URIEscape.PERCENT) {
			for (int i = offset; i < end; i++) {
				byte b = bs[i];
				char ch = (char) (b & 0xFF);
				if (!uriReserved(ch)) {
					result.append(ch);
				} else {
					result.append('%');
					appendHex(result, b);
				}
			}
		} else {
			for (int i = offset; i < end; i++) {
				byte b = bs[i];
				char ch = (char) (b & 0xFF);
				if (hexEncoding) {
					// once in hex mode everything up to the end is hex digits
					appendHex(result, b);
				} else if (!uriReserved(ch)) {
					result.append(ch);
				} else {
					// A lone escaped byte is percent encoded; a run of at least
					// two escaped bytes switches to '=' hex mode.
					if (i + 1 == end || !uriReserved((char) (bs[i + 1] & 0xFF))) {
						result.append('%');
					} else {
						result.append('=');
						hexEncoding = true;
					}
					appendHex(result, b);
				}
			}
		}
		return result.toString();
	}

	/** Append the two upper-case hex digits of b to out. */
	private static void appendHex(StringBuilder out, byte b) {
		out.append(HEX_DIGITS[(b >> 4) & 0xF]);
		out.append(HEX_DIGITS[b & 0xF]);
	}

	/** Source of nonce bytes. This is java.util.Random, not a cryptographic generator. */
	private static final Random random = new Random();
	/**
	 * Generates a random nonce component (with a nonce CommandMarker header).
	 * Can be used in ContentName constructors where a nonce is required.
	 * Note: the nonce component generated will be different every time this
	 * is used.
	 */
	public static final ComponentProvider NONCE = new ComponentProvider() {
		@Override
		public byte[] getComponent() {
			// 8 fresh random bytes, handed to the nonce command marker
			byte [] nonce = new byte[8];
			random.nextBytes(nonce);
			return COMMAND_MARKER_NONCE.addBinaryData(nonce);
		}
	};

	/** Shared zero-length array returned by {@link #EMPTY}. */
	private static final byte[] emptyComponent = new byte[]{ };
	/**
	 * This object generates an empty component (length = 0).
	 */
	public static final ComponentProvider EMPTY = new ComponentProvider() {
		@Override
		public byte[] getComponent() {
			return emptyComponent;
		}
	};

	/**
	 * Compare this component's bytes with another value.
	 * <ul>
	 * <li>a byte[] is compared directly,</li>
	 * <li>a ComponentProvider is compared through its getComponent() bytes,</li>
	 * <li>a String is converted with {@link #parseNative(String)} first, the
	 * same conversion the {@link #Component(String)} constructor uses.</li>
	 * </ul>
	 * Anything else falls back to object identity.
	 * NOTE(review): comparisons against byte[] and String are one-directional
	 * (those types will not report equality with a Component).
	 */
	@Override
	public boolean equals(Object obj) {
		if (obj instanceof byte[])
			return Arrays.areEqual( (byte[])obj, this.getComponent() );
		if (obj instanceof ComponentProvider)
			return Arrays.areEqual( ((ComponentProvider)obj).getComponent(), this.getComponent() );
		if (obj instanceof String)
			// Use the same conversion as the constructor rather than the
			// platform default charset of String.getBytes().
			return Arrays.areEqual( parseNative((String)obj), this.getComponent() );
		return super.equals(obj);
	}

	/**
	 * @return a hash computed from the component bytes
	 */
	@Override
	public int hashCode() {
		return Arrays.hashCode(this.getComponent());
	}

	/**
	 * @return this component in URI form, as produced by {@link #printURI(byte[])}
	 */
	@Override
	public String toString() {
		return printURI(this.component);
	}
}