urlResolver.java example

Explorer
openbd-core-master
- src
/* 
 *  Copyright (C) 2000 - 2011 TagServlet Ltd
 *
 *  This file is part of Open BlueDragon (OpenBD) CFML Server Engine.
 *  
 *  OpenBD is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  Free Software Foundation,version 3.
 *  
 *  OpenBD is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with OpenBD.  If not, see http://www.gnu.org/licenses/
 *  
 *  Additional permission under GNU GPL version 3 section 7
 *  
 *  If you modify this Program, or any covered work, by linking or combining 
 *  it with any of the JARS listed in the README.txt (or a modified version of 
 *  (that library), containing parts covered by the terms of that JAR, the 
 *  licensors of this Program grant you additional permission to convey the 
 *  resulting work. 
 *  README.txt @ http://www.openbluedragon.org/license/README.txt
 *  
 *  http://openbd.org/
 *  $Id: $
 */

package com.nary.net.http;

import java.io.CharArrayWriter;

import com.nary.net.tagFilter;

/**
 * A {@link tagFilter} that rewrites the URL-carrying attributes of HTML tags so that they are resolved against the URL of the page the tags came from.
 *
 * Not every attribute is converted. Only the following tag/attribute pairs are inspected:
 * - a href, link href
 * - form action
 * - embed src, embed pluginspace
 * - frame src, bgsound src, script src, img src
 * - body background, td background
 * - object data, object classid, object codebase, object usemap
 * - applet code, applet codebase
 *
 * A base tag (href, or src for backward compatibility) is not rewritten but its value is remembered and used as the base for the tags that follow.
 *
 * Instances keep their parsing state (current tag, read position, output buffer, base URL) in fields, so one instance must not be used by several threads at the same time.
 *
 * Known limitation: the tag is tokenised on whitespace and '=', so an attribute name that appears inside the quoted value of some other attribute (e.g. alt="see src = here") is still treated as an attribute.
 */

public class urlResolver implements tagFilter {

	// operations understood by processURI()
	private final static byte DEFAULT = 0, BASE = 1;

	// attribute names (lower case) that are searched for in each kind of tag
	private final static char[][] HREF_ATTRS = { "href".toCharArray() };

	private final static char[][] ACTION_ATTRS = { "action".toCharArray() };

	private final static char[][] EMBED_ATTRS = { "pluginspace".toCharArray(), "src".toCharArray() };

	private final static char[][] SRC_ATTRS = { "src".toCharArray() };

	// HTML defines href on <base>; src is still accepted because earlier versions looked for it
	private final static char[][] BASE_ATTRS = { "href".toCharArray(), "src".toCharArray() };

	private final static char[][] BACKGROUND_ATTRS = { "background".toCharArray() };

	// the index of each entry is significant: see processAppletTag()/processObjectTag()
	private final static char[][] APPLET_ATTRS = { "code".toCharArray(), "codebase".toCharArray() };

	private final static char[][] OBJECT_ATTRS = { "data".toCharArray(), "codebase".toCharArray(), "usemap".toCharArray(), "classid".toCharArray() };

	// absolute url, always ending in '/', that relative urls are resolved against
	private String sourceURL;

	// value of the most recent BASE tag (already resolved against sourceURL), or null if none was seen
	private String baseURL = null;

	// scratch stream used by getNextWord() and getURI()
	private final CharArrayWriter wordStream;

	// for use in reading in and converting tags: the rewritten tag is built up in here
	private final CharArrayWriter temp;

	// the tag currently being processed and the read position within it
	private char[] buffer;

	private int bufferAt;

	// quote character that surrounded the value last returned by getURI() ('"' if it was unquoted)
	private char uriQuote = '"';

	private final UrlLinkResolver urlutils;

	/**
	 * @param _url
	 *          - absolute url of the document whose tags will be processed. Backslashes are treated as forward slashes and anything after the last '/' is dropped.
	 * @param _port
	 *          - port to insert into the url when its authority does not already contain one; -1 to leave the url alone
	 */
	public urlResolver(String _url, int _port) {
		// String is immutable, so the result of replace() has to be kept
		String url = _url.replace('\\', '/');

		// index at which the host starts. Derived from "scheme://" so that schemes other
		// than http (e.g. https) work; 7 (the length of "http://") is kept as the fallback.
		int schemeEnd = url.indexOf("://");
		int hostStart = (schemeEnd > 0 && url.indexOf('/') == schemeEnd + 1) ? schemeEnd + 3 : 7;

		// a url with no path at all gets the root path
		if (url.lastIndexOf('/') <= hostStart) {
			url += "/";
		}

		// drop the document name so that the url ends with a '/'
		if (!url.endsWith("/")) {
			url = url.substring(0, url.lastIndexOf('/') + 1);
		}

		// if a port number isn't given in the url, add it in. Only the authority part is
		// examined so that a ':' further along the path is not mistaken for a port.
		if (_port != -1) {
			int pathStart = url.indexOf('/', hostStart);
			if (pathStart > hostStart && url.substring(hostStart, pathStart).indexOf(':') == -1) {
				url = url.substring(0, pathStart) + ":" + _port + url.substring(pathStart);
			}
		}

		sourceURL = url;
		temp = new CharArrayWriter();
		wordStream = new CharArrayWriter();
		urlutils = new UrlLinkResolver();
	}

	/**
	 * Rewrites the urls in a single tag.
	 *
	 * @param _tag
	 *          - the complete tag; the first char is expected to be '<' and the last '>'
	 * @return a new array holding the rewritten tag, or _tag itself when the tag is empty or is not one that carries urls
	 */
	public char[] processTag(char[] _tag) {
		if (_tag == null || _tag.length == 0) {
			return _tag;
		}

		buffer = _tag;
		bufferAt = 0;

		// reset stream used for copying new tag into
		temp.reset();

		// get the '<'
		temp.write(buffer[bufferAt]);
		bufferAt++;

		skipWhitespace();

		char[] firstWord = getNextWord();
		temp.write(firstWord, 0, firstWord.length);

		// tag names are matched without regard to (ASCII) case
		String tagName = toLowerAscii(firstWord);

		if (tagName.equals("a") || tagName.equals("link")) {
			processRestOfTag(HREF_ATTRS, DEFAULT);

		} else if (tagName.equals("form")) {
			processRestOfTag(ACTION_ATTRS, DEFAULT);

		} else if (tagName.equals("embed")) {
			processRestOfTag(EMBED_ATTRS, DEFAULT);

		} else if (tagName.equals("frame") || tagName.equals("bgsound") || tagName.equals("script") || tagName.equals("img")) {
			processRestOfTag(SRC_ATTRS, DEFAULT);

		} else if (tagName.equals("base")) {
			// the value is not rewritten, only remembered as the new base url
			processRestOfTag(BASE_ATTRS, BASE);

		} else if (tagName.equals("body") || tagName.equals("td")) {
			processRestOfTag(BACKGROUND_ATTRS, DEFAULT);

			// NOTE: Object and Applet tags are special cases that may involve a codebase
		} else if (tagName.equals("object")) {
			processObjectTag();

		} else if (tagName.equals("applet")) {
			processAppletTag();

		} else {
			// not a tag that has any urls that require resolving, so just
			// return the untouched buffer
			return buffer;
		}

		// get the '>' (absent if the tag was malformed and has been consumed completely)
		if (bufferAt < buffer.length) {
			temp.write(buffer[bufferAt]);
			bufferAt++;
		}

		return temp.toCharArray();
	}// processTag()

	/**
	 * copies the remainder of the tag (up to, but not including, its last char) into temp, passing the value of every attribute named in _keywords through processURI()
	 */
	private void processRestOfTag(char[][] _keywords, byte _tagType) {
		try {
			int lastIndex = buffer.length - 1;

			// while haven't reached the '>'
			while (bufferAt < lastIndex) {

				skipWhitespace();

				char[] word = getNextWord();
				temp.write(word);

				// only an attribute of interest that is actually followed by '=' has a value to convert
				if (isKeyword(word, _keywords) == -1) {
					continue;
				}

				skipWhitespace();
				if (bufferAt < buffer.length && buffer[bufferAt] == '=') {
					temp.write(buffer[bufferAt]);
					bufferAt++;
					skipWhitespace();
					processURI(getURI(), _tagType);
				}

			}// while
		} catch (java.io.IOException ignored) {
			// temp is an in-memory CharArrayWriter; the exception is only declared by Writer.write(char[])
		}

	}// processRestOfTag()

	/**
	 * copies whitespace (every char up to and including ' ', i.e. exactly the chars getNextWord() will not accept) from the buffer to temp
	 */
	private void skipWhitespace() {
		while (bufferAt < buffer.length && buffer[bufferAt] <= ' ') {
			temp.write(buffer[bufferAt]);
			bufferAt++;
		}
	}// skipWhitespace

	/**
	 * If the next non-whitespace char in the buffer is '=', moves past it and past any whitespace that follows, without copying anything to temp.
	 *
	 * @return true if an '=' was consumed; false (with the read position unchanged) otherwise
	 */
	private boolean consumeEquals() {
		int probe = bufferAt;

		while (probe < buffer.length && buffer[probe] <= ' ') {
			probe++;
		}

		if (probe >= buffer.length || buffer[probe] != '=') {
			return false;
		}
		probe++;

		while (probe < buffer.length && buffer[probe] <= ' ') {
			probe++;
		}

		bufferAt = probe;
		return true;
	}// consumeEquals

	/**
	 * returns the next word in the buffer (not the stream) [used to parse the buffer]. The last char of the buffer is never part of a word.
	 */
	private char[] getNextWord() {
		wordStream.reset();

		// while haven't reached the end of the tag & current character is ok
		while ((bufferAt < buffer.length - 1) && isChar(buffer[bufferAt])) {
			wordStream.write(buffer[bufferAt]);
			bufferAt++;
		}

		return wordStream.toCharArray();
	}// getNextWord

	// return if character is a legal character other than '='
	// except in case where '=' is the first char of the word being read
	private boolean isChar(char ch) {
		return ch > ' ' && (ch != '=' || wordStream.size() == 0);
	}// isChar()

	/**
	 * returns the index within _keywords of the keyword that equals the given word (ignoring ASCII case in the word), or -1 if none does. The keywords must be lower case.
	 */
	private static int isKeyword(char[] word, char[][] _keywords) {
		for (int keywordNum = 0; keywordNum < _keywords.length; keywordNum++) {
			char[] keyword = _keywords[keywordNum];

			// no point comparing this keyword if word lengths don't match
			if (word.length != keyword.length) {
				continue;
			}

			int matched = 0;
			while (matched < keyword.length && toLowerAscii(word[matched]) == keyword[matched]) {
				matched++;
			}

			if (matched == keyword.length) {
				return keywordNum;
			}
		}

		// no keywords match
		return -1;
	}// isKeyword()

	/**
	 * Reads an attribute value from the buffer, starting at the current position, and returns it without its surrounding quotes. Nothing is written to temp. uriQuote is set to the quote char the value should be written back with.
	 *
	 * A quoted value runs to the matching closing quote (leading whitespace inside the quotes is dropped); an unquoted value runs to the next whitespace or quote char. In both cases the last char of the buffer ends the value.
	 */
	private char[] getURI() {
		wordStream.reset();
		uriQuote = '"';

		if (bufferAt >= buffer.length) {
			return new char[0];
		}

		int lastIndex = buffer.length - 1;
		char first = buffer[bufferAt];

		if (first == '"' || first == '\'') {
			// remember which quote opened the value: the other kind may legitimately appear inside it
			uriQuote = first;
			bufferAt++;

			while (bufferAt < lastIndex && buffer[bufferAt] <= ' ') {
				bufferAt++;
			}

			while (bufferAt < lastIndex && buffer[bufferAt] != uriQuote) {
				wordStream.write(buffer[bufferAt]);
				bufferAt++;
			}

			// step over the closing quote; this also covers the empty value ""
			if (bufferAt < buffer.length && buffer[bufferAt] == uriQuote) {
				bufferAt++;
			}

		} else {
			while (bufferAt < lastIndex && buffer[bufferAt] > ' ' && buffer[bufferAt] != '"' && buffer[bufferAt] != '\'') {
				wordStream.write(buffer[bufferAt]);
				bufferAt++;
			}

			// if stopped looping because " or ' found, the quote is kept as part of the value
			if (bufferAt < buffer.length && (buffer[bufferAt] == '"' || buffer[bufferAt] == '\'')) {
				if (buffer[bufferAt] == '"') {
					// the value now contains a double quote, so it cannot be written back inside double quotes
					uriQuote = '\'';
				}
				wordStream.write(buffer[bufferAt]);
				bufferAt++;
			}
		}

		return wordStream.toCharArray();

	}// getURI

	/**
	 * Writes the given attribute value to temp, surrounded by the quote char recorded by the preceding getURI() call.
	 *
	 * If the value is not an http url (see isHttpURL()) it is written unchanged. Otherwise, if the op is DEFAULT the value is written after being passed through urlutils.encode() with the current base (the BASE url if one has been seen, else the source url); if the op is BASE the value is written unchanged and remembered as the BASE url.
	 *
	 * @param in
	 *          - the url to be processed
	 * @param op
	 *          - the operation to be performed
	 * @throws IllegalStateException
	 *           if op is neither DEFAULT nor BASE
	 **/
	private void processURI(char[] in, int op) {
		char quote = uriQuote;

		try {
			// if url is not an http url then leave the url as it is
			if (!isHttpURL(in)) {
				temp.write(quote);
				temp.write(in);
				temp.write(quote);
				return;
			}

			switch (op) {
				case DEFAULT:
					temp.write(quote);
					// presumably encode(url, base) resolves url against base - confirm against UrlLinkResolver
					String base = (baseURL == null) ? sourceURL : urlutils.encode(baseURL, sourceURL);
					temp.write((urlutils.encode(new String(in), base)).toCharArray());
					temp.write(quote);
					break;
				case BASE:
					// set BASE
					temp.write(quote);
					temp.write(in);
					temp.write(quote);
					baseURL = (urlutils.encode(new String(in), sourceURL));
					break;
				default:
					throw new IllegalStateException("invalid op - " + op);
			}// switch
		} catch (java.io.IOException ignored) {
			// temp is an in-memory CharArrayWriter; the exception is only declared by Writer.write(char[])
		}
	}// processURI()

	/**
	 * Copies the remainder of an applet tag into temp. The codebase value is replaced by its encoded form; the code attribute is removed from its original position and appended at the end of the tag, encoded relative to the codebase if one exists (it may appear after code in the tag) or to the source url otherwise.
	 */
	private void processAppletTag() {
		try {
			int lastIndex = buffer.length - 1;

			char[] codeURL = null;
			char codeQuote = '"';
			String fullCodebase = null;

			// while haven't reached the '>'
			while (bufferAt < lastIndex) {

				skipWhitespace();

				char[] word = getNextWord();

				// 0 = code, 1 = codebase
				int wordIndex = isKeyword(word, APPLET_ATTRS);

				if (wordIndex == 0 && consumeEquals()) {
					// held back until the whole tag has been read
					codeURL = getURI();
					codeQuote = uriQuote;

				} else if (wordIndex == 1 && consumeEquals()) {
					char[] codebaseURL = getURI();
					fullCodebase = urlutils.encode(new String(codebaseURL), sourceURL);
					temp.write(word);
					temp.write('=');
					temp.write(uriQuote);
					temp.write(fullCodebase.toCharArray());
					temp.write(uriQuote);

				} else {
					// any other word, or code/codebase without a value
					temp.write(word);
				}

			}// while

			if (codeURL != null) {
				String base = (fullCodebase != null) ? fullCodebase : sourceURL;

				// keep the appended attribute apart from whatever was written last
				temp.write(' ');
				temp.write(new char[] { 'C', 'O', 'D', 'E', '=' });
				temp.write(codeQuote);
				temp.write((urlutils.encode(new String(codeURL), base)));
				temp.write(codeQuote);
			}

		} catch (java.io.IOException ignored) {
			// temp is an in-memory CharArrayWriter; the exception is only declared by Writer.write(char[])
		}

	}// processAppletTag()

	/**
	 * Copies the remainder of an object tag into temp. The codebase value is replaced by its encoded form; usemap and classid values go through processURI(); the data attribute is removed from its original position and appended at the end of the tag, encoded relative to the codebase if one exists (it may appear after data in the tag) or to the source url otherwise.
	 */
	private void processObjectTag() {
		try {
			int lastIndex = buffer.length - 1;

			char[] dataURL = null;
			char dataQuote = '"';
			String fullCodebase = null;

			// while haven't reached the '>'
			while (bufferAt < lastIndex) {

				skipWhitespace();

				char[] word = getNextWord();

				// 0 = data, 1 = codebase, 2 = usemap, 3 = classid
				int wordIndex = isKeyword(word, OBJECT_ATTRS);

				if (wordIndex == 0 && consumeEquals()) {
					// held back until the whole tag has been read
					dataURL = getURI();
					dataQuote = uriQuote;

				} else if (wordIndex == 1 && consumeEquals()) {
					char[] codebaseURL = getURI();
					fullCodebase = urlutils.encode(new String(codebaseURL), sourceURL);
					temp.write(word);
					temp.write('=');
					temp.write(uriQuote);
					temp.write(fullCodebase);
					temp.write(uriQuote);

				} else if ((wordIndex == 2 || wordIndex == 3) && consumeEquals()) {
					temp.write(word);
					temp.write('=');
					// processURI() writes the surrounding quotes itself
					processURI(getURI(), DEFAULT);

				} else {
					// any other word, or a keyword without a value
					temp.write(word);
				}

			}// while

			if (dataURL != null) {
				String base = (fullCodebase != null) ? fullCodebase : sourceURL;

				// keep the appended attribute apart from whatever was written last
				temp.write(' ');
				temp.write(new char[] { 'D', 'A', 'T', 'A', '=' });
				temp.write(dataQuote);
				temp.write((urlutils.encode(new String(dataURL), base)).toCharArray());
				temp.write(dataQuote);
			}

		} catch (java.io.IOException ignored) {
			// temp is an in-memory CharArrayWriter; the exception is only declared by Writer.write(char[])
		}
	}// processObjectTag()

	/**
	 * returns true if the given uri either starts with "http:" (lower case) or names no protocol at all, i.e. it is a relative url. A uri naming any other protocol gives false.
	 */
	private static boolean isHttpURL(char[] in) {
		// check if uri begins with "http:"
		if (in.length >= 5 && in[0] == 'h' && in[1] == 't' && in[2] == 't' && in[3] == 'p' && in[4] == ':') {
			return true;
		}

		// check if this is a relative url i.e. - the uri doesn't specify a protocol
		for (int index = 0; index < in.length; index++) {
			char ch = in[index];

			// if a colon is found then all chars previous to this make up the
			// protocol and hence this isn't http. Note that this colon cannot
			// be the colon preceding the port number since www.somesite.com:80 is
			// an invalid uri without the http://
			if (ch == ':') {
				return false;
			}

			// if the character is not a valid char for a protocol then assume
			// this is a relative http url - so return true
			boolean protocolChar = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.';
			if (!protocolChar) {
				return true;
			}
		}

		// if reached the end of the uri without finding the end of a protocol
		// (denoted by a ':') then must be a relative http url
		return true;

	}// isHttpURL()

	// maps 'A'-'Z' to 'a'-'z' and leaves every other char alone
	private static char toLowerAscii(char in) {
		if (in >= 'A' && in <= 'Z') {
			return (char) (in + ('a' - 'A'));
		}
		return in;
	}

	// returns the given chars as a String with 'A'-'Z' mapped to 'a'-'z'
	private static String toLowerAscii(char[] in) {
		char[] lowered = new char[in.length];
		for (int i = 0; i < in.length; i++) {
			lowered[i] = toLowerAscii(in[i]);
		}
		return new String(lowered);
	}
}