/* * Copyright (C) 2000 - 2011 TagServlet Ltd * * This file is part of Open BlueDragon (OpenBD) CFML Server Engine. * * OpenBD is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * Free Software Foundation,version 3. * * OpenBD is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenBD. If not, see http://www.gnu.org/licenses/ * * Additional permission under GNU GPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or combining * it with any of the JARS listed in the README.txt (or a modified version of * (that library), containing parts covered by the terms of that JAR, the * licensors of this Program grant you additional permission to convey the * resulting work. * README.txt @ http://www.openbluedragon.org/license/README.txt * * http://openbd.org/ * $Id: $ */ package com.nary.net.http; import java.io.CharArrayWriter; import com.nary.net.tagFilter; /** * this class, given a url and port, will resolve urls in tags passed to it when a tagFilterInputStream instance calls process tag. Note this convert all urls. Only : - img src - a href - form action - applet code - script src - embed src - embed pluginspace - body background - frame src - bgsound src - object data - object classid - object codebase - object usemap */ public class urlResolver implements tagFilter { private CharArrayWriter wordStream; private final static byte DEFAULT = 0, BASE = 1; private String sourceURL; private String baseURL = null; private final static int capsToSmallGap = (int) 'a' - (int) 'A'; // for use in reading in and coverting tags private CharArrayWriter temp; private char[] buffer; private int bufferAt; private UrlLinkResolver urlutils; public urlResolver(String _url, int _port) { // sourceURL must be absolute sourceURL = _url; // convert the url so that it contains all /'s as opposed to \'s, // and so that it ends with a '/' sourceURL.replace('\\', '/'); // ensure url ends with a / if (sourceURL.lastIndexOf('/') <= 7) { sourceURL += "/"; } if (!sourceURL.endsWith("/")) { sourceURL = sourceURL.substring(0, sourceURL.lastIndexOf("/") + 1); } int thirdSlashIndex = sourceURL.indexOf('/', 7); // if a port number isn't given in the url, add it in if (_port != -1 && sourceURL.indexOf(':', sourceURL.indexOf(':') + 1) == -1) { sourceURL = sourceURL.substring(0, thirdSlashIndex) + ":" + _port + sourceURL.substring(thirdSlashIndex); } temp = new CharArrayWriter(); wordStream = new CharArrayWriter(); urlutils = new UrlLinkResolver(); } public char[] processTag(char[] _tag) { buffer = _tag; bufferAt = 0; // reset stream used for copying new tag into temp.reset(); // notes: first byte should be a '<', last byte should be a '>' (might not be LATER) bufferAt = 0; // get the '<' temp.write(buffer[bufferAt]); bufferAt++; skipWhitespace(); char[] firstWord = getNextWord(); try { temp.write(firstWord); } catch (java.io.IOException ignored) { } // if A if (compareChars(firstWord, new char[] { 'A' }) || compareChars(firstWord, new char[] { 'L', 'I', 'N', 'K' })) { // get href processRestOfTag(new char[][] { { 'h', 'r', 'e', 'f' } }, DEFAULT); } // if form else if (compareChars(firstWord, new char[] { 'F', 'O', 'R', 'M' })) { // get action processRestOfTag(new char[][] { { 'a', 'c', 't', 'i', 'o', 'n' } }, DEFAULT); } // if embed else if (compareChars(firstWord, new char[] { 'E', 'M', 'B', 'E', 'D' })) { // get pluginspace, src processRestOfTag(new char[][] { { 'p', 'l', 'u', 'g', 'i', 'n', 's', 'p', 'a', 'c', 'e' }, { 's', 'r', 'c' } }, DEFAULT); } // if frame, bgsound, img, script, base else if (compareChars(firstWord, new char[] { 'F', 'R', 'A', 'M', 'E' }) || compareChars(firstWord, new char[] { 'B', 'G', 'S', 'O', 'U', 'N', 'D' }) || compareChars(firstWord, new char[] { 'S', 'C', 'R', 'I', 'P', 'T' }) || compareChars(firstWord, new char[] { 'I', 'M', 'G' })) { // get src processRestOfTag(new char[][] { { 's', 'r', 'c' } }, DEFAULT); } // if base else if (compareChars(firstWord, new char[] { 'B', 'A', 'S', 'E' })) { // get src processRestOfTag(new char[][] { { 's', 'r', 'c' } }, BASE); } // if body else if (compareChars(firstWord, new char[] { 'B', 'O', 'D', 'Y' }) || compareChars(firstWord, new char[] { 'T', 'D' })) { // get background processRestOfTag(new char[][] { { 'b', 'a', 'c', 'k', 'g', 'r', 'o', 'u', 'n', 'd' } }, DEFAULT); } // NOTE: Object and Applet tags are special cases that may involve a codebase // if object else if (compareChars(firstWord, new char[] { 'O', 'B', 'J', 'E', 'C', 'T' })) { processObjectTag(); } // if applet else if (compareChars(firstWord, new char[] { 'A', 'P', 'P', 'L', 'E', 'T' })) { processAppletTag(); } else { // not a tag that has any urls that require resolving, so just // return the untouched buffer return buffer; } // get the '>' temp.write(buffer[bufferAt]); bufferAt++; return temp.toCharArray(); }// checkTag() private void processRestOfTag(char[][] _keywords, byte _tagType) { try { int bufferLen = buffer.length; // while haven't reached the '>' while (bufferAt < bufferLen - 1) { skipWhitespace(); char[] word = getNextWord(); temp.write(word); // if it's a tag then get the value if (isKeyword(word, _keywords) != -1) { skipWhitespace(); if (buffer[bufferAt] == '=') { temp.write(buffer[bufferAt]); bufferAt++; skipWhitespace(); processURI(getURI(), _tagType); } } }// while } catch (java.io.IOException ignored) { } }// processRestOfTag() private void skipWhitespace() { // skip LWS while ((bufferAt < buffer.length) && buffer[bufferAt] == ' ' || buffer[bufferAt] == '\r' || buffer[bufferAt] == '\n' || buffer[bufferAt] == '\t') { temp.write(buffer[bufferAt]); bufferAt++; } }// skipWhitespace /** * returns the next word in the buffer (not the stream) [used to parse the buffer] */ private char[] getNextWord() { wordStream.reset(); // while haven't reached the end of the tag & current character is ok while ((bufferAt < buffer.length - 1) && (isChar(buffer[bufferAt]))) { wordStream.write(buffer[bufferAt]); bufferAt++; } return wordStream.toCharArray(); }// getNextWord // return if character is a legal character other than '=' // except in case where '=' is treated as a word itself private boolean isChar(char ch) { return ((ch < 0) || (ch > 32 && ch != 61) || (ch == 61 && wordStream.size() == 0)); }// isChar() /** * returns true if the given word is a tag keyword from the tag list 'tags' */ private int isKeyword(char[] word, char[][] _keywords) { int keywordIndex = 0; int wordIndex = 0; // check for each known tag for (int keywordNum = 0; keywordNum < _keywords.length; keywordNum++) { keywordIndex = 0; int wordLen = _keywords[keywordNum].length; // no point comparing this tag if word lengths don't match if (word.length != wordLen) continue; // while the char in the word matches the char in the tag // AND the end of the tag hasn't been reached while (keywordIndex < wordLen && (toSmall(word[wordIndex]) == _keywords[keywordNum][keywordIndex])) { wordIndex++; keywordIndex++; } if (keywordIndex == wordLen) { return keywordNum; } } // no tags match return -1; }// isKeyword() /** * gets the next uri from the byte stream returning it as a char[] */ private char[] getURI() { wordStream.reset(); // if next char is " then get next chars up til the next " if (buffer[bufferAt] == '"' || buffer[bufferAt] == '\'') { // don't write the " bufferAt++; this.skipWhitespace(); // if the uri given is just " " if (buffer[bufferAt] == '"' || buffer[bufferAt] == '\'') { return new char[0]; } // while haven't reached the end '>' or the " for while ((bufferAt < buffer.length - 1) && (buffer[bufferAt] != '"') && (buffer[bufferAt] != '\'')) { wordStream.write(buffer[bufferAt]); bufferAt++; } // if stopped looping because " found if (bufferAt != buffer.length - 1) { // don't write the " bufferAt++; } } // else get the next chars up til the next white space or carriage return else { // fix this line to make it more efficient while ((bufferAt < buffer.length - 1) && (buffer[bufferAt] != '"') && (buffer[bufferAt] != '\'') && buffer[bufferAt] != '\n' && buffer[bufferAt] != ' ') { wordStream.write(buffer[bufferAt]); bufferAt++; } // if stopped looping because ", or ' found if (buffer[bufferAt] == '=' || (buffer[bufferAt] == '\'')) { // write the " wordStream.write(buffer[bufferAt]); bufferAt++; } } return wordStream.toCharArray(); }// getURI /** * processes the given url depending on the operation given if the op is DEFAULT, then encode the given url if the op is BASE, then set the BASE url as the given url * * @param in * - the url to be processed * @param op * - the operation to be performed **/ private void processURI(char[] in, int op) { try { // if url is not an http url then if (!isHttpURL(in)) { // leave the url as it is temp.write('"'); temp.write(in); temp.write('"'); return; } switch (op) { case DEFAULT: if (baseURL == null) { temp.write('"'); temp.write((urlutils.encode(new String(in), sourceURL)).toCharArray()); temp.write('"'); } else { temp.write('"'); String resolved1 = urlutils.encode(baseURL, sourceURL); temp.write((urlutils.encode(new String(in), resolved1)).toCharArray()); temp.write('"'); } break; case BASE: // set BASE temp.write('"'); temp.write(in); temp.write('"'); baseURL = (urlutils.encode(new String(in), sourceURL)); break; default: throw new IllegalStateException("invalid op - " + op); }// switch } catch (java.io.IOException ignored) { } }// processURI() /** * resolves the code uri relative to the codebase uri if one exists */ private void processAppletTag() { try { int bufferLen = buffer.length; // keywords - code, codebase char[] codeURL = null; char[] codebaseURL = null; String fullCodebase = null; // while haven't reached the '>' while (bufferAt < bufferLen - 1) { skipWhitespace(); char[] word = getNextWord(); int wordIndex = isKeyword(word, new char[][] { { 'c', 'o', 'd', 'e' }, { 'c', 'o', 'd', 'e', 'b', 'a', 's', 'e' } }); // if code if (wordIndex == 0) { skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codeURL = getURI(); // if codebase } else if (wordIndex == 1) { skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codebaseURL = getURI(); fullCodebase = urlutils.encode(sourceURL, new String(codebaseURL)); temp.write(word); temp.write('='); temp.write('"'); temp.write(fullCodebase.toCharArray()); temp.write('"'); } else { temp.write(word); } }// while if (codeURL != null) { // unlikely that it does equal null temp.write(new char[] { 'C', 'O', 'D', 'E', '=', '"' }); if (fullCodebase != null) { temp.write((urlutils.encode(new String(codeURL), fullCodebase))); } else { temp.write((urlutils.encode(new String(codeURL), sourceURL))); } temp.write('"'); } } catch (java.io.IOException ignored) { } }// processAppletTag() /** * resolves the data uri relative to the codebase uri if one exists */ private void processObjectTag() { try { // keywords - data, classid, usemap, codebase int bufferLen = buffer.length; char[] dataURL = null; char[] codebaseURL = null; String fullCodebase = null; // while haven't reached the '>' while (bufferAt < bufferLen - 1) { skipWhitespace(); char[] word = getNextWord(); int wordIndex = isKeyword(word, new char[][] { { 'd', 'a', 't', 'a' }, { 'c', 'o', 'd', 'e', 'b', 'a', 's', 'e' }, { 'u', 's', 'e', 'm', 'a', 'p' }, { 'c', 'l', 'a', 's', 's', 'i', 'd' } }); // if code if (wordIndex == 0) { skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); dataURL = getURI(); // if codebase } else if (wordIndex == 1) { skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codebaseURL = getURI(); fullCodebase = urlutils.encode(new String(codebaseURL), sourceURL); temp.write(word); temp.write('='); temp.write('"'); temp.write(fullCodebase); temp.write('"'); } else if (wordIndex == 2 || wordIndex == 3) { temp.write(word); skipWhitespace(); bufferAt++; // skip the '=' temp.write('='); skipWhitespace(); temp.write('"'); processURI(getURI(), DEFAULT); temp.write('"'); } else { temp.write(word); } }// while if (dataURL != null) { // unlikely that it does equal null temp.write(new char[] { 'D', 'A', 'T', 'A', '=', '"' }); if (fullCodebase != null) { temp.write((urlutils.encode(new String(dataURL), fullCodebase)).toCharArray()); } else { temp.write((urlutils.encode(new String(dataURL), sourceURL)).toCharArray()); } temp.write('"'); } } catch (java.io.IOException ignored) { } }// processObjectTag() private static boolean isHttpURL(char[] in) { // check first if starts with http: // if uri put in is long enough to check that it begins with http: if (in.length > 5) { // check if uri begins with "http:" if (in[0] == 'h' && in[1] == 't' && in[2] == 't' && in[3] == 'p' && in[4] == ':') { return true; } } // check if this is a relative url i.e. - the uri doesn't specify a protocol int index = 0; while (index < in.length) { // if a colon is found then all chars previous to this make up the // protocol and hence this isn't http. Note that this colon cannot // be the colon preceding the port number since www.somesite.com:80 is // an invalid uri without the http:// if (in[index] == ':') { return false; } // if the character is not a valid char for a protocol then assume // this is a relative http url - so return true if (!((in[index] >= 'A' && in[index] <= 'Z') || (in[index] >= 'a' && in[index] <= 'z') || (in[index] >= '0' && in[index] <= '9') || in[index] == '+' || in[index] == '-' || in[index] == '.')) { return true; } index++; }// while // if reached the end of the uri without finding the end of a protocol // (denoted by a ':') then must be a relative http url return true; }// isHttpURL() private static char toSmall(char in) { if (in >= 'A' && in <= 'Z') return (char) (in + capsToSmallGap); else return in; } private boolean compareChars(char[] _c1, char[] _c2) { if (_c1.length != _c2.length) { return false; } for (int i = 0; i < _c1.length; i++) { if (convertToSmall(_c1[i]) != convertToSmall(_c2[i])) { return false; } } return true; } private static byte capsToSmall = 'A' - 'a'; private static int convertToSmall(int in) { if (in >= 'A' && in <= 'Z') { return (in - capsToSmall); } else { return in; } } }