/* * Copyright (C) 2000 - 2008 TagServlet Ltd * * This file is part of Open BlueDragon (OpenBD) CFML Server Engine. * * OpenBD is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * Free Software Foundation,version 3. * * OpenBD is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenBD. If not, see http://www.gnu.org/licenses/ * * Additional permission under GNU GPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or combining * it with any of the JARS listed in the README.txt (or a modified version of * (that library), containing parts covered by the terms of that JAR, the * licensors of this Program grant you additional permission to convey the * resulting work. * README.txt @ http://www.openbluedragon.org/license/README.txt * * http://www.openbluedragon.org/ */ package com.nary.net.http; /** * tagProcessor * * this class, given a url and port, will resolve urls in tags * passed to it when a tagFilterInputStream instance calls process tag. * Note this convert all urls. Only : * - img src * - a href * - form action * - applet code * - script src * - embed src * - embed pluginspace * - body background * - frame src * - bgsound src * - object data * - object classid * - object codebase * - object usemap * */ import java.io.ByteArrayOutputStream; import com.nary.net.tagFilterInputStream; import com.nary.net.tagListener; import com.nary.util.byteArray; public class tagProcessor implements tagListener{ private ByteArrayOutputStream wordStream; private final static byte DEFAULT=0, BASE=1; private String sourceURL; private String baseURL = null; private final static int capsToSmallGap = (int)'a' - (int)'A'; // for use in reading in and coverting tags private ByteArrayOutputStream temp; private byte [] buffer; private int bufferAt; private UrlLinkResolver urlutils; public tagProcessor( String _url, int _port ){ // sourceURL must be absolute sourceURL = _url; // convert the url so that it contains all /'s as opposed to \'s, // and so that it ends with a '/' sourceURL.replace('\\', '/'); // ensure url ends with a / if (sourceURL.lastIndexOf('/') <= 7){ sourceURL += "/"; } if (!sourceURL.endsWith("/")){ sourceURL = sourceURL.substring(0, sourceURL.lastIndexOf("/") + 1); } int thirdSlashIndex = sourceURL.indexOf('/', 7); // if a port number isn't given in the url, add it in if (sourceURL.indexOf(':', sourceURL.indexOf(':') + 1) == -1){ sourceURL = sourceURL.substring(0, thirdSlashIndex) + ":" + _port + sourceURL.substring(thirdSlashIndex); } temp = new ByteArrayOutputStream(); wordStream = new ByteArrayOutputStream(); urlutils = new UrlLinkResolver(); }//tagProcessor() public byte[] processTag( byte [] _tag ) { buffer = _tag; bufferAt = 0; // reset stream used for copying new tag into temp.reset(); // notes: first byte should be a '<', last byte should be a '>' (might not be LATER) bufferAt = 0; // get the '<' temp.write(buffer[bufferAt]); bufferAt++; skipWhitespace(); byte [] firstWord = getNextWord(); try{ temp.write(firstWord); }catch(java.io.IOException ignored){} // if A if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'A'})){ // get href processRestOfTag(new byte[][]{{(byte)'h',(byte)'r',(byte)'e',(byte)'f'}}, DEFAULT); } // if form else if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'F',(byte)'O',(byte)'R',(byte)'M'})){ // get action processRestOfTag(new byte[][]{{(byte)'a',(byte)'c',(byte)'t',(byte)'i',(byte)'o',(byte)'n'}}, DEFAULT); } // if embed else if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'E',(byte)'M',(byte)'B',(byte)'E',(byte)'D'})){ // get pluginspace, src processRestOfTag(new byte[][]{{(byte)'p',(byte)'l',(byte)'u',(byte)'g',(byte)'i',(byte)'n',(byte)'s',(byte)'p',(byte)'a',(byte)'c',(byte)'e'},{(byte)'s',(byte)'r',(byte)'c'}}, DEFAULT); } // if frame, bgsound, img, script, base else if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'F',(byte)'R',(byte)'A',(byte)'M',(byte)'E'}) || byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'B',(byte)'G',(byte)'S',(byte)'O',(byte)'U',(byte)'N',(byte)'D'}) || byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'S',(byte)'C',(byte)'R',(byte)'I',(byte)'P',(byte)'T'}) || byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'I',(byte)'M',(byte)'G'})){ // get src processRestOfTag(new byte[][]{{(byte)'s',(byte)'r',(byte)'c'}}, DEFAULT); } // if base else if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'B',(byte)'A',(byte)'S',(byte)'E'})){ // get src processRestOfTag(new byte[][]{{(byte)'s',(byte)'r',(byte)'c'}}, BASE); } // if body else if (byteArray.equalsIgnoreCase(firstWord, new byte[]{(byte)'B',(byte)'O',(byte)'D',(byte)'Y'})){ // get background processRestOfTag(new byte[][]{{(byte)'b',(byte)'a',(byte)'c',(byte)'g',(byte)'r',(byte)'o',(byte)'u',(byte)'n',(byte)'d'}}, DEFAULT); } // NOTE: Object and Applet tags are special cases that may involve a codebase // if object else if (byteArray.equalsIgnoreCase(firstWord, new byte[] {(byte)'O',(byte)'B',(byte)'J',(byte)'E',(byte)'C',(byte)'T'})){ processObjectTag(); } // if applet else if (byteArray.equalsIgnoreCase(firstWord, new byte[] {(byte)'A',(byte)'P',(byte)'P',(byte)'L',(byte)'E',(byte)'T'})){ processAppletTag(); } else{ // not a tag that has any urls that require resolving, so just // return the untouched buffer return buffer; } // get the '>' temp.write(buffer[bufferAt]); bufferAt++; return temp.toByteArray(); }// checkTag() private void processRestOfTag(byte [][] _keywords, byte _tagType) { try{ int bufferLen = buffer.length; // while haven't reached the '>' while (bufferAt < bufferLen-1){ skipWhitespace(); byte [] word = getNextWord(); temp.write(word); // if it's a tag then get the value if (isKeyword(word, _keywords) != -1){ skipWhitespace(); if (buffer[bufferAt] == '='){ temp.write(buffer[bufferAt]); bufferAt++; skipWhitespace(); processURI(getURI(), _tagType); } } }// while }catch (java.io.IOException ignored){} }//processRestOfTag() private void skipWhitespace(){ // skip LWS while ((bufferAt < buffer.length) && buffer[bufferAt] == ' ' || buffer[bufferAt] == '\r' || buffer[bufferAt] == '\n' || buffer[bufferAt] == '\t'){ temp.write(buffer[bufferAt]); bufferAt++; } }// skipWhitespace /** * returns the next word in the buffer (not the stream) * [used to parse the buffer] */ private byte[] getNextWord(){ wordStream.reset(); // while haven't reached the end of the tag & current character is ok while((bufferAt < buffer.length-1) && (isChar(buffer[bufferAt]))){ wordStream.write(buffer[bufferAt]); bufferAt++; } return wordStream.toByteArray(); }// getNextWord // return if character is a legal character other than '=' // except in case where '=' is treated as a word itself private boolean isChar(byte ch){ return ((ch < 0) || (ch > 32 && ch < 256 && ch != 61) || (ch == 61 && wordStream.size() == 0));//((ch >= 65 && ch <=90) || (ch >= 97 && ch <= 122)); }// isChar() /** * returns true if the given word is a tag keyword from the tag list 'tags' */ private int isKeyword(byte[] word, byte[][] _keywords){ int keywordIndex = 0; int wordIndex = 0; // check for each known tag for (int keywordNum = 0; keywordNum < _keywords.length; keywordNum++){ keywordIndex = 0; int wordLen = _keywords[keywordNum].length; // no point comparing this tag if word lengths don't match if (word.length != wordLen) continue; // while the char in the word matches the char in the tag // AND the end of the tag hasn't been reached while (keywordIndex < wordLen && (toSmall(word[wordIndex]) == _keywords[keywordNum][keywordIndex])){ wordIndex++; keywordIndex++; } if (keywordIndex == wordLen){ return keywordNum; } } // no tags match return -1; }// isKeyword() /** * gets the next uri from the byte stream returning it as a byte[] */ private byte[] getURI() { wordStream.reset(); // if next char is " then get next chars up til the next " if (buffer[bufferAt] == '"' || buffer[bufferAt] == '\''){ // don't write the " bufferAt++; this.skipWhitespace(); //if the uri given is just " " if (buffer[bufferAt] == '"' || buffer[bufferAt] == '\''){ return new byte[0]; } // while haven't reached the end '>' or the " for while ((bufferAt < buffer.length-1) && (buffer[bufferAt] != '"') && (buffer[bufferAt] != '\'')){ wordStream.write(buffer[bufferAt]); bufferAt++; } // if stopped looping because " found if (bufferAt != buffer.length-1){ // don't write the " bufferAt++; } } // else get the next chars up til the next white space or carriage return else{ // fix this line to make it more efficient while ((bufferAt < buffer.length-1) && (buffer[bufferAt] != '"') && (buffer[bufferAt] != '\'') && buffer[bufferAt] != '\n' && buffer[bufferAt] != ' '){ wordStream.write(buffer[bufferAt]); bufferAt++; } // if stopped looping because ", or ' found if (buffer[bufferAt] == '=' || (buffer[bufferAt] == '\'')){ // write the " wordStream.write(buffer[bufferAt]); bufferAt++; } } return wordStream.toByteArray(); }// getURI /** * processes the given url depending on the operation given * if the op is DEFAULT, then encode the given url * if the op is BASE, then set the BASE url as the given url * @param in - the url to be processed * @param op - the operation to be performed **/ private void processURI(byte[] in, int op) { try{ // if url is not an http url then if (!isHttpURL(in)){ //leave the url as it is temp.write('"'); temp.write(in); temp.write('"'); return; } switch (op){ case DEFAULT: if (baseURL == null){ temp.write('"'); temp.write((urlutils.encode(new String(in), sourceURL)).getBytes()); temp.write('"'); }else{ temp.write('"'); String resolved1 = urlutils.encode(baseURL, sourceURL); temp.write((urlutils.encode(new String(in), resolved1)).getBytes()); temp.write('"'); } break; case BASE: // set BASE temp.write('"'); temp.write(in); temp.write('"'); baseURL = (urlutils.encode(new String(in), sourceURL)); break; default: throw new IllegalStateException("invalid op - " + op); }//switch }catch (java.io.IOException ignored){} }// processURI() /** * resolves the code uri relative to the codebase uri if one exists */ private void processAppletTag() { try{ int bufferLen = buffer.length; // keywords - code, codebase byte [] codeURL = null; byte [] codebaseURL = null; String fullCodebase = null; // while haven't reached the '>' while (bufferAt < bufferLen-1){ skipWhitespace(); byte [] word = getNextWord(); int wordIndex = isKeyword(word, new byte[][]{{(byte)'c',(byte)'o',(byte)'d',(byte)'e'},{(byte)'c',(byte)'o',(byte)'d',(byte)'e',(byte)'b',(byte)'a',(byte)'s',(byte)'e'}}); // if code if (wordIndex == 0){ skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codeURL = getURI(); // if codebase }else if (wordIndex == 1){ skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codebaseURL = getURI(); fullCodebase = urlutils.encode(sourceURL, new String(codebaseURL)); temp.write(word); temp.write('='); temp.write('"'); temp.write(fullCodebase.getBytes()); temp.write('"'); }else{ temp.write(word); } }// while if (codeURL != null){ // unlikely that it does equal null temp.write(new byte[]{(byte)'C',(byte)'O',(byte)'D',(byte)'E',(byte)'=',(byte)'"'}); if (fullCodebase != null){ temp.write((urlutils.encode(new String(codeURL), fullCodebase)).getBytes()); }else{ temp.write((urlutils.encode(new String(codeURL), sourceURL)).getBytes()); } temp.write('"'); } }catch (java.io.IOException ignored){} }// processAppletTag() /** * resolves the data uri relative to the codebase uri if one exists */ private void processObjectTag() { try{ // keywords - data, classid, usemap, codebase int bufferLen = buffer.length; byte [] dataURL = null; byte [] codebaseURL = null; String fullCodebase = null; // while haven't reached the '>' while (bufferAt < bufferLen-1){ skipWhitespace(); byte [] word = getNextWord(); int wordIndex = isKeyword(word, new byte[][]{{(byte)'d',(byte)'a',(byte)'t',(byte)'a'},{(byte)'c',(byte)'o',(byte)'d',(byte)'e',(byte)'b',(byte)'a',(byte)'s',(byte)'e'}, {(byte)'u',(byte)'s',(byte)'e',(byte)'m',(byte)'a',(byte)'p'}, {(byte)'c',(byte)'l',(byte)'a',(byte)'s',(byte)'s',(byte)'i',(byte)'d'}}); // if code if (wordIndex == 0){ skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); dataURL = getURI(); // if codebase }else if (wordIndex == 1){ skipWhitespace(); bufferAt++; // skip the '=' skipWhitespace(); codebaseURL = getURI(); fullCodebase = urlutils.encode(new String(codebaseURL), sourceURL); temp.write(word); temp.write('='); temp.write('"'); temp.write(fullCodebase.getBytes()); temp.write('"'); }else if (wordIndex == 2 || wordIndex == 3){ temp.write(word); skipWhitespace(); bufferAt++; // skip the '=' temp.write('='); skipWhitespace(); temp.write('"'); processURI(getURI(), DEFAULT); temp.write('"'); }else{ temp.write(word); } }// while if (dataURL != null){ // unlikely that it does equal null temp.write(new byte[]{(byte)'D',(byte)'A',(byte)'T',(byte)'A',(byte)'=',(byte)'"'}); if (fullCodebase != null){ temp.write((urlutils.encode(new String(dataURL), fullCodebase)).getBytes()); }else{ temp.write((urlutils.encode(new String(dataURL), sourceURL)).getBytes()); } temp.write('"'); } }catch (java.io.IOException ignored){} }// processObjectTag() private static boolean isHttpURL(byte [] in){ // check first if starts with http: // if uri put in is long enough to check that it begins with http: if (in.length > 5){ // check if uri begins with "http:" if (in[0] == 'h' && in[1] == 't' && in[2] == 't' && in[3] == 'p' && in[4] == ':'){ return true; } } // check if this is a relative url i.e. - the uri doesn't specify a protocol int index = 0; while (index < in.length){ // if a colon is found then all chars previous to this make up the // protocol and hence this isn't http. Note that this colon cannot // be the colon preceding the port number since www.somesite.com:80 is // an invalid uri without the http:// if (in[index] == ':'){ return false; } // if the character is not a valid char for a protocol then assume // this is a relative http url - so return true if (!((in[index] >= 'A' && in[index] <= 'Z') || (in[index] >= 'a' && in[index] <= 'z') || (in[index] >= '0' && in[index] <= '9') || in[index] == '+' || in[index] == '-' || in[index] == '.')){ return true; } index++; }// while // if reached the end of the uri without finding the end of a protocol // (denoted by a ':') then must be a relative http url return true; }// isHttpURL() private static byte toSmall(byte in){ if (in >= 'A' && in <= 'Z') return (byte)(in + capsToSmallGap); else return in; } public static void main( String [] args ){ tagProcessor tp = new tagProcessor( "http://www.n-ary.com", 80 ); tagFilterInputStream is = new tagFilterInputStream( new java.io.ByteArrayInputStream( ("<IMG SRC=\"/image/pic.gif\"><IMG SRC=\"/image/pic.gif\">").getBytes() ) ); is.registerTagListener( tp ); try{ int ch = is.readChar(); while( ch != -1 ){ System.out.print( (char) ch ); ch = is.readChar(); } }catch( java.io.IOException ioe ){ ioe.printStackTrace(); } } }// tagProcessor