/*
* Copyright 2000-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.util.rewriter;
// javax.swing.text
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.HTMLEditorKit;
// java.io
import java.io.*;
// java.util
import java.util.*;
// java.net
import java.net.*;
import org.apache.turbine.util.Log;//AAAtogli!
/*
* HTML Parser Adaptor for the Swing 'HotJava' parser.
*
* @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
* @version $Id: SwingParserAdaptor.java,v 1.6 2004/02/23 03:18:59 jford Exp $
*/
public class SwingParserAdaptor implements HTMLParserAdaptor
{
// Parser callback that accumulates the rewritten HTML output.
private SwingParserAdaptor.Callback cb = new SwingParserAdaptor.Callback();
// Line separator written after selected tags; assigned in the constructor.
private String lineSeparator;
// NOTE(review): not referenced anywhere in the visible code - candidate for removal.
private boolean skippingImplied = false;
// Collaborator consulted on every parser event and for every URL rewrite.
private Rewriter rewriter;
/*
* Construct a swing (hot java) parser adaptor
* Receives a Rewriter parameter, which is used as a callback when rewriting URLs.
* The rewriter object executes the implementation specific URL rewriting.
*
* @param rewriter The rewriter object that is called back during URL rewriting
*/
public SwingParserAdaptor(Rewriter rewriter)
{
this.rewriter = rewriter;
lineSeparator = System.getProperty("line.separator", "\r\n");
}
/**
 * Parses an HTML document, rewriting all URLs as determined by the Rewriter callback.
 *
 * Each invocation uses a fresh callback, so the returned string contains only
 * the output for the document read from <code>reader</code>.
 *
 * @param reader the reader supplying the HTML document
 *
 * @throws MalformedURLException if any exception is raised while parsing; the
 *         original exception is attached as the cause
 *
 * @return an HTML string with rewritten URLs
 */
public String run(Reader reader)
throws MalformedURLException
{
    // Start from a clean callback: reusing the previous one would prepend the
    // output (and carry over the state flags) of an earlier run.
    SwingParserAdaptor.Callback callback = new SwingParserAdaptor.Callback();
    cb = callback;

    HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();
    try
    {
        parser.parse(reader, callback, true);
        return callback.getResult();
    }
    catch (Exception e)
    {
        // The declared exception type is kept for callers; MalformedURLException
        // has no cause-taking constructor, so the cause is chained via initCause.
        MalformedURLException wrapped = new MalformedURLException(e.toString());
        wrapped.initCause(e);
        throw wrapped;
    }
}
/*
* This Class is needed, because getParser is protected and therefore
* only accessibly by a subclass
*/
class ParserGetter extends HTMLEditorKit
{
public HTMLEditorKit.Parser getParser(){
return super.getParser();
}
}
/*
* Swing Parser Callback from the HTMLEditorKit.
* This class handles all SAX-like events during parsing.
*
*/
class Callback extends HTMLEditorKit.ParserCallback
{
// either handling of <FORM> is buggy, or I made some weird mistake ...
// ... JDK 1.3 sends double "</form>"-tags on closing <form>
// Set to true by convertURLS when a FORM tag is converted; never read in the visible code.
private boolean inForm = false;
// NOTE(review): not referenced anywhere in the visible code.
private boolean inScript = false;
// While false, handleText drops character data; switched off by <head> and back on by </head>.
private boolean emit = true;
// True while handleSimpleTag serializes a tag, so that appendTagToResult closes it with "/>".
private boolean simpleTag = false;
// In-memory buffer that accumulates the rewritten HTML.
private StringWriter result = new StringWriter();
// Private: the only instantiation in the visible code is by the enclosing SwingParserAdaptor.
private Callback ()
{
}
//
// -------------- Hot Java event callbacks... --------------------
//
/**
 * Hot Java event callback for text (all data in between tags).
 *
 * Text is dropped while emitting is switched off (inside HEAD), when the run
 * is empty, when it starts with '&gt;', or when the rewriter vetoes it.
 *
 * @param values the array of characters containing the text
 * @param param  the value supplied by the parser; passed through to the rewriter
 */
public void handleText(char[] values, int param)
{
    if (!emit)
    {
        return;
    }
    // Nothing to write for an empty run; this also guards the values[0] access below.
    if (values == null || values.length == 0)
    {
        return;
    }
    // NOTE(review): runs starting with '>' are skipped - presumably stray markup
    // residue reported by the parser as text; confirm before changing.
    if (values[0] == '>')
    {
        return;
    }
    if (!rewriter.enterText(values, param))
    {
        return;
    }
    addToResult(values);
}
/**
 * Hot Java event callback for handling a simple tag (without begin/end).
 *
 * The tag is written in self-closing form ("/&gt;"). A line separator follows
 * param, object and embed tags, and any text returned by the rewriter's exit
 * hook is appended afterwards.
 *
 * @param tag   the HTML tag being handled
 * @param attrs the mutable HTML attribute set for the current HTML element
 * @param param the value supplied by the parser; not used here
 */
public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attrs, int param)
{
    if (!rewriter.enterSimpleTagEvent(tag, attrs))
    {
        return;
    }
    if (!isValidFragmentTag(tag))
    {
        return;
    }

    // Raise the flag only around the serialization and always lower it again.
    // Setting it before the early returns above would leave it stuck at true,
    // and every following start tag would then be written with "/>".
    simpleTag = true;
    try
    {
        appendTagToResult(tag, attrs);
    }
    finally
    {
        simpleTag = false;
    }

    String tagName = tag.toString();
    if (tagName.equalsIgnoreCase("param") ||
        tagName.equalsIgnoreCase("object") ||
        tagName.equalsIgnoreCase("embed"))
    {
        result.write(lineSeparator);
    }

    String appended = rewriter.exitSimpleTagEvent(tag, attrs);
    if (appended != null)
    {
        result.write(appended);
    }
}
/**
 * Hot Java event callback for handling a start tag.
 *
 * The rewriter may veto the tag. A HEAD start tag is not written and switches
 * text emission off. Any other accepted tag is serialized, optionally followed
 * by a line separator and by the text returned from the rewriter's exit hook.
 *
 * @param tag      the HTML tag being handled
 * @param attrs    the mutable HTML attribute set for the current HTML element
 * @param position the position of the tag; not used here
 */
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position)
{
    if (!rewriter.enterStartTagEvent(tag, attrs))
    {
        return;
    }
    if (HTML.Tag.HEAD == tag)
    {
        emit = false;
        return;
    }
    if (isValidFragmentTag(tag))
    {
        appendTagToResult(tag, attrs);
        formatLine(tag);
        String suffix = rewriter.exitStartTagEvent(tag, attrs);
        if (suffix != null)
        {
            result.write(suffix);
        }
    }
}
/*
 * Decides whether a tag may be written to the output.
 * Currently every tag is accepted: the filter that stripped document-level
 * tags (HTML, BODY, FRAMESET, FRAME, NOFRAMES) and honoured the emit flag is
 * disabled and kept below for reference only.
 *
 * @param tag The HTML tag being checked.
 * @return always true
 */
boolean isValidFragmentTag(HTML.Tag tag)
{
/*
if (false == emit)
return false;
if (tag == HTML.Tag.HTML) // always strip out HTML tag for fragments
return false;
if (tag == HTML.Tag.BODY)
return false;
if (tag == HTML.Tag.FRAMESET) // always strip out FRAMESET tag for fragments
return false;
if (tag == HTML.Tag.FRAME)
return false;
if (tag == HTML.Tag.NOFRAMES)
return false;
*/
return true;
}
/**
 * Hot Java event callback for handling an end tag.
 *
 * The rewriter may veto the tag. A HEAD end tag is not written and switches
 * text emission back on. Any other accepted tag is written as a closing tag,
 * optionally followed by a line separator and by the text returned from the
 * rewriter's exit hook.
 *
 * @param tag      the HTML tag being handled
 * @param position the position of the tag; not used here
 */
public void handleEndTag(HTML.Tag tag, int position)
{
    if (!rewriter.enterEndTagEvent(tag))
    {
        return;
    }
    if (HTML.Tag.HEAD == tag)
    {
        emit = true;
        return;
    }
    if (isValidFragmentTag(tag))
    {
        addToResult("</");
        addToResult(tag);
        addToResult(">");
        formatLine(tag);
        String suffix = rewriter.exitEndTagEvent(tag);
        if (suffix != null)
        {
            result.write(suffix);
        }
    }
}
/*
 * Hot Java event callback for handling errors.
 * Parser errors are deliberately ignored: nothing is logged or written
 * to the output.
 *
 * @param str The error message from Swing.
 * @param param A parameter passed to handler.
 *
 */
public void handleError(java.lang.String str,int param)
{
// intentionally ignored
}
/*
 * Hot Java event callback for HTML comments.
 * Comments are stripped: this handler writes nothing to the output.
 *
 * @param values The character array of text comments.
 * @param param A parameter passed to handler.
 *
 */
public void handleComment(char[] values,int param)
{
// STRIP COMMENTS: addToResult(values);
// Stripping is questionable: it may need to be turned back on for scripts
// that are wrapped inside comments.
}
/**
 * Hot Java event callback for end of line strings.
 * The string is appended to the output unchanged.
 *
 * @param str the end-of-line string
 */
public void handleEndOfLineString(java.lang.String str)
{
    // A String argument selects the Object overload; the cast makes that explicit.
    final Object endOfLine = str;
    this.addToResult(endOfLine);
}
/**
 * Writes a line separator after tags that start a new line, to make the
 * output a little easier to read when debugging.
 *
 * @param tag the HTML tag being handled
 */
private void formatLine(HTML.Tag tag)
{
    boolean needsBreak = tag.isBlock() || tag.breaksFlow();
    if (!needsBreak)
    {
        // These tags also get their own line.
        needsBreak = HTML.Tag.FRAME == tag
                  || HTML.Tag.FRAMESET == tag
                  || HTML.Tag.SCRIPT == tag;
    }
    if (needsBreak)
    {
        result.write(lineSeparator);
    }
}
/**
 * Writes tag and attribute objects to the output buffer using their
 * <code>toString</code> representation.
 * Returns this callback so that calls can be chained. A failure while
 * converting or writing is reported on standard error and otherwise ignored.
 *
 * @param txt any object to be written out via its toString method
 * @return this callback, for chaining
 */
private Callback addToResult(Object txt)
{
    try
    {
        final String text = txt.toString();
        result.write(text);
    }
    catch (Exception failure)
    {
        System.err.println("Error parsing:" + failure);
    }
    return this;
}
/**
 * Writes character content directly to the output buffer.
 * Returns this callback so that calls can be chained. A failure while
 * writing is reported on standard error, in the same way as the Object
 * overload, and does not abort parsing.
 *
 * @param txt the characters to be written out
 * @return this callback, for chaining
 */
private Callback addToResult(char[] txt)
{
    try
    {
        result.write(txt);
    }
    catch (Exception e)
    {
        // Report the failure instead of swallowing it silently; stay
        // best-effort so one bad text run does not fail the whole document.
        System.err.println("Error parsing:" + e);
    }
    return this;
}
/**
 * Accessor to the callback's accumulated content.
 *
 * @return a single space followed by the cleaned and rewritten HTML content
 */
public String getResult()
{
    result.flush();
    // WARNING (from the original author): the leading space must stay; the
    // result reportedly does not work without it, for reasons unknown.
    StringBuffer padded = new StringBuffer(" ");
    padded.append(result.getBuffer());
    return padded.toString();
}
/*
 * Flush callback. NOT IMPLEMENTED: the body is empty, so calling it has no
 * effect on the accumulated output.
 *
 * @throws javax.swing.text.BadLocationException declared in the signature; never thrown by this body
 */
public void flush() throws javax.swing.text.BadLocationException
{
// nothing to do here ...
}
/**
 * Writes an opening tag with all of its attributes to the output.
 *
 * URL-carrying attributes are converted first. Every attribute is then written
 * as <code>name="value"</code>, with double quotes inside the value escaped as
 * <code>&amp;quot;</code> so the value cannot terminate the attribute early.
 * The tag is closed with "/&gt;" while a simple tag is being handled and with
 * "&gt;" otherwise.
 *
 * @param tag   the HTML tag being output
 * @param attrs the mutable HTML attribute set for the current HTML tag
 */
private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs)
{
    convertURLS(tag, attrs);
    Enumeration names = attrs.getAttributeNames();
    addToResult("<").addToResult(tag);
    while (names.hasMoreElements())
    {
        Object attr = names.nextElement();
        String value = attrs.getAttribute(attr).toString();
        // The value is emitted inside double quotes; a raw '"' in it would
        // produce broken markup.
        if (value.indexOf('"') >= 0)
        {
            value = value.replaceAll("\"", "&quot;");
        }
        addToResult(" ").addToResult(attr).addToResult("=\"").
            addToResult(value).addToResult("\"");
    }
    if (simpleTag)
    {
        addToResult("/>");
    }
    else
    {
        addToResult(">");
    }
}
/**
 * Determines which HTML tag is being inspected and converts the URL-carrying
 * attribute that belongs to it. This method contains all the logic for
 * determining how tags are rewritten.
 *
 * The rewriter's convertTagEvent hook is always called first. After that:
 * <ul>
 * <li>A, AREA (HREF), OPTION (VALUE), FRAME (SRC) and FORM (ACTION) are
 *     always converted as proxied URLs;</li>
 * <li>IMG, INPUT, SCRIPT (SRC), LINK (HREF), APPLET (CODEBASE), BODY and TD
 *     (BACKGROUND) are proxied only if the rewriter's proxyAllTags says so;</li>
 * <li>FORM additionally gets METHOD set to POST.</li>
 * </ul>
 *
 * TODO: it would be better to drive this logic off a state table that is not
 * tied to the Hot Java parser.
 *
 * @param tag   the tag reported by the parser callback
 * @param attrs the mutable HTML attribute set for the current HTML element
 */
private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs )
{
    rewriter.convertTagEvent(tag, attrs);

    // ---- <A HREF= and <AREA HREF=
    if (tag == HTML.Tag.A || tag == HTML.Tag.AREA)
    {
        if (attrs.getAttribute(HTML.Attribute.HREF) != null)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.HREF, attrs);
        }
        return;
    }

    // ---- <IMG SRC= and <INPUT SRC=
    if (tag == HTML.Tag.IMG || tag == HTML.Tag.INPUT)
    {
        if (attrs.getAttribute(HTML.Attribute.SRC) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.SRC, attrs, rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <OPTION VALUE=
    if (tag == HTML.Tag.OPTION)
    {
        if (attrs.getAttribute(HTML.Attribute.VALUE) != null)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.VALUE, attrs);
        }
        return;
    }

    // ---- <LINK HREF=
    if (tag == HTML.Tag.LINK)
    {
        if (attrs.getAttribute(HTML.Attribute.HREF) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.HREF, attrs, rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <APPLET CODEBASE= (a missing attribute is handled by the helper)
    if (tag == HTML.Tag.APPLET)
    {
        addConvertedAttribute(tag, HTML.Attribute.CODEBASE, attrs, rewriter.proxyAllTags());
        return;
    }

    // ---- <FRAME SRC= (a missing attribute is handled by the helper)
    if (tag == HTML.Tag.FRAME)
    {
        addProxiedConvertedAttribute(tag, HTML.Attribute.SRC, attrs);
        return;
    }

    // ---- <SCRIPT SRC=
    if (tag == HTML.Tag.SCRIPT)
    {
        Object scriptSource = attrs.getAttribute(HTML.Attribute.SRC);
        // Only external scripts carry a SRC; sources containing "%3E" are left as they are.
        if (scriptSource != null && scriptSource.toString().indexOf("%3E") == -1)
        {
            addConvertedAttribute(tag, HTML.Attribute.SRC, attrs, rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <FORM ACTION=
    if (tag == HTML.Tag.FORM)
    {
        inForm = true; // buggy <form> handling in jdk 1.3
        boolean hasAction = attrs.getAttribute(HTML.Attribute.ACTION) != null;
        // always post
        attrs.addAttribute(HTML.Attribute.METHOD, "POST");
        if (hasAction)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.ACTION, attrs);
        }
        return;
    }

    // ---- <BODY BACKGROUND= and <TD BACKGROUND=
    if (tag == HTML.Tag.BODY || tag == HTML.Tag.TD)
    {
        if (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.BACKGROUND, attrs, rewriter.proxyAllTags());
        }
    }
}
/**
 * Converts the URL held by the given attribute.
 * When <code>proxy</code> is true the work is delegated to
 * addProxiedConvertedAttribute. Otherwise the attribute, if present, is
 * replaced by the URL generated for the non-proxied case.
 *
 * @param tag   the HTML tag that owns the attribute
 * @param attr  the HTML attribute to be converted
 * @param attrs the mutable HTML attribute set for the current HTML element
 * @param proxy if true, the URL is written back as a proxied URL; otherwise
 *              it is written back as a full path to the original host
 */
private void addConvertedAttribute( HTML.Tag tag,
                                    HTML.Attribute attr,
                                    MutableAttributeSet attrs,
                                    boolean proxy )
{
    if (proxy)
    {
        addProxiedConvertedAttribute(tag, attr, attrs);
        return;
    }
    if (attrs.getAttribute(attr) == null)
    {
        // Nothing to convert.
        return;
    }
    String rewritten = generateNewUrl(tag, attrs, attr, false);
    attrs.addAttribute(attr, rewritten);
}
/**
 * Converts the URL held by the given attribute to a proxied URL.
 * A missing attribute is left alone. A <code>javascript:</code> value is
 * written back unchanged (as a string); every other value, including
 * <code>mailto:</code>, is replaced by the URL generated by generateNewUrl.
 *
 * @param tag   the HTML tag that owns the attribute
 * @param attr  the HTML attribute to be proxied
 * @param attrs the mutable HTML attribute set for the current HTML element
 */
private void addProxiedConvertedAttribute( HTML.Tag tag,
                                           HTML.Attribute attr,
                                           MutableAttributeSet attrs )
{
    Object current = attrs.getAttribute(attr);
    if (current == null)
    {
        return;
    }

    String attrSource = current.toString();
    String replacement;
    if (attrSource.startsWith("javascript:"))
    {
        replacement = attrSource;
    }
    else
    {
        // NOTE(review): the original comment said mailto: should not be sent to
        // the proxy server, yet mailto: links take this same path - confirm that
        // the rewriter handles them.
        replacement = generateNewUrl(tag, attrs, attr, true);
    }
    attrs.addAttribute(attr, replacement);
}
/**
 * Asks the rewriter to translate the URL currently stored in the given
 * attribute.
 *
 * @param tag     the HTML tag that owns the attribute
 * @param attrs   the attribute set the current URL is read from
 * @param attr    the attribute holding the URL to translate
 * @param proxied intended to select a proxied URL (true) or a fully
 *                addressable URL on the original web server (false);
 *                NOTE(review): not used by this method - the value is
 *                not forwarded to the rewriter
 * @return the translated URL returned by the rewriter
 */
private String generateNewUrl(HTML.Tag tag,
                              MutableAttributeSet attrs,
                              HTML.Attribute attr,
                              boolean proxied)
{
    Object current = attrs.getAttribute(attr);
    return rewriter.generateNewUrl(current.toString(), tag, attr);
}
}
}