/*
* Created on Jan 7, 2004
*
*/
package com.openedit.modules.html;
/**
 * HTML clean-up helpers: escapes characters that are unsafe in markup as numeric
 * character references, and strips markup from a fragment while keeping anchor tags.
 * The JTidy-backed {@code tidySource} routine is currently commented out at the
 * bottom of this class.
 *
 * @author Matthew Avery
 */
public class Tidy
{
    //protected org.w3c.tidy.Tidy fieldJtidy;

    /** Character code of the non-breaking space. */
    private static final char NBSP = 160;

    /**
     * Matches every tag except an opening anchor tag (the name {@code a} followed by a
     * word boundary) and the exact closing tag {@code </a>}. The word boundary keeps
     * tags that merely start with the letter "a" (abbr, address, area, ...) from being
     * mistaken for anchors.
     */
    private static final java.util.regex.Pattern NON_ANCHOR_TAG =
        java.util.regex.Pattern.compile( "<(?!a\\b|/a>)[^>]*>" );

    /**
     * Escapes characters that should not appear raw in the output.
     * <ul>
     * <li>the non-breaking space (code 160) becomes {@code &#160;}</li>
     * <li>an ISO control character that is not whitespace becomes its decimal numeric
     *     character reference, e.g. code 7 becomes {@code &#7;}</li>
     * <li>every other character, including whitespace control characters such as tab
     *     and newline, is copied unchanged</li>
     * </ul>
     * The result is trimmed before it is returned.
     *
     * @param inRawSource the text to escape; may be {@code null}
     * @return the escaped, trimmed text, or {@code null} when the input is {@code null}
     */
    protected String escapeSpecialCharacters( String inRawSource )
    {
        if ( inRawSource == null )
        {
            return null;
        }
        StringBuilder escapedSource = new StringBuilder( inRawSource.length() );
        for ( int n = 0; n < inRawSource.length(); n++ )
        {
            char c = inRawSource.charAt( n );
            if ( c == NBSP )
            {
                escapedSource.append( "&#160;" );
            }
            else if ( !Character.isISOControl( c ) || Character.isWhitespace( c ) )
            {
                escapedSource.append( c );
            }
            else
            {
                // Non-whitespace control character: emit a decimal character reference.
                escapedSource.append( "&#" ).append( (int) c ).append( ';' );
            }
        }
        return escapedSource.toString().trim();
    }

    /**
     * Strips all markup from an HTML fragment except anchor tags, then converts a few
     * common entities to plain text: {@code &nbsp;} becomes a space, {@code &quot;}
     * becomes a double quote and {@code &trade;} becomes {@code TM}.
     * <p>
     * Anchor matching is case-sensitive: only lower-case opening {@code a} tags and the
     * exact closing tag {@code </a>} are kept.
     *
     * @param inHtml the HTML fragment; may be {@code null}
     * @return the fragment with non-anchor tags removed and the entities above decoded,
     *         or {@code null} when the input is {@code null}
     */
    public String removeHtml(String inHtml)
    {
        //getJtidy().parseDOM()
        //String val = inHtml.replaceAll("<br>","\n");
        //val = val.replaceAll("<br />","\n");
        if ( inHtml == null )
        {
            return null;
        }
        // One pass that skips anchors, so no placeholder text has to be swapped in and
        // back out (input that happens to contain such a placeholder stays untouched).
        String val = NON_ANCHOR_TAG.matcher( inHtml ).replaceAll( "" );
        // Entities are decoded only after the tags are gone, in this fixed order.
        val = val.replace( "&nbsp;", " " );
        val = val.replace( "&quot;", "\"" );
        val = val.replace( "&trade;", "TM" );
        return val;
    }

    /*
    Disabled JTidy integration, kept for reference.

    public String tidySource( String inRawSource, boolean inPreserveHeader )
    {
    ByteArrayInputStream inputStream = new ByteArrayInputStream( inRawSource.getBytes() );
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    getJtidy().parse( inputStream, outputStream );
    String html = outputStream.toString();;
    if ( inPreserveHeader )
    {
    return html;
    }
    if ( inRawSource.startsWith("<style"))
    {
    return inRawSource;
    }
    if ( inRawSource.startsWith("<script"))
    {
    return inRawSource;
    }
    String body = "<body>";
    int startIndex = html.indexOf( body );
    if ( startIndex < 0 )
    {
    return inRawSource; //We cannot handle it
    }
    else
    {
    startIndex += body.length();
    }
    String bodyclose = "</body>";
    int endIndex = html.indexOf( bodyclose );
    if ( endIndex < 0 )
    {
    endIndex = Math.max( html.length() - 1, startIndex );
    }
    String substring = html.substring( startIndex, endIndex );
    //TODO: we need to get back any Javascript or style stuff that got moved to the head tag
    if ( substring == null || substring.length() == 0)
    {
    return inRawSource; //so it does not delete it all
    }
    return escapeSpecialCharacters( substring );
    }
    protected org.w3c.tidy.Tidy getJtidy()
    {
    if ( fieldJtidy == null )
    {
    fieldJtidy = new org.w3c.tidy.Tidy();
    fieldJtidy.setWraplen(200); //we should not need to wrap stuff
    fieldJtidy.setSpaces(4);
    //fieldJtidy.setPrintBodyOnly(true);
    //fieldJtidy.setMakeClean(false);
    fieldJtidy.setXHTML(true);
    fieldJtidy.setTabsize(3);
    fieldJtidy.setShowWarnings(false);
    fieldJtidy.setQuiet( true );
    }
    return fieldJtidy;
    }
    */
}