package com.rectang.xsm.util; import com.rectang.xsm.site.Site; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.HTML; import javax.swing.text.MutableAttributeSet; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.*; import java.io.StringReader; /** * Html utilities * * @author Andrew Williams * @version $Id: HTMLUtils.java 793 2009-04-20 19:01:21Z andy $ * @since 2.0 */ public class HTMLUtils { /** * Summarise an html string, returning a copy of the beginning of the String * <code>in</code> stopping at the last space before the position * <code>chars</code>. "..." will be appended to strings that are truncated. * All HTML tags remaining open at the end of the summary will be closed. * The <code>char</limit> does <b>not</b> include tags. * * @param in The String input to summarise * @param chars The maximum number of chars to appear in the summary * @return A new String summarising the input */ public static synchronized String summarise( String in, int chars ) { try { SummaryParser parser = new SummaryParser( chars ); // compensate for parser decoding HTML entities parser.getParser().parse( new StringReader( in.replaceAll( "&", "&" ) ), parser.new ParserCallback(), true ); return parser.getSummary(); } catch ( Exception e ) { e.printStackTrace(); /* if we cannot parse just return a trimmed version */ return StringUtils.summarise( in, chars ); } } /** * Convert a page name to a url encoded page url. * * @param in The page name to encode * @return A url encoded version of the page name */ public String urlEncode( String in ) { try { return URLEncoder.encode( in, "UTF-8" ); } catch ( UnsupportedEncodingException e ) { // UTF-8 always exists } return in; } public static String toAbsolute( String link, Site site ) { if ( link.indexOf( "://" ) != -1 ) { return link; } if ( link.charAt( 0 ) == '/' ) { String server = site.getRootUrl().substring( 0, site.getRootUrl().length() - site.getPrefixUrl().length() ); return server + link; } //TODO handle relative links from the page, not the site root return link; } public static synchronized String toAbsoluteLinks( String in, Site site ) { LinkParser parser = new LinkParser( site ); try { // compensate for parser decoding HTML entities parser.getParser().parse( new StringReader( in.replaceAll( "&", "&" ) ), parser.new ParserCallback(), true ); return parser.getAbsoluteHTML(); } catch ( Exception e ) { return in; } } } /** * A simple HTML parser based on hte HTMLEditorKit in javax.swing.text.html. * This parser summarises HTML by stripping all tags and returning the text cut * to the last space before the limit. "..." is appended to truncated strings. * The limit does not include tags. * * @author aje * */ class SummaryParser extends HTMLEditorKit { private StringBuffer summary; private boolean appending; private int chars, count; private Vector open; public SummaryParser( int chars ) { this.summary = new StringBuffer(); this.appending = true; this.chars = chars; this.count = 0; this.open = new Vector(); } public HTMLEditorKit.Parser getParser() { return super.getParser(); } public class ParserCallback extends HTMLEditorKit.ParserCallback { public void handleText( char[] data, int pos ) { if ( count > chars ) { return; } StringTokenizer tokens = new StringTokenizer( new String( data ), " ", true ); while ( tokens.hasMoreElements() && appending ) { String token = (String) tokens.nextElement(); /* weird that the parser should return these brackets... */ if ( token.startsWith( ">" ) ) { if ( token.length() <= 1 ) { continue; } else { token = token.substring( 1 ); } } count += token.length() + 1; if ( count > chars ) { appending = false; break; } summary.append( token ); } } public void handleSimpleTag( HTML.Tag t, MutableAttributeSet a, int pos ) { if ( !appending || a.containsAttribute( IMPLIED, Boolean.TRUE ) ) { return; } summary.append( "<" + t.toString() ); printAttributes( a ); summary.append( "/>" ); } public void handleStartTag( HTML.Tag t, MutableAttributeSet a, int pos ) { if ( !appending || a.containsAttribute( IMPLIED, Boolean.TRUE ) ) { return; } open.add( t ); summary.append( "<" + t.toString() ); printAttributes( a ); summary.append( ">" ); } public void handleEndTag( HTML.Tag t, int pos ) { if ( !appending ) { return; } for ( int i = open.size() - 1; i >= 0; i-- ) { if ( open.get( i ).equals( t ) ) { summary.append( "</" + t.toString() + ">" ); open.remove( i ); break; } } } public void handleEndOfLineString( String eol ) { for ( int i = open.size() - 1; i >= 0; i-- ) { summary.append( "</" + open.get( i ).toString() + ">" ); open.remove( i ); } if ( !appending ) { summary.append( "..." ); } } public void printAttributes( MutableAttributeSet a ) { Enumeration att = a.getAttributeNames(); while ( att.hasMoreElements() ) { Object next = att.nextElement(); summary.append( " " + next.toString() + "=\"" + a.getAttribute( next ) + "\"" ); } } } public String getSummary() { return summary.toString(); } } class LinkParser extends HTMLEditorKit { private StringBuffer absolute; private Site site; public LinkParser( Site site ) { this.absolute = new StringBuffer(); this.site = site; } public String getAbsoluteHTML() { return absolute.toString(); } public HTMLEditorKit.Parser getParser() { return super.getParser(); } public class ParserCallback extends HTMLEditorKit.ParserCallback { private Set implied = new HashSet(); public void handleText( char[] data, int pos ) { if ( data[0] == '>' ) { absolute.append( data, 1, data.length - 1 ); } else { absolute.append( data ); } } public void handleSimpleTag( HTML.Tag t, MutableAttributeSet a, int pos ) { if ( a.containsAttribute( IMPLIED, Boolean.TRUE ) || a.containsAttribute( HTML.Attribute.ENDTAG, "true" ) ) { return; } absolute.append( "<" + t.toString() ); printAttributes( a ); absolute.append( "/>" ); } public void handleStartTag( HTML.Tag t, MutableAttributeSet a, int pos ) { if ( a.containsAttribute( IMPLIED, Boolean.TRUE ) ) { implied.add( t ); return; } absolute.append( "<" + t.toString() ); printAttributes( a ); absolute.append( ">" ); } public void handleEndTag( HTML.Tag t, int pos ) { if ( implied.contains( t ) ) { implied.remove( t ); return; } absolute.append( "</" + t.toString() + ">" ); } public void handleEndOfLineString( String eol ) { absolute.append( eol ); } public void printAttributes( MutableAttributeSet a ) { Enumeration att = a.getAttributeNames(); while ( att.hasMoreElements() ) { Object name = att.nextElement(); Object value = a.getAttribute( name ); if ( name.equals( HTML.Attribute.HREF ) || name.equals( HTML.Attribute.SRC ) ) { value = HTMLUtils.toAbsolute( (String) value, site ); } absolute.append( " " + name.toString() + "=\"" + value + "\"" ); } } } }