/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.rendering; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.util.Map; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import org.apache.commons.lang.CharUtils; import org.novelang.logger.Logger; import org.novelang.logger.LoggerFactory; import org.novelang.parser.GeneratedLexemes; import org.novelang.parser.SourceUnescape; import org.novelang.parser.shared.Lexeme; /** * Escapes characters for rendering. Based on {@link GeneratedLexemes}. * <p> * Novelang grammar <em>must</em> define at least HTML named entities for those characters * (otherwise HTML rendering will break): * <pre> < > & * </pre> * * @author Laurent Caillette */ public class RenderingEscape { private static final Logger LOGGER = LoggerFactory.getLogger( RenderingEscape.class ) ; // =============== // Tables creation // =============== private static final Map< Character, String > UNICODE_NAME_ESCAPES ; private static final Map< Character, String > PREFERRED_ESCAPES ; private static final Map< Character, String > HTML_MANDATORY_ESCAPES ; private RenderingEscape() { } static { final Map< Character, String > unicodeNameEscapes = Maps.newHashMap() ; final Map< Character, String > preferredEscapes = Maps.newHashMap() ; final Map< Character, String > htmlMandatoryEscapes = Maps.newHashMap() ; for( final Lexeme lexeme : GeneratedLexemes.getLexemes().values() ) { final Character character = lexeme.getCharacter(); final String unicodeName = lexeme.getUnicodeName() ; put( character, SourceUnescape.unicodeUpperNameToEscapeName( unicodeName ), unicodeNameEscapes ) ; final String htmlEntityName = lexeme.getHtmlEntityName(); if( htmlEntityName != null ) { put( character, htmlEntityName, preferredEscapes ) ; } } put( '<', "lt", preferredEscapes, htmlMandatoryEscapes ) ; put( '>', "gt", preferredEscapes, htmlMandatoryEscapes ) ; put( '&', "amp", preferredEscapes, htmlMandatoryEscapes ) ; put( Spaces.NO_BREAK_SPACE, "nbsp", preferredEscapes, htmlMandatoryEscapes ) ; UNICODE_NAME_ESCAPES = ImmutableMap.copyOf( unicodeNameEscapes ) ; LOGGER.debug( "Created Unicode name escape table with ", UNICODE_NAME_ESCAPES.size(), " entries." ) ; PREFERRED_ESCAPES = ImmutableMap.copyOf( preferredEscapes ) ; LOGGER.debug( "Created preferred escape table with ", PREFERRED_ESCAPES.size(), " entries." ) ; HTML_MANDATORY_ESCAPES = ImmutableMap.copyOf( htmlMandatoryEscapes ) ; LOGGER.debug( "Created HTML mandatory escape table with ", HTML_MANDATORY_ESCAPES.size(), " entries." ) ; } private static void put( final Character character, final String string, final Map< Character, String >... maps ) { for( final Map< Character, String > map : maps ) { map.put( character, string ) ; } } // =========== // HTML escape // =========== private static String escapeHtmlIfNeeded( final char unescaped, final CharsetEncodingCapability capability ) { final String mandatoryEscape = HTML_MANDATORY_ESCAPES.get( unescaped ) ; if( null == mandatoryEscape ) { if( capability.canEncode( unescaped ) ) { return "" + unescaped ; } else { final String convenienceEscape = PREFERRED_ESCAPES.get( unescaped ) ; if( null == convenienceEscape ) { return wrapWithHtmlEntityDelimiters( CharUtils.unicodeEscaped( unescaped ) ) ; } else { return wrapWithHtmlEntityDelimiters( convenienceEscape ) ; } } } else { return wrapWithHtmlEntityDelimiters( mandatoryEscape ) ; } } private static String wrapWithHtmlEntityDelimiters( final String string ) { return "&" + string + ";" ; } /** * For each character, replaces a given character with HTML named entity if not a part * of given charset. * * @param text a non-null object. * @param capability non-null object. * @return a non-null, non-empty String. */ public static String escapeToHtmlText( final String text, final CharsetEncodingCapability capability ) { final StringBuffer buffer = new StringBuffer() ; for( final char c : text.toCharArray() ) { final String escaped = escapeHtmlIfNeeded( c, capability ) ; buffer.append( escaped ) ; } return buffer.toString() ; } // ============= // Source escape // ============= private static String escapeToSourceIfNeeded( final char unescaped, final CharsetEncodingCapability capability ) { if( capability.canEncode( unescaped ) ) { return "" + unescaped ; } else { return unconditionalEscapeToSource( unescaped ); } } public static String unconditionalEscapeToSource( final char unescaped ) { final String preferredEscape = PREFERRED_ESCAPES.get( unescaped ) ; if( null == preferredEscape ) { final String unicodeEscape = UNICODE_NAME_ESCAPES.get( unescaped ); if( null == unicodeEscape ) { throw new IllegalArgumentException( "No Unicode name for: " + CharUtils.unicodeEscaped( unescaped ) ) ; } else { return wrapWithSourceEscapeDelimiters( unicodeEscape ) ; } } else { return wrapWithSourceEscapeDelimiters( preferredEscape ) ; } } private static String wrapWithSourceEscapeDelimiters( final String string ) { return SourceUnescape.ESCAPE_START + string + SourceUnescape.ESCAPE_END ; } /** * For each character, replaces with source-friendly escape if not a part of given charset. * Escape is based on HTML entity name if available, or Unicode name otherwise. * * @param text a non-null object. * @param capability non-null object. * @return a non-null, non-empty String. */ public static String escapeToSourceText( final String text, final CharsetEncodingCapability capability ) { final StringBuffer buffer = new StringBuffer() ; for( final char c : text.toCharArray() ) { final String escaped = escapeToSourceIfNeeded( c, capability ) ; buffer.append( escaped ) ; } return buffer.toString() ; } // =================== // Encoding capability // =================== /** * Avoids to expose a whole {@link CharsetEncoder} while we just want to know * if it can encode a character. */ public static CharsetEncodingCapability createCapability( final Charset charset ) { final CharsetEncoder encoder = charset.newEncoder() ; return new CharsetEncodingCapability() { @Override public boolean canEncode( final char c ) { return encoder.canEncode( c ) ; } } ; } public interface CharsetEncodingCapability { boolean canEncode( char c ) ; } }