/*
* Copyright (C) 2011 Laurent Caillette
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.novelang.outfit.xml;
import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.xml.sax.InputSource;
import org.novelang.logger.Logger;
import org.novelang.logger.LoggerFactory;
/**
* Tweaks DTD entity on-the-fly for escaping entity that we need to appear as-they-are
* inside rendered HTML.
*
* @author Laurent Caillette
*/
public class DtdTools {
private static final Logger LOGGER = LoggerFactory.getLogger( DtdTools.class );
private static final Pattern PATTERN = Pattern.compile(
"\\<\\!ENTITY\\s+(\\w+)\\s+\\\"((?:\\d|\\w|\\;)+)\\\"\\s*?\\>" ) ;
static {
LOGGER.debug( "Crafted regex ", PATTERN.pattern() ) ;
}
private static final String REPLACEMENT = "<!ENTITY $1 \"&$1;\" > " ;
private DtdTools() { }
public static InputSource escapeEntities( final InputSource unescapedInputSource )
throws IOException
{
final String unescapedDtd ;
if( null == unescapedInputSource.getCharacterStream() ) {
if( null == unescapedInputSource.getByteStream() ) {
throw new IllegalArgumentException( "unescapedInputSource provides no valid stream" ) ;
} else {
unescapedDtd = IOUtils.toString( unescapedInputSource.getByteStream() ) ;
}
} else {
unescapedDtd = IOUtils.toString( unescapedInputSource.getCharacterStream() ) ;
}
final Matcher matcher = PATTERN.matcher( unescapedDtd ) ;
final String escapedDtd = matcher.replaceAll( REPLACEMENT ) ;
final InputSource escapedInputSource = new InputSource( unescapedInputSource.getSystemId() );
escapedInputSource.setEncoding( unescapedInputSource.getEncoding() ) ;
escapedInputSource.setPublicId( unescapedInputSource.getPublicId() ) ;
escapedInputSource.setCharacterStream( new StringReader( escapedDtd ) ) ;
return escapedInputSource;
}
}