/*
* Copyright 2000-2013 Enonic AS
* http://www.enonic.com/license
*/
package com.enonic.cms.framework.xml;
import java.util.Arrays;
import java.util.regex.Pattern;
/**
 * Replaces control characters that are problematic in XML content.
 * <p>
 * Two entry points are offered: {@link #cleanByteArray(byte[], char)} works on raw bytes and
 * {@link #cleanXml(String)} works on decoded text. The class holds no mutable state.
 */
public final class IllegalCharacterCleaner
{
    /**
     * Matches every character that {@link #cleanXml(String)} replaces: the control characters below
     * U+0020 except tab (U+0009), line feed (U+000A) and carriage return (U+000D), plus DEL (U+007F)
     * and the range U+0080 to U+009F. Compiled once, since a Pattern is immutable and can be shared.
     */
    private static final Pattern CHARS_TO_REPLACE = Pattern.compile( "[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F-\\x9F]" );

    /**
     * Text that is substituted for each character matched by {@link #CHARS_TO_REPLACE}.
     */
    private static final String CHARS_TO_REPLACE_WITH = " ";

    /**
     * Overwrites, in place, every byte whose value is a control code below 0x20 other than
     * tab (0x09), line feed (0x0A) and carriage return (0x0D).
     * <p>
     * The check is done byte by byte with no knowledge of the encoding of the data, and the given
     * array itself is modified; no copy is made.
     *
     * @param bytes       the data to clean; may be {@code null}
     * @param replacement the character to write over each illegal byte; it is narrowed to a single byte
     * @return the same array instance that was passed in, or {@code null} if {@code bytes} was {@code null}
     */
    public byte[] cleanByteArray( final byte[] bytes, char replacement )
    {
        if ( bytes == null )
        {
            // Mirror cleanXml, which also passes null straight through.
            return null;
        }

        final byte replacementByte = (byte) replacement;
        for ( int i = 0; i < bytes.length; i++ )
        {
            if ( isIllegalXmlByte( bytes[i] ) )
            {
                bytes[i] = replacementByte;
            }
        }
        return bytes;
    }

    /**
     * Replaces every character matched by {@link #CHARS_TO_REPLACE} with a single space and then
     * trims the result, so replaced characters at either end of the text disappear entirely.
     *
     * @param xml the text to clean; may be {@code null}
     * @return the cleaned and trimmed text, or {@code null} if {@code xml} was {@code null}
     */
    public String cleanXml( String xml )
    {
        if ( xml == null )
        {
            return null;
        }
        return CHARS_TO_REPLACE.matcher( xml ).replaceAll( CHARS_TO_REPLACE_WITH ).trim();
    }

    /**
     * Tells whether a byte value is a control code below 0x20 other than tab, line feed and
     * carriage return. Java bytes are signed, so values of 0x80 and above are negative and never match.
     */
    private static boolean isIllegalXmlByte( final byte value )
    {
        return value >= 0x00 && value < 0x20 && value != 0x09 && value != 0x0A && value != 0x0D;
    }
}