/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.wicket.response.filter; import org.apache.wicket.page.XmlPartialPageUpdate; import org.apache.wicket.util.string.AppendingStringBuffer; import org.apache.wicket.util.string.Strings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * An IResponseFilter that removes all invalid XML characters. * By default it is used only for Wicket <em>Ajax</em> responses. * * <p>If the application needs to use it for other use cases then it can either override * {@linkplain #shouldFilter(AppendingStringBuffer)} in the case it is used as IResponseFilter or * {@linkplain #stripNonValidXMLCharacters(AppendingStringBuffer)} can be used directly. * </p> * * <p>Usage: * * MyApplication.java * <code><pre> * public void init() { * super.init(); * * getRequestCycleSettings().addResponseFilter(new XmlCleaningResponseFilter()); * } * </pre></code> * </p> */ public class XmlCleaningResponseFilter implements IResponseFilter { private static final Logger LOG = LoggerFactory.getLogger(XmlCleaningResponseFilter.class); @Override public AppendingStringBuffer filter(AppendingStringBuffer responseBuffer) { AppendingStringBuffer result = responseBuffer; if (shouldFilter(responseBuffer)) { result = stripNonValidXMLCharacters(responseBuffer); } return result; } /** * Decides whether the filter should be applied. * * @param responseBuffer The buffer to filter * @return {@code true} if the buffer brings Ajax response */ protected boolean shouldFilter(AppendingStringBuffer responseBuffer) { // To avoid reading the whole buffer for non-Ajax responses // read just the first N chars. A candidate can start with: // <?xml version="1.0" encoding="UTF-8" standalone="yes"?><ajax-response> int min = Math.min(150, responseBuffer.length()); String firstNChars = responseBuffer.substring(0, min); return firstNChars.contains(XmlPartialPageUpdate.START_ROOT_ELEMENT); } /** * This method ensures that the output String has only * valid XML unicode characters as specified by the * XML 1.0 standard. For reference, please see * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * standard</a>. This method will return an empty * String if the input is null or empty. * * @param input The StringBuffer whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public AppendingStringBuffer stripNonValidXMLCharacters(AppendingStringBuffer input) { if (input == null) { return new AppendingStringBuffer(); } char[] chars = input.getValue(); AppendingStringBuffer out = null; boolean isDebugEnabled = LOG.isDebugEnabled(); int codePoint; int i = 0; while (i < input.length()) { codePoint = Character.codePointAt(chars, i, chars.length); if (!isValidXmlChar(codePoint)) { if (out == null) { out = new AppendingStringBuffer(chars.length); out.append(input.subSequence(0, i)); if (isDebugEnabled) { LOG.debug("An invalid character '{}' found at position '{}' in '{}'", String.format("0x%X", codePoint), i, new String(chars)); } } else if (isDebugEnabled) { LOG.debug(String.format("Dropping character for codePoint '0x%X' at position '%d'", codePoint, i)); } } else if (out != null) { out.append(Character.toChars(codePoint)); } // Increment with the number of code units(java chars) needed to represent a Unicode char. i += Character.charCount(codePoint); } return out != null ? out : input; } /** * Checks whether the character represented by this codePoint is * a valid in XML documents. * * @param codePoint The codePoint for the checked character * @return {@code true} if the character can be used in XML documents */ protected boolean isValidXmlChar(int codePoint) { return (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF)); } }