/* * Copyright (c) 2014 the original author or authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.werval.modules.sanitize; import java.net.URL; import java.util.Locale; import java.util.MissingResourceException; import java.util.ResourceBundle; import io.werval.api.i18n.Lang; import io.werval.modules.metrics.Metrics; import com.codahale.metrics.Timer; import com.google.json.JsonSanitizer; import org.owasp.encoder.Encode; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.AntiSamy; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static io.werval.util.IllegalArguments.ensureNotEmpty; import static io.werval.util.Strings.EMPTY; import static io.werval.util.Strings.isEmpty; /** * Sanitize and encode. */ // TODO Allow JIT to inline NOOP or Metrics accounting public final class Sanitize { private static final Logger LOG = LoggerFactory.getLogger( Sanitize.class ); public static final String ANTISAMY_ANYTHINGGOES = "antisamy-anythinggoes-1.4.4.xml"; public static final String ANTISAMY_SLASHDOT = "antisamy-slashdot-1.4.4.xml"; public static final String ANTISAMY_MYSPACE = "antisamy-myspace-1.4.4.xml"; public static final String ANTISAMY_EBAY = "antisamy-ebay-1.4.4.xml"; public static final String ANTISAMY_TINYMCE = "antisamy-tinymce-1.4.4.xml"; private final ClassLoader loader; private final Lang lang; private final AntiSamy antiSamy; private final CssScanner cssScanner; private final Metrics metrics; /* package */ Sanitize( ClassLoader loader, Lang lang, URL policy, Metrics metrics ) throws SanitizeException { this.loader = loader; this.lang = lang; try { this.antiSamy = new AntiSamy( Policy.getInstance( policy ) ); ResourceBundle messages; try { messages = ResourceBundle.getBundle( "AntiSamy", lang.toLocale(), loader ); } catch( MissingResourceException ex ) { messages = ResourceBundle.getBundle( "AntiSamy", Locale.UK, loader ); } this.cssScanner = new CssScanner( (InternalPolicy) InternalPolicy.getInstance( policy ), messages ); } catch( Exception ex ) { throw new SanitizeException( ex ); } this.metrics = metrics; } /** * Create a new {@literal Sanitize} instance using a given AntySamy policy. * * @param policyResourceName Name of the classpath resource to load the policy * * @return The new {@literal Sanitize} instance using the given AntySamy policy * * @throws SanitizeException if anything goes wrong */ public Sanitize withPolicy( String policyResourceName ) throws SanitizeException { ensureNotEmpty( "Policy resource name", policyResourceName ); return new Sanitize( loader, lang, loader.getResource( policyResourceName ), metrics ); } /** * Silently sanitize HTML. * <p> * Use {@literal AntiSamy}. * * @param input HTML input, may be null * * @return Sanitized HTML according the the active policy, empty string if anything goes wrong */ public String html( String input ) { Timer.Context timer = null; if( metrics != null ) { timer = metrics.metrics().timer( "io.werval.modules.sanitize.html" ).time(); } try { if( isEmpty( input ) ) { return EMPTY; } CleanResults results = antiSamy.scan( input ); if( results.getNumberOfErrors() > 0 ) { LOG.debug( "HTML sanitization filtered {} errors: {}", results.getNumberOfErrors(), results.getErrorMessages() ); } return results.getCleanHTML(); } catch( Exception ex ) { LOG.error( "HTML sanitization error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } finally { if( timer != null ) { timer.close(); } } } /** * Silently sanitize CSS. * <p> * Use {@literal AntiSamy} CssScanner. * * @param input CSS input, may be null * * @return Sanitized CSS according the the active policy, empty string if anything goes wrong */ public String css( String input ) { Timer.Context timer = null; if( metrics != null ) { timer = metrics.metrics().timer( "io.werval.modules.sanitize.css" ).time(); } try { if( isEmpty( input ) ) { return EMPTY; } CleanResults results = cssScanner.scanStyleSheet( input, Integer.MAX_VALUE ); // Lower this! if( results.getNumberOfErrors() > 0 ) { LOG.debug( "CSS sanitization filtered {} errors: {}", results.getNumberOfErrors(), results.getErrorMessages() ); } return results.getCleanHTML(); } catch( Exception ex ) { LOG.error( "CSS sanitization error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } finally { if( timer != null ) { timer.close(); } } } /** * Silently sanitize JSON. * <p> * This can be attached at either end of a data-pipeline to help satisfy Postel's principle: * <blockquote> * be conservative in what you do, be liberal in what you accept from others * </blockquote> * <p> * Applied to JSON-ish content from others, it will produce well-formed JSON that should satisfy any parser you use. * <p> * Applied to your output before you send, it will coerce minor mistakes in encoding and make it easier to embed * your JSON in HTML and XML. * * @param input JSON input, may be null * * @return Sanitized JSON, empty string if anything goes wrong */ public String json( String input ) { Timer.Context timer = null; if( metrics != null ) { timer = metrics.metrics().timer( "io.werval.modules.sanitize.json" ).time(); } try { if( isEmpty( input ) ) { return EMPTY; } return JsonSanitizer.sanitize( input ); } catch( Exception ex ) { LOG.error( "JSON sanitization error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } finally { if( timer != null ) { timer.close(); } } } /** * Encodes for HTML text content and text attributes. * * @param input HTML input, may be null * * @return Encoded HTML text, empty string if anything goes wrong */ public String forHtml( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forHtml( input ); } catch( Exception ex ) { LOG.error( "Encoding for HTML error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } /** * Encodes for CSS strings. * * @param input CSS input, may be null * * @return Encoded CSS, empty string if anything goes wrong */ public String forCssString( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forCssString( input ); } catch( Exception ex ) { LOG.error( "Encoding for CSS string error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } /** * Encodes for CSS URLs. * * @param input CSS input, may be null * * @return Encoded CSS, empty string if anything goes wrong */ public String forCssUrl( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forCssUrl( input ); } catch( Exception ex ) { LOG.error( "Encoding for CSS URL error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } /** * Encodes for a JavaScript string. * <p> * Safe for use in HTML script attributes (such as onclick), script blocks, JSON files, and JavaScript source. * * @param input Javascript input, may be null * * @return Encoded Javascript, empty string if anything goes wrong */ public String forJavascript( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forJavaScript( input ); } catch( Exception ex ) { LOG.error( "Encoding for Javascript error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } /** * Encodes for XML text content and text attributes. * * @param input XML input, may be null * * @return Encoded XML text, empty string if anything goes wrong */ public String forXml( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forXml( input ); } catch( Exception ex ) { LOG.error( "Encoding for XML error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } /** * Encodes data for an XML CDATA section. * * @param input input, may be null * * @return Encoded CDATA, empty string if anything goes wrong */ public String forCDATA( String input ) { if( isEmpty( input ) ) { return EMPTY; } try { return Encode.forCDATA( input ); } catch( Exception ex ) { LOG.error( "Encoding for CDATA error, will return empty string: {}", ex.getMessage(), ex ); return EMPTY; } } }