/* * Copyright 1998-2014 University Corporation for Atmospheric Research/Unidata * * Portions of this software were developed by the Unidata Program at the * University Corporation for Atmospheric Research. * * Access and use of this software shall impose the following obligations * and understandings on the user. The user is granted the right, without * any fee or cost, to use, copy, modify, alter, enhance and distribute * this software, and any derivative works thereof, and its supporting * documentation for any purpose whatsoever, provided that this entire * notice appears in all copies of the software, derivative works and * supporting documentation. Further, UCAR requests that the user credit * UCAR/Unidata in any publications that result from the use of this * software or in any product that includes this software. The names UCAR * and/or Unidata, however, may not be used in any advertising or publicity * to endorse or promote any products or commercial entity unless specific * written permission is obtained from UCAR/Unidata. The user also * understands that UCAR/Unidata is not obligated to provide the user with * any support, consulting, training or assistance of any kind with regard * to the use, operation and performance of this software nor to provide * the user with any updates, revisions, new versions or "bug fixes." * * THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL, * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION * WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE. */ package thredds.util; import org.springframework.web.util.HtmlUtils; import java.io.UnsupportedEncodingException; import java.io.File; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.nio.charset.Charset; import java.nio.charset.CharacterCodingException; import java.nio.CharBuffer; import java.nio.ByteBuffer; /** * Utility methods for validating strings. * * @author edavis * @since 3.16.47 */ public class StringValidateEncodeUtils { private static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger( StringValidateEncodeUtils.class ); private StringValidateEncodeUtils() {} public final static String CHARACTER_ENCODING_UTF_8 = "UTF-8"; /** * Return true if the given String is a valid single-line String. * * <p>A string will be considered a valid single-line string if it does not * contain any characters from these Unicode general categories: * * <ul> * <li>Cc - Other, Control</li> * <li>Cf - Other, Format</li> * <li>Cs - Other, Surrogate</li> * <li>Co - Other, Private Use</li> * <li>Cn - Other, Not Assigned</li> * <li>Zl - Separator, Line</li> * <li>Zp - Separator, Paragraph</li> * </ul> * * <p>Or, in other words, allow: Letters, Numbers, Marks, Punctuation, * Symbols, and Space separators. * * @param singleLineString the String to validate * @return true if the given String is a valid single-line String. */ public static boolean validSingleLineString( String singleLineString ) { if ( singleLineString == null ) return false; Matcher m = INVALID_CHARACTERS_FOR_SINGLE_LINE_STRING_PATTERN.matcher( singleLineString ); return ! m.find(); } private final static Pattern INVALID_CHARACTERS_FOR_SINGLE_LINE_STRING_PATTERN = Pattern.compile( "[\\p{Zl}\\p{Zp}\\p{C}]"); /** * Return true if the given String is a valid path. * * <p> * A String is considered a valid path if: * <ul> * <li> when passed to validSingleLineString(String) true is returned, and * <li> it does not contain any parent path segments ("../").</li> * </li> * </ul> * * @param path the String to validate * @return true if the given String is a valid path. * @see #validSingleLineString(String) */ @SuppressWarnings({"SimplifiableIfStatement"}) public static boolean validPath( String path ) { if ( path == null ) return false; // Don't allow ".." directories in path. if ( path.indexOf( "/../" ) != -1 || path.equals( ".." ) || path.startsWith( "../" ) || path.endsWith( "/.." ) ) return false; return validSingleLineString( path ); } /** * Return true if the given String is a valid File path. * * <p> * A String is considered a valid File path if: * <ul> * <li> when passed to validPath(String) true is returned; and</li> * <li> it does not contain the Java File path separator * (java.io.File.pathSeparatorChar) which is system dependant.</li> * </ul> * * @param path the String to validate * @return true if the given String is a valid File path. * @see #validPath(String) */ @SuppressWarnings({"SimplifiableIfStatement"}) public static boolean validFilePath( String path ) { if ( path == null ) return false; if ( path.indexOf( File.pathSeparatorChar) != -1 ) return false; return validPath( path ); } /** * Return true if the given String is a valid URI string. * * <p> * A String is considered a valid URI path if: * <ul> * <li> when passed to validPath(String) true is returned; and</li> * <li> ??? see Note below</li> * </ul> * * <p><strong>NOTE:</strong> Check compliance with URI RFC (RFC 3986) - TODO. * * @param uri the String to validate. * @return true if the given String is a valid URI string. */ @SuppressWarnings({"SimplifiableIfStatement"}) public static boolean validUriString( String uri ) { if ( uri == null ) return false; return validPath( uri ); } /** * Return true if the given String is a valid ID string. * * <p> * A String is considered a valid ID string if: * <ul> * <li>it contains no space separator characters (Unicode general * category Zs - Separator, Space); and</li> * <li>true is returned when the string is passed to * validSingleLineString(String).</li> * </ul> * * @param id the String to validate * @return true if the given String is a valid ID string. * @see #validSingleLineString(String) */ public static boolean validIdString( String id ) { if ( id == null ) return false; Matcher m = INVALID_CHARACTERS_FOR_ID_STRING_PATTERN.matcher( id ); return ! ( m.find() || ! validSingleLineString( id ) ); } private final static Pattern INVALID_CHARACTERS_FOR_ID_STRING_PATTERN = Pattern.compile( "[\\p{Zs}]"); /** * Return true if the given String contains any less than ("<") or * greater than (">") characters; otherwise return false. * * @param string the String to check. * @return true if the given String contains any less than ("<") or greater than (">") characters */ public static boolean containsAngleBracketCharacters( String string ) { if ( string == null ) return false; if ( string.indexOf( "<" ) == -1 && string.indexOf( ">" ) == -1 ) return false; return true; } /** * Return true if the given String contains any ampersand ("&") * characters; otherwise return false. * * @param string the String to check. * @return true if the given String contains any ampersand ("&") characters */ public static boolean containsAmpersandCharacters( String string ) { if ( string == null ) return false; if ( string.indexOf( "&" ) == -1 ) return false; return true; } /** * Return true if the given String contains any backslash ("\") * characters; otherwise return false. * * @param string the String to check. * @return true if the given String contains any backslash ("\") characters */ public static boolean containsBackslashCharacters( String string ) { if ( string == null ) return false; if ( string.indexOf( "\\" ) == -1 ) return false; return true; } public static boolean validDecimalNumber( String number ) { if ( number == null ) return false; Matcher m = VALID_DECIMAL_DIGITS_PATTERN.matcher( number ); return m.matches(); } private final static Pattern VALID_DECIMAL_DIGITS_PATTERN = Pattern.compile( "[\\+\\-]?[0-9]+"); /** * Return true if the given String is "true" or "false", ignoring case. * * @param boolString the String to validate. * @return true if the given String is "true" or "false", ignoring case. */ @SuppressWarnings({"SimplifiableIfStatement"}) public static boolean validBooleanString( String boolString ) { if ( boolString == null ) return false; Matcher m = VALID_CHARACTERS_FOR_BOOLEAN_STRING_PATTERN.matcher( boolString ); if ( ! m.matches() ) return false; return boolString.equalsIgnoreCase( "true" ) || boolString.equalsIgnoreCase( "false" ); } private final static Pattern VALID_CHARACTERS_FOR_BOOLEAN_STRING_PATTERN = Pattern.compile( "[trueTRUEfalsFALS]*" ); /** * Return true if the given String is an alphanumeric string. * * @param alphNumString the String to validate. * @return true if the given String is an alphanumeric string. */ public static boolean validAlphanumericString( String alphNumString ) { if ( alphNumString == null ) return false; Matcher m = VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN.matcher( alphNumString); return m.matches(); } private final static Pattern VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN = Pattern.compile( "[a-zA-Z0-9]*" ); /** * Return true if the given String is an alphanumeric string and one of * the valid strings in the constrained set. * * @param alphNumString the String to validate. * @param constrainedSet the set of valid strings * @param ignoreCase if true ignore the case of the letters * @return true if the given String is an alphanumeric string. */ public static boolean validAlphanumericStringConstrainedSet( String alphNumString, String[] constrainedSet, boolean ignoreCase ) { if ( alphNumString == null || constrainedSet == null || constrainedSet.length == 0 ) return false; Matcher m = VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN.matcher( alphNumString ); if ( !m.matches() ) return false; for ( String s : constrainedSet ) { if ( ignoreCase ? alphNumString.equalsIgnoreCase( s ) : alphNumString.equals( s ) ) return true; } return false; } /** * Return true if the given path does not ascend into parent directory. * * @param path the path to check * @return true if the given path does not ascend into parent directory. */ @SuppressWarnings({"UnnecessaryContinue"}) public static boolean descendOnlyFilePath( String path ) { String[] pathSegments = path.split( "/" ); //String[] newPathSegments = new String[pathSegments.length]; int i = 0; for ( int indxOrigSegs = 0; indxOrigSegs < pathSegments.length; indxOrigSegs++ ) { String s = pathSegments[ indxOrigSegs]; if ( s.equals( "." ) ) continue; else if ( s.equals( ".." ) ) { if ( i == 0 ) return false; i--; } else { //newPathSegments[i] = s; i++; } } return true; } public static String encodeLogMessages( String msg ) { // For now, just use URLEncoder. // ToDo Would rather not encode "/" and " " (maybe others). try { return java.net.URLEncoder.encode( msg, CHARACTER_ENCODING_UTF_8 ); } catch ( UnsupportedEncodingException e ) { // This SHOULD NEVER HAPPEN as all JVMs are required to support UTF-8 encoding. log.error( "UnsupportedEncodingException for \"" + CHARACTER_ENCODING_UTF_8 + "\": " + e.getMessage() ); throw new IllegalStateException( "UnsupportedEncodingException for \"" + CHARACTER_ENCODING_UTF_8 + "\"."); } } public static String encodeContentForHtml( String content ) { return HtmlUtils.htmlEscape( content ); } /* public static String encodeContentForXml( String content ) { return StringEscapeUtils.escapeXml( content ); } */ /** * <strong>NOT YET IMPLEMENTED:</strong> * Convert a percent hex encoded string (%20) to a unicode code point. * * @param percentHexString the string to convert to a Unicode code point * @param charsetName the name of the Character set to use in the conversion * @return the Unicode code point represented by the given percentHex encoded string. * @throws IllegalArgumentException if the given percentHex string is not valid or if the requested character set is not supported. * public static int percentHexString2unicodeCodePoint( String percentHexString, String charsetName ) { Charset charset = Charset.availableCharsets().get( charsetName ); if ( charset == null ) throw new IllegalArgumentException( "Unsupported charset [" + charsetName + "]." ); if ( ! StringValidateEncodeUtils.validPercentHexOctetsString( percentHexString ) ) throw new IllegalArgumentException( "Invalid percentHexOctets string ["+percentHexString+"]."); String[] hexOctets = percentHexString.split( "%" ); ByteBuffer bb = ByteBuffer.allocate( hexOctets.length ); bb.putInt( Integer.valueOf( hexOctets[0], 16 )); CharBuffer cb = charset.decode( bb ); //cb.rewind().get(); cb.hasArray(); // ToDo Look Implement. return -1; } */ /** * Check that the given string is a valid percentHexOctets string. The * string is considered valid if it only contains a sequence of "%" prefixed, * two character strings where each two character string is composed only of * US-ASCII digits and upper- or lower-case A-F. * * For example: "%31%32" or "%7b%7d%7E" * * @param percentHexOctetsString the string to check for validity * @return true if the string is valid, false otherwise. */ public static boolean validPercentHexOctetsString( String percentHexOctetsString ) { if ( percentHexOctetsString == null ) return false; Matcher m = VALID_PERCENT_HEX_OCTETS_PATTERN.matcher( percentHexOctetsString ); return m.matches(); } private final static Pattern VALID_PERCENT_HEX_OCTETS_PATTERN = Pattern.compile( "(?:%[0-9a-fA-F]{2})*" ); /** * Return the percentHexOctets string that represents the given Unicode * code point in the given character set or null if the given character * set cannot encode the given code point. * * @param codePoint the given Unicode code point * @param charsetName the name of the character set. * @return the percentHexOctets string that represents the given Unicode code point in the given character set. * @throws IllegalArgumentException if the code point is not defined or the the character set is not supported. */ public static String unicodeCodePoint2PercentHexString( int codePoint, String charsetName ) { if ( ! Character.isDefined( codePoint )) throw new IllegalArgumentException( String.format( "Given code point [U+%1$04X - %1$d] not assigned to an abstract character.", codePoint ) ); if ( Character.getType( codePoint) == Character.SURROGATE ) throw new IllegalArgumentException( String.format( "Given code point [U+%1$04X - %1$d] is an unencodable (by itself) surrogate character.", codePoint ) ); Charset charset = Charset.availableCharsets().get( charsetName ); if ( charset == null ) throw new IllegalArgumentException( String.format( "Unsupported charset [%s].", charsetName)); char[] chars = Character.toChars( codePoint ); ByteBuffer byteBuffer = null; try { byteBuffer = charset.newEncoder().encode( CharBuffer.wrap( chars ) ); } catch ( CharacterCodingException e ) { String message = String.format( "Given code point [U+%1$04X - %1$d] cannot be encode in given charset [%2$s].", codePoint, charsetName ); throw new IllegalArgumentException( message, e ); } byteBuffer.rewind(); StringBuilder encodedString = new StringBuilder(); for ( int i = 0; i < byteBuffer.limit(); i++ ) { String asHex = Integer.toHexString( byteBuffer.get() & 0xFF ); encodedString.append( "%" ).append( asHex.length() == 1 ? "0" : "").append( asHex ); } return encodedString.toString(); } }