/*
* Copyright 1998-2014 University Corporation for Atmospheric Research/Unidata
*
* Portions of this software were developed by the Unidata Program at the
* University Corporation for Atmospheric Research.
*
* Access and use of this software shall impose the following obligations
* and understandings on the user. The user is granted the right, without
* any fee or cost, to use, copy, modify, alter, enhance and distribute
* this software, and any derivative works thereof, and its supporting
* documentation for any purpose whatsoever, provided that this entire
* notice appears in all copies of the software, derivative works and
* supporting documentation. Further, UCAR requests that the user credit
* UCAR/Unidata in any publications that result from the use of this
* software or in any product that includes this software. The names UCAR
* and/or Unidata, however, may not be used in any advertising or publicity
* to endorse or promote any products or commercial entity unless specific
* written permission is obtained from UCAR/Unidata. The user also
* understands that UCAR/Unidata is not obligated to provide the user with
* any support, consulting, training or assistance of any kind with regard
* to the use, operation and performance of this software nor to provide
* the user with any updates, revisions, new versions or "bug fixes."
*
* THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package thredds.util;
import org.springframework.web.util.HtmlUtils;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.nio.charset.Charset;
import java.nio.charset.CharacterCodingException;
import java.nio.CharBuffer;
import java.nio.ByteBuffer;
/**
 * Static utility methods for validating and encoding strings.
 *
 * <p>All validators treat {@code null} as invalid and return {@code false}
 * for it rather than throwing. The class holds no mutable state and cannot
 * be instantiated.
 *
 * @author edavis
 * @since 3.16.47
 */
public final class StringValidateEncodeUtils {

  private static final org.slf4j.Logger log =
      org.slf4j.LoggerFactory.getLogger(StringValidateEncodeUtils.class);

  /** Name of the character encoding used by {@link #encodeLogMessages(String)}. */
  public final static String CHARACTER_ENCODING_UTF_8 = "UTF-8";

  /** Matches any character that is not allowed in a single-line string. */
  private final static Pattern INVALID_CHARACTERS_FOR_SINGLE_LINE_STRING_PATTERN =
      Pattern.compile("[\\p{Zl}\\p{Zp}\\p{C}]");

  /** Matches any space separator (Unicode general category Zs). */
  private final static Pattern INVALID_CHARACTERS_FOR_ID_STRING_PATTERN =
      Pattern.compile("[\\p{Zs}]");

  /** Matches an optionally signed run of one or more US-ASCII digits. */
  private final static Pattern VALID_DECIMAL_DIGITS_PATTERN =
      Pattern.compile("[\\+\\-]?[0-9]+");

  /** Matches strings made up only of the ASCII letters found in "true"/"false". */
  private final static Pattern VALID_CHARACTERS_FOR_BOOLEAN_STRING_PATTERN =
      Pattern.compile("[trueTRUEfalsFALS]*");

  /** Matches strings (including the empty string) of US-ASCII letters and digits. */
  private final static Pattern VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN =
      Pattern.compile("[a-zA-Z0-9]*");

  /** Matches zero or more "%XX" groups where each X is a hexadecimal digit. */
  private final static Pattern VALID_PERCENT_HEX_OCTETS_PATTERN =
      Pattern.compile("(?:%[0-9a-fA-F]{2})*");

  private StringValidateEncodeUtils() {
    // Utility class; not instantiable.
  }

  /**
   * Return true if the given String is a valid single-line String.
   *
   * <p>A string is considered a valid single-line string if it does not
   * contain any characters from these Unicode general categories:
   *
   * <ul>
   * <li>Cc - Other, Control</li>
   * <li>Cf - Other, Format</li>
   * <li>Cs - Other, Surrogate</li>
   * <li>Co - Other, Private Use</li>
   * <li>Cn - Other, Not Assigned</li>
   * <li>Zl - Separator, Line</li>
   * <li>Zp - Separator, Paragraph</li>
   * </ul>
   *
   * <p>Or, in other words, allow: Letters, Numbers, Marks, Punctuation,
   * Symbols, and Space separators.
   *
   * @param singleLineString the String to validate
   * @return true if the given String is a valid single-line String;
   *         false if it is null or contains a disallowed character.
   */
  public static boolean validSingleLineString(String singleLineString) {
    if (singleLineString == null) {
      return false;
    }
    return !INVALID_CHARACTERS_FOR_SINGLE_LINE_STRING_PATTERN.matcher(singleLineString).find();
  }

  /**
   * Return true if the given String is a valid path.
   *
   * <p>A String is considered a valid path if:
   * <ul>
   * <li>when passed to validSingleLineString(String) true is returned; and</li>
   * <li>it does not contain any parent path segments, i.e. it is not "..",
   *     does not start with "../", does not end with "/..", and does not
   *     contain "/../".</li>
   * </ul>
   *
   * @param path the String to validate
   * @return true if the given String is a valid path.
   * @see #validSingleLineString(String)
   */
  public static boolean validPath(String path) {
    if (path == null) {
      return false;
    }

    // Don't allow ".." directories in path.
    boolean hasParentSegment = path.equals("..")
        || path.startsWith("../")
        || path.endsWith("/..")
        || path.contains("/../");
    if (hasParentSegment) {
      return false;
    }

    return validSingleLineString(path);
  }

  /**
   * Return true if the given String is a valid File path.
   *
   * <p>A String is considered a valid File path if:
   * <ul>
   * <li>when passed to validPath(String) true is returned; and</li>
   * <li>it does not contain the Java File path separator
   *     (java.io.File.pathSeparatorChar) which is system dependent.</li>
   * </ul>
   *
   * @param path the String to validate
   * @return true if the given String is a valid File path.
   * @see #validPath(String)
   */
  public static boolean validFilePath(String path) {
    if (path == null) {
      return false;
    }
    if (path.indexOf(File.pathSeparatorChar) != -1) {
      return false;
    }
    return validPath(path);
  }

  /**
   * Return true if the given String is a valid URI string.
   *
   * <p>Currently a String is considered a valid URI string exactly when
   * validPath(String) returns true for it.
   *
   * <p><strong>NOTE:</strong> Check compliance with URI RFC (RFC 3986) - TODO.
   *
   * @param uri the String to validate.
   * @return true if the given String is a valid URI string.
   * @see #validPath(String)
   */
  public static boolean validUriString(String uri) {
    if (uri == null) {
      return false;
    }
    return validPath(uri);
  }

  /**
   * Return true if the given String is a valid ID string.
   *
   * <p>A String is considered a valid ID string if:
   * <ul>
   * <li>it contains no space separator characters (Unicode general
   *     category Zs - Separator, Space); and</li>
   * <li>true is returned when the string is passed to
   *     validSingleLineString(String).</li>
   * </ul>
   *
   * @param id the String to validate
   * @return true if the given String is a valid ID string.
   * @see #validSingleLineString(String)
   */
  public static boolean validIdString(String id) {
    if (id == null) {
      return false;
    }
    boolean hasSpaceSeparator = INVALID_CHARACTERS_FOR_ID_STRING_PATTERN.matcher(id).find();
    return !hasSpaceSeparator && validSingleLineString(id);
  }

  /**
   * Return true if the given String contains any less than ("&lt;") or
   * greater than ("&gt;") characters; otherwise return false.
   *
   * @param string the String to check.
   * @return true if the given String contains any less than or greater than
   *         characters; false if it contains none or is null.
   */
  public static boolean containsAngleBracketCharacters(String string) {
    if (string == null) {
      return false;
    }
    return string.indexOf('<') != -1 || string.indexOf('>') != -1;
  }

  /**
   * Return true if the given String contains any ampersand ("&amp;")
   * characters; otherwise return false.
   *
   * @param string the String to check.
   * @return true if the given String contains any ampersand characters;
   *         false if it contains none or is null.
   */
  public static boolean containsAmpersandCharacters(String string) {
    if (string == null) {
      return false;
    }
    return string.indexOf('&') != -1;
  }

  /**
   * Return true if the given String contains any backslash ("\")
   * characters; otherwise return false.
   *
   * @param string the String to check.
   * @return true if the given String contains any backslash characters;
   *         false if it contains none or is null.
   */
  public static boolean containsBackslashCharacters(String string) {
    if (string == null) {
      return false;
    }
    return string.indexOf('\\') != -1;
  }

  /**
   * Return true if the given String is an optionally signed ("+" or "-")
   * sequence of one or more US-ASCII digits. Despite the method name, no
   * fractional part or exponent is accepted.
   *
   * @param number the String to validate.
   * @return true if the entire String is an optionally signed run of
   *         US-ASCII digits; false otherwise or if it is null.
   */
  public static boolean validDecimalNumber(String number) {
    if (number == null) {
      return false;
    }
    return VALID_DECIMAL_DIGITS_PATTERN.matcher(number).matches();
  }

  /**
   * Return true if the given String is "true" or "false", ignoring case.
   *
   * @param boolString the String to validate.
   * @return true if the given String is "true" or "false", ignoring case.
   */
  public static boolean validBooleanString(String boolString) {
    if (boolString == null) {
      return false;
    }
    // The character-class check is not redundant: equalsIgnoreCase() folds
    // case per character, so a non-ASCII character whose upper-case form is an
    // ASCII letter (e.g. U+017F LATIN SMALL LETTER LONG S -> 'S') would
    // otherwise be accepted. Restricting to ASCII letters first prevents that.
    if (!VALID_CHARACTERS_FOR_BOOLEAN_STRING_PATTERN.matcher(boolString).matches()) {
      return false;
    }
    return boolString.equalsIgnoreCase("true") || boolString.equalsIgnoreCase("false");
  }

  /**
   * Return true if the given String is an alphanumeric string, i.e. it
   * consists only of US-ASCII letters and digits. The empty string is
   * considered alphanumeric.
   *
   * @param alphNumString the String to validate.
   * @return true if the given String is an alphanumeric string.
   */
  public static boolean validAlphanumericString(String alphNumString) {
    if (alphNumString == null) {
      return false;
    }
    return VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN.matcher(alphNumString).matches();
  }

  /**
   * Return true if the given String is an alphanumeric string and one of
   * the valid strings in the constrained set.
   *
   * @param alphNumString the String to validate.
   * @param constrainedSet the set of valid strings
   * @param ignoreCase if true ignore the case of the letters
   * @return true if the given String is alphanumeric and equal to one of the
   *         strings in the constrained set; false otherwise, or if either
   *         argument is null or the set is empty.
   */
  public static boolean validAlphanumericStringConstrainedSet(String alphNumString,
                                                              String[] constrainedSet,
                                                              boolean ignoreCase) {
    if (alphNumString == null || constrainedSet == null || constrainedSet.length == 0) {
      return false;
    }
    if (!VALID_CHARACTERS_FOR_ALPHANUMERIC_STRING_PATTERN.matcher(alphNumString).matches()) {
      return false;
    }

    for (String candidate : constrainedSet) {
      boolean same = ignoreCase
          ? alphNumString.equalsIgnoreCase(candidate)
          : alphNumString.equals(candidate);
      if (same) {
        return true;
      }
    }
    return false;
  }

  /**
   * Return true if the given path does not ascend into parent directory.
   *
   * <p>The path is split on "/" and walked segment by segment while tracking
   * the depth below the starting directory: "." and empty segments (produced
   * by a leading "/" or by "//") leave the depth unchanged, ".." decreases it,
   * and any other segment increases it. The path is rejected as soon as a
   * ".." would move above the starting directory.
   *
   * @param path the path to check
   * @return true if the given path does not ascend into parent directory;
   *         false if it does or if the path is null.
   */
  public static boolean descendOnlyFilePath(String path) {
    if (path == null) {
      return false;
    }

    int depth = 0;
    for (String segment : path.split("/")) {
      // Empty segments do not name a directory, so they must not add depth;
      // counting them would let "a//../.." or "/.." slip through.
      if (segment.length() == 0 || segment.equals(".")) {
        continue;
      }
      if (segment.equals("..")) {
        if (depth == 0) {
          return false;
        }
        depth--;
      } else {
        depth++;
      }
    }
    return true;
  }

  /**
   * Encode the given message so that it is safe to write to a log.
   *
   * <p>For now the message is simply URL-encoded (as by
   * {@link java.net.URLEncoder}) using UTF-8.
   *
   * @param msg the message to encode, may be null.
   * @return the URL-encoded message, or null if the given message is null.
   * @throws IllegalStateException if the JVM does not support UTF-8.
   */
  public static String encodeLogMessages(String msg) {
    if (msg == null) {
      return null;
    }

    // For now, just use URLEncoder.
    // ToDo Would rather not encode "/" and " " (maybe others).
    try {
      return java.net.URLEncoder.encode(msg, CHARACTER_ENCODING_UTF_8);
    } catch (UnsupportedEncodingException e) {
      // This SHOULD NEVER HAPPEN as all JVMs are required to support UTF-8 encoding.
      String failure = "UnsupportedEncodingException for \"" + CHARACTER_ENCODING_UTF_8 + "\".";
      log.error(failure, e);
      throw new IllegalStateException(failure, e);
    }
  }

  /**
   * Encode the given content for inclusion in HTML by delegating to
   * Spring's {@code HtmlUtils.htmlEscape(String)}.
   *
   * @param content the content to encode.
   * @return the result of {@code HtmlUtils.htmlEscape(content)}.
   */
  public static String encodeContentForHtml(String content) {
    return HtmlUtils.htmlEscape(content);
  }

  /* public static String encodeContentForXml( String content )
  {
    return StringEscapeUtils.escapeXml( content );
  } */

  /*
   * <strong>NOT YET IMPLEMENTED:</strong>
   * Convert a percent hex encoded string (%20) to a unicode code point.
   *
   * @param percentHexString the string to convert to a Unicode code point
   * @param charsetName the name of the Character set to use in the conversion
   * @return the Unicode code point represented by the given percentHex encoded string.
   * @throws IllegalArgumentException if the given percentHex string is not valid or if the requested character set is not supported.
   *
  public static int percentHexString2unicodeCodePoint( String percentHexString, String charsetName )
  {
    Charset charset = Charset.availableCharsets().get( charsetName );
    if ( charset == null )
      throw new IllegalArgumentException( "Unsupported charset [" + charsetName + "]." );
    if ( ! StringValidateEncodeUtils.validPercentHexOctetsString( percentHexString ) )
      throw new IllegalArgumentException( "Invalid percentHexOctets string ["+percentHexString+"].");
    String[] hexOctets = percentHexString.split( "%" );
    ByteBuffer bb = ByteBuffer.allocate( hexOctets.length );
    bb.putInt( Integer.valueOf( hexOctets[0], 16 ));
    CharBuffer cb = charset.decode( bb );
    //cb.rewind().get();
    cb.hasArray();
    // ToDo Look Implement.
    return -1;
  } */

  /**
   * Check that the given string is a valid percentHexOctets string. The
   * string is considered valid if it only contains a sequence of "%" prefixed,
   * two character strings where each two character string is composed only of
   * US-ASCII digits and upper- or lower-case A-F. The empty string is valid.
   *
   * <p>For example: "%31%32" or "%7b%7d%7E"
   *
   * @param percentHexOctetsString the string to check for validity
   * @return true if the string is valid, false otherwise (including null).
   */
  public static boolean validPercentHexOctetsString(String percentHexOctetsString) {
    if (percentHexOctetsString == null) {
      return false;
    }
    return VALID_PERCENT_HEX_OCTETS_PATTERN.matcher(percentHexOctetsString).matches();
  }

  /**
   * Return the percentHexOctets string that represents the given Unicode
   * code point in the given character set. Hexadecimal digits are written
   * in lower case, e.g. U+20AC in UTF-8 yields "%e2%82%ac".
   *
   * @param codePoint the given Unicode code point
   * @param charsetName the name (canonical name or alias) of the character set.
   * @return the percentHexOctets string that represents the given Unicode code point in the given character set.
   * @throws IllegalArgumentException if the code point is not defined, is a
   *         surrogate, or cannot be encoded in the character set, or if the
   *         character set name is null, illegal, or not supported.
   */
  public static String unicodeCodePoint2PercentHexString(int codePoint, String charsetName) {
    if (!Character.isDefined(codePoint)) {
      throw new IllegalArgumentException(String.format("Given code point [U+%1$04X - %1$d] not assigned to an abstract character.", codePoint));
    }
    if (Character.getType(codePoint) == Character.SURROGATE) {
      throw new IllegalArgumentException(String.format("Given code point [U+%1$04X - %1$d] is an unencodable (by itself) surrogate character.", codePoint));
    }

    Charset charset;
    try {
      // forName() resolves aliases and avoids building the map of every
      // installed charset. All of its failure modes (null name, illegal name,
      // unsupported charset) are IllegalArgumentExceptions.
      charset = Charset.forName(charsetName);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(String.format("Unsupported charset [%s].", charsetName), e);
    }
    if (!charset.canEncode()) {
      // Decode-only charsets throw UnsupportedOperationException from newEncoder().
      throw new IllegalArgumentException(String.format("Unsupported charset [%s].", charsetName));
    }

    ByteBuffer bytes;
    try {
      bytes = charset.newEncoder().encode(CharBuffer.wrap(Character.toChars(codePoint)));
    } catch (CharacterCodingException e) {
      String message = String.format("Given code point [U+%1$04X - %1$d] cannot be encode in given charset [%2$s].", codePoint, charsetName);
      throw new IllegalArgumentException(message, e);
    }

    // encode() returns a buffer positioned at zero whose limit follows the
    // last encoded byte, so draining it visits exactly the encoded octets.
    StringBuilder encoded = new StringBuilder(3 * bytes.remaining());
    while (bytes.hasRemaining()) {
      int octet = bytes.get() & 0xFF;
      encoded.append('%')
          .append(Character.forDigit(octet >> 4, 16))
          .append(Character.forDigit(octet & 0x0F, 16));
    }
    return encoded.toString();
  }
}