package freenet.support;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
/**
 * Static predicates for validating strings: reserved filename characters and names of common
 * operating systems, IDN blacklist characters, line breaks, control characters, Unicode
 * noncharacters and unbalanced formatting characters.
 *
 * All methods are stateless; the lookup tables are filled once during class initialization and
 * are never modified afterwards.
 */
public final class StringValidityChecker {

    /**
     * Characters which are on the IDN blacklist.
     * Taken from http://kb.mozillazine.org/Network.IDN.blacklist_chars
     */
    private static final Set<Character> idnBlacklist = new HashSet<Character>(Arrays.asList(
        new Character[] {
            0x0020, /* SPACE */
            0x00A0, /* NO-BREAK SPACE */
            0x00BC, /* VULGAR FRACTION ONE QUARTER */
            0x00BD, /* VULGAR FRACTION ONE HALF */
            0x01C3, /* LATIN LETTER RETROFLEX CLICK */
            0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */
            0x0338, /* COMBINING LONG SOLIDUS OVERLAY */
            0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
            0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */
            0x06D4, /* ARABIC FULL STOP */
            0x0702, /* SYRIAC SUBLINEAR FULL STOP */
            0x115F, /* HANGUL CHOSEONG FILLER */
            0x1160, /* HANGUL JUNGSEONG FILLER */
            0x2000, /* EN QUAD */
            0x2001, /* EM QUAD */
            0x2002, /* EN SPACE */
            0x2003, /* EM SPACE */
            0x2004, /* THREE-PER-EM SPACE */
            0x2005, /* FOUR-PER-EM SPACE */
            0x2006, /* SIX-PER-EM SPACE */
            0x2007, /* FIGURE SPACE */
            0x2008, /* PUNCTUATION SPACE */
            0x2009, /* THIN SPACE */
            0x200A, /* HAIR SPACE */
            0x200B, /* ZERO WIDTH SPACE */
            0x2024, /* ONE DOT LEADER */
            0x2027, /* HYPHENATION POINT */
            0x2028, /* LINE SEPARATOR */
            0x2029, /* PARAGRAPH SEPARATOR */
            0x202F, /* NARROW NO-BREAK SPACE */
            0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
            0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
            0x2044, /* FRACTION SLASH */
            0x205F, /* MEDIUM MATHEMATICAL SPACE */
            0x2154, /* VULGAR FRACTION TWO THIRDS */
            0x2155, /* VULGAR FRACTION ONE FIFTH */
            0x2156, /* VULGAR FRACTION TWO FIFTHS */
            0x2159, /* VULGAR FRACTION ONE SIXTH */
            0x215A, /* VULGAR FRACTION FIVE SIXTHS */
            0x215B, /* VULGAR FRACTION ONE EIGHTH */
            0x215F, /* FRACTION NUMERATOR ONE */
            0x2215, /* DIVISION SLASH */
            0x23AE, /* INTEGRAL EXTENSION */
            0x29F6, /* SOLIDUS WITH OVERBAR */
            0x29F8, /* BIG SOLIDUS */
            0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */
            0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
            0x2FF0, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */
            0x2FF1, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */
            0x2FF2, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */
            0x2FF3, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */
            0x2FF4, /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */
            0x2FF5, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */
            0x2FF6, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */
            0x2FF7, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */
            0x2FF8, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT */
            0x2FF9, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */
            0x2FFA, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */
            0x2FFB, /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */
            0x3000, /* IDEOGRAPHIC SPACE */
            0x3002, /* IDEOGRAPHIC FULL STOP */
            0x3014, /* LEFT TORTOISE SHELL BRACKET */
            0x3015, /* RIGHT TORTOISE SHELL BRACKET */
            0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */
            0x3164, /* HANGUL FILLER */
            0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */
            0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */
            0x33AE, /* SQUARE RAD OVER S */
            0x33AF, /* SQUARE RAD OVER S SQUARED */
            0x33C6, /* SQUARE C OVER KG */
            0x33DF, /* SQUARE A OVER M */
            0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
            0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
            0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
            0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */
            0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */
            0xFEFF, /* ZERO WIDTH NO-BREAK SPACE */
            0xFF0E, /* FULLWIDTH FULL STOP */
            0xFF0F, /* FULLWIDTH SOLIDUS */
            0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL STOP */
            0xFFA0, /* HALFWIDTH HANGUL FILLER */
            0xFFF9, /* INTERLINEAR ANNOTATION ANCHOR */
            0xFFFA, /* INTERLINEAR ANNOTATION SEPARATOR */
            0xFFFB, /* INTERLINEAR ANNOTATION TERMINATOR */
            0xFFFC, /* OBJECT REPLACEMENT CHARACTER */
            0xFFFD, /* REPLACEMENT CHARACTER */
        }));

    /**
     * Printable characters which are reserved in filenames on Windows.
     * Taken from http://en.wikipedia.org/w/index.php?title=Filename&oldid=344618757
     */
    private static final Set<Character> windowsReservedPrintableFilenameCharacters =
        new HashSet<Character>(Arrays.asList(
            new Character[] { '/', '\\', '?', '*', ':', '|', '"', '<', '>' }));

    /**
     * Reserved filenames on Windows, stored in lower case.
     * Taken from http://en.wikipedia.org/w/index.php?title=Filename&oldid=344618757
     */
    private static final Set<String> windowsReservedFilenames = new HashSet<String>(Arrays.asList(
        new String[] {
            "aux", "clock$", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "con",
            "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "nul", "prn" }));

    /**
     * Printable characters which are reserved in filenames on Mac OS.
     * Taken from http://en.wikipedia.org/w/index.php?title=Filename&oldid=344618757
     */
    private static final Set<Character> macOSReservedPrintableFilenameCharacters =
        new HashSet<Character>(Arrays.asList(new Character[] { ':', '/' }));

    /**
     * Returns true if the given character is one of the reserved printable characters in filenames on Windows.
     * ATTENTION: This function does NOT check whether the given character is a control character, those are also forbidden!
     * (Control characters are usually disallowed for all operating systems in filenames by our validity checker so it checks them separately)
     *
     * @param c the character to look up
     * @return true if the character is one of / \ ? * : | " &lt; &gt;
     */
    public static boolean isWindowsReservedPrintableFilenameCharacter(Character c) {
        return windowsReservedPrintableFilenameCharacters.contains(c);
    }

    /**
     * Returns true if the given filename is a reserved device name on Windows.
     * The comparison ignores case and only considers the part before the FIRST dot, so
     * "con.blah.txt" is reserved just like "CON".
     *
     * @param filename the filename to check, without any directory part
     * @return true if the part before the first dot is a reserved name
     * @throws NullPointerException if filename is null
     */
    public static boolean isWindowsReservedFilename(String filename) {
        // Use a fixed locale so that the result cannot depend on the default locale of the JVM.
        final String lowerCased = filename.toLowerCase(Locale.ENGLISH);
        final int firstDot = lowerCased.indexOf('.');
        final String baseName = (firstDot == -1) ? lowerCased : lowerCased.substring(0, firstDot);
        return windowsReservedFilenames.contains(baseName);
    }

    /**
     * Returns true if the given character is one of the reserved printable characters in filenames on Mac OS.
     * ATTENTION: This function does NOT check whether the given character is a control character, those are also forbidden!
     * (Control characters are usually disallowed for all operating systems in filenames by our validity checker so it checks them separately)
     *
     * @param c the character to look up
     * @return true if the character is ':' or '/'
     */
    public static boolean isMacOSReservedPrintableFilenameCharacter(Character c) {
        return macOSReservedPrintableFilenameCharacters.contains(c);
    }

    /**
     * Returns true if the given character is the reserved printable character in filenames on Unix, i.e. '/'.
     * Like the Windows and Mac OS variants, this does not check for control characters.
     */
    public static boolean isUnixReservedPrintableFilenameCharacter(char c) {
        return c == '/';
    }

    /**
     * Check for any characters in the string which are on the IDN blacklist.
     *
     * @return true if none of the characters is blacklisted; true for the empty string
     */
    public static boolean containsNoIDNBlacklistCharacters(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (idnBlacklist.contains(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check for any line breaks in the string: LF, CR, LINE TABULATION (U+000B), FORM FEED
     * (U+000C), NEXT LINE (U+0085) and every character of the Unicode categories "line separator"
     * and "paragraph separator".
     *
     * @return true if the string contains none of these characters; true for the empty string
     */
    public static boolean containsNoLinebreaks(String text) {
        for (int i = 0; i < text.length(); i++) {
            final char c = text.charAt(i);
            // U+000B, U+000C and U+0085 are mandatory line breaks as well, but belong to the
            // general category "control" and are therefore not matched by the category test below.
            if (c == '\n' || c == '\r' || c == 0x000B || c == 0x000C || c == 0x0085) {
                return false;
            }
            final int type = Character.getType(c);
            if (type == Character.LINE_SEPARATOR || type == Character.PARAGRAPH_SEPARATOR) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check for any values in the string that are not valid Unicode
     * characters: noncharacters and unpaired surrogates.
     *
     * @return true if no such value was found; true for the empty string
     */
    public static boolean containsNoInvalidCharacters(String text) {
        final int length = text.length();
        int index = 0;
        while (index < length) {
            // Iterate by code point: a valid surrogate pair is combined into one supplementary
            // code point, so only UNPAIRED surrogates are reported with type SURROGATE.
            final int codePoint = text.codePointAt(index);
            index += Character.charCount(codePoint);
            if (isNoncharacter(codePoint) || Character.getType(codePoint) == Character.SURROGATE) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true for the 66 Unicode noncharacters: the last two code points of every plane
     * (U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, ...) and the block U+FDD0 to U+FDEF.
     */
    private static boolean isNoncharacter(int codePoint) {
        return (codePoint & 0xFFFE) == 0xFFFE
            || (codePoint >= 0xFDD0 && codePoint <= 0xFDEF);
    }

    /**
     * Check for any control characters (including tab, LF, and CR) in
     * the string.
     *
     * @return true if no character has the Unicode category "control"; true for the empty string
     */
    public static boolean containsNoControlCharacters(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.getType(text.charAt(i)) == Character.CONTROL) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check for any unpaired directional or annotation characters in
     * the string, or any nested annotations.
     *
     * Directional embeddings/overrides must each be closed by a POP DIRECTIONAL FORMATTING, and
     * interlinear annotations must follow the sequence anchor, separator, terminator.
     *
     * @return true if all formatting characters are balanced; true for the empty string
     */
    public static boolean containsNoInvalidFormatting(String text) {
        int openDirectionals = 0;
        boolean inAnnotatedText = false; // between anchor and separator
        boolean inAnnotation = false;    // between separator and terminator

        for (int i = 0; i < text.length(); i++) {
            switch (text.charAt(i)) {
                case 0x202A: // LEFT-TO-RIGHT EMBEDDING
                case 0x202B: // RIGHT-TO-LEFT EMBEDDING
                case 0x202D: // LEFT-TO-RIGHT OVERRIDE
                case 0x202E: // RIGHT-TO-LEFT OVERRIDE
                    openDirectionals++;
                    break;
                case 0x202C: // POP DIRECTIONAL FORMATTING
                    openDirectionals--;
                    if (openDirectionals < 0) {
                        return false; // pop without a preceding embedding/override
                    }
                    break;
                case 0xFFF9: // INTERLINEAR ANNOTATION ANCHOR
                    if (inAnnotatedText || inAnnotation) {
                        return false; // nested annotation
                    }
                    inAnnotatedText = true;
                    break;
                case 0xFFFA: // INTERLINEAR ANNOTATION SEPARATOR
                    if (!inAnnotatedText) {
                        return false; // separator without anchor
                    }
                    inAnnotatedText = false;
                    inAnnotation = true;
                    break;
                case 0xFFFB: // INTERLINEAR ANNOTATION TERMINATOR
                    if (!inAnnotation) {
                        return false; // terminator without separator
                    }
                    inAnnotation = false;
                    break;
                default:
                    break;
            }
        }

        // Everything which was opened must have been closed at the end of the string.
        return openDirectionals == 0 && !inAnnotatedText && !inAnnotation;
    }

    /**
     * Check whether the string consists only of the ASCII letters a-z, A-Z and the digits 0-9.
     *
     * @return true if every character is an ASCII letter or digit; true for the empty string
     */
    public static boolean isLatinLettersAndNumbersOnly(String text) {
        for (int i = 0; i < text.length(); i++) {
            final char c = text.charAt(i);
            final boolean lowerCaseLetter = c >= 'a' && c <= 'z';
            final boolean upperCaseLetter = c >= 'A' && c <= 'Z';
            final boolean digit = c >= '0' && c <= '9';
            if (!(lowerCaseLetter || upperCaseLetter || digit)) {
                return false;
            }
        }
        return true;
    }
}