/*
* Copyright (c) 2012, 3 Round Stones Inc. Some rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the openrdf.org nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.openrdf.repository.object;
import java.io.Serializable;
import java.util.Locale;
import java.util.regex.PatternSyntaxException;
/**
* Represents a string and language tag, and thus represents a plain RDF literal
* with a language tag.
*
* This class includes a number of similar methods to {@link java.lang.String},
* that preserve the language tag. In addition it includes a method to compare
* language tags {@link #matchesLang(String)}.
*
* @author James Leigh
*
*/
public class LangString implements CharSequence, Serializable,
Comparable<LangString> {
private static final long serialVersionUID = 8175463447271413979L;
/**
* Constructs a LangString using the default {@link Locale} for the
* language.
*
* @param label
* @return a LangString with the given label and a default language
*/
public static LangString valueOf(String label) {
return new LangString(label);
}
/**
* Constructs a LangString using the given label and language.
*
* @param label
* @param language
* @return a LangString with the given label and language
*/
public static LangString valueOf(String label, String language) {
return new LangString(label, language);
}
private static String toLang(Locale locale) {
String language = locale.getLanguage();
String country = locale.getCountry();
String variant = locale.getVariant();
boolean l = language.length() != 0;
boolean c = country.length() != 0;
boolean v = variant.length() != 0;
StringBuilder result = new StringBuilder(language);
if (c || (l && v)) {
result.append('-').append(country.toLowerCase());
}
if (v && (l || c)) {
result.append('-').append(variant);
}
return result.toString();
}
private final String label;
private final String lang;
private Locale locale;
/**
* Constructs a LangString using the default {@link Locale} for the
* language.
*
* @param label
*/
public LangString(String label) {
this(label, Locale.getDefault());
}
/**
* Constructs a LangString using the given label and language.
*
* @param label
* @param lang
*/
public LangString(String label, String lang) {
assert label != null;
if (lang != null && lang.length() < 1)
throw new IllegalArgumentException("language cannot be the empty string");
this.label = label;
this.lang = lang == null ? toLang(Locale.getDefault()) : lang;
}
/**
* Constructs a LangString using the given label and locale.
*
* @param label
* @param locale
*/
public LangString(String label, Locale locale) {
this(label, toLang(locale));
this.locale = locale;
}
public String getLang() {
return lang;
}
/**
* The {@link String} portion of this object
*
* @return this string without a language
*/
@Override
public String toString() {
return label;
}
/**
* The language of the current LangString as a Locale.
*
* @return this language as a Locale
*/
public synchronized Locale getLocale() {
if (locale == null) {
String[] split = getLang().split("-", 3);
if (split.length == 1) {
locale = new Locale(getLang());
} else if (split.length == 2) {
locale = new Locale(split[0], split[1]);
} else {
locale = new Locale(split[0], split[1], split[2]);
}
}
return locale;
}
/**
* Returns a hash code for this string. The hash code for a
* <code>String</code> object is computed as <blockquote>
*
* <pre>
* s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
* </pre>
*
* </blockquote> using <code>int</code> arithmetic, where <code>s[i]</code>
* is the <i>i</i>th character of the string, <code>n</code> is the length
* of the string, and <code>^</code> indicates exponentiation. (The hash
* value of the empty string is zero.)
*
* @return a hash code value for this object.
*/
@Override
public int hashCode() {
return toString().hashCode();
}
/**
* Compares this string to the specified object. The result is {@code true}
* if and only if the argument is not {@code null} and is a {@code LangString}
* object that represents the same sequence of characters as this object.
*
* @param o
* The object to compare this {@code LangString} against
*
* @return {@code true} if the given object represents a {@code LangString}
* equivalent to this string, {@code false} otherwise
*
* @see #compareTo(LangString)
* @see #equalsIgnoreCase(Object)
*/
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o instanceof LangString) {
LangString other = (LangString) o;
if (!toString().equals(other.toString()))
return false;
if (!getLang().equalsIgnoreCase(other.getLang()))
return false;
return true;
}
return false;
}
/**
* Compares this {@code LangString} to another {@code LangString}, ignoring case
* considerations. Two strings are considered equal ignoring case if they
* are of the same length and corresponding characters in the two strings
* are equal ignoring case.
*
* <p>
* Two characters {@code c1} and {@code c2} are considered the same ignoring
* case if at least one of the following is true:
* <ul>
* <li>The two characters are the same (as compared by the {@code ==}
* operator)
* <li>Applying the method {@link java.lang.Character#toUpperCase(char)} to
* each character produces the same result
* <li>Applying the method {@link java.lang.Character#toLowerCase(char)} to
* each character produces the same result
* </ul>
*
* @param o
* The {@code LangString} to compare this {@code LangString} against
*
* @return {@code true} if the argument is not {@code null} and it
* represents an equivalent {@code LangString} ignoring case;
* {@code false} otherwise
*
* @see #equals(Object)
*/
public boolean equalsIgnoreCase(Object o) {
if (this == o)
return true;
if (o instanceof LangString) {
LangString other = (LangString) o;
if (!toString().equalsIgnoreCase(other.toString()))
return false;
if (!getLang().equalsIgnoreCase(other.getLang()))
return false;
return true;
}
return false;
}
/**
* Compares two strings lexicographically. The comparison is based on the
* Unicode value of each character in the strings. The character sequence
* represented by this <code>String</code> object is compared
* lexicographically to the character sequence represented by the argument
* string. The result is a negative integer if this <code>String</code>
* object lexicographically precedes the argument string. The result is a
* positive integer if this <code>String</code> object lexicographically
* follows the argument string. The result is zero if the strings are equal;
* <code>compareTo</code> returns <code>0</code> exactly when the
* {@link #equals(Object)} method would return <code>true</code>.
* <p>
* This is the definition of lexicographic ordering. If two strings are
* different, then either they have different characters at some index that
* is a valid index for both strings, or their lengths are different, or
* both. If they have different characters at one or more index positions,
* let <i>k</i> be the smallest such index; then the string whose character
* at position <i>k</i> has the smaller value, as determined by using the
* < operator, lexicographically precedes the other string. In this case,
* <code>compareTo</code> returns the difference of the two character values
* at position <code>k</code> in the two string -- that is, the value:
* <blockquote>
*
* <pre>
* this.charAt(k) - anotherString.charAt(k)
* </pre>
*
* </blockquote> If there is no index position at which they differ, then
* the shorter string lexicographically precedes the longer string. In this
* case, <code>compareTo</code> returns the difference of the lengths of the
* strings -- that is, the value: <blockquote>
*
* <pre>
* this.length() - anotherString.length()
* </pre>
*
* </blockquote>
*
* @param o
* the <code>String</code> to be compared.
* @return the value <code>0</code> if the argument string is equal to this
* string; a value less than <code>0</code> if this string is
* lexicographically less than the string argument; and a value
* greater than <code>0</code> if this string is lexicographically
* greater than the string argument.
*/
public int compareTo(LangString o) {
if (this == o)
return 0;
if (o == null)
return 1;
int result = this.getLang().compareToIgnoreCase(o.getLang());
if (result == 0)
return this.toString().compareTo(o.toString());
return result;
}
/**
* Compares two strings lexicographically, ignoring case differences. This
* method returns an integer whose sign is that of calling
* <code>compareTo</code> with normalized versions of the strings where case
* differences have been eliminated by calling
* <code>Character.toLowerCase(Character.toUpperCase(character))</code> on
* each character.
* <p>
* Note that this method does <em>not</em> take locale into account, and
* will result in an unsatisfactory ordering for certain locales. The
* java.text package provides <em>collators</em> to allow locale-sensitive
* ordering.
*
* @param o
* the <code>LangString</code> to be compared.
* @return a negative integer, zero, or a positive integer as the specified
* String is greater than, equal to, or less than this String,
* ignoring case considerations.
* @see java.text.Collator#compare(String, String)
*/
public int compareToIgnoreCase(LangString o) {
if (this == o)
return 0;
if (o == null)
return 1;
int result = this.getLang().compareToIgnoreCase(o.getLang());
if (result == 0)
return this.toString().compareToIgnoreCase(o.toString());
return result;
}
/**
* Extended filtering compares extended language ranges to language tags.
* Each extended language range in the language priority list is considered
* in turn, according to priority. A language range matches a particular
* language tag if each respective list of subtags matches.
*
* Two subtags match if either they are the same when compared
* case-insensitively or the language range's subtag is the wildcard '*'.
*
* See http://tools.ietf.org/html/rfc4647
*
* @param range
* In a language range, each subtag MUST either be a sequence of
* ASCII alphanumeric characters or the single character '*'
* (%x2A, ASTERISK). The character '*' is a "wildcard" that
* matches any sequence of subtags. The meaning and uses of
* wildcards vary according to the type of language range.
* @return true if this has a language tag that matches the extended
* language range given; otherwise, false
*/
public boolean matchesLang(String range) {
// 1. Split both the extended language range and the language tag being
// compared into a list of subtags by dividing on the hyphen (%x2D)
// character.
String[] subtags = getLang().split("-");
String[] subranges = range.split("-");
// 2. Begin with the first subtag in each list. If the first subtag in
// the range does not match the first subtag in the tag, the overall
// match fails. Otherwise, move to the next subtag in both the
// range and the tag.
if (!subtags[0].equalsIgnoreCase(subranges[0]))
return false;
// 3. While there are more subtags left in the language range's list:
int r = 1, t = 1;
while (r < subranges.length) {
// A. If the subtag currently being examined in the range is the
// wildcard ('*'), move to the next subtag in the range and
// continue with the loop.
if ("*".equals(subranges[r])) {
r++;
continue;
}
// B. Else, if there are no more subtags in the language tag's
// list, the match fails.
if (t >= subtags.length)
return false;
// C. Else, if the current subtag in the range's list matches the
// current subtag in the language tag's list, move to the next
// subtag in both lists and continue with the loop.
if (subranges[r].equalsIgnoreCase(subtags[t])) {
r++;
t++;
continue;
}
// D. Else, if the language tag's subtag is a "singleton" (a single
// letter or digit, which includes the private-use subtag 'x')
// the match fails.
if (subtags[t].length() == 1)
return false;
// E. Else, move to the next subtag in the language tag's list and
// continue with the loop.
t++;
continue;
}
// 4. When the language range's list has no more subtags, the match
// succeeds.
return true;
}
/**
* Concatenates the specified string to the end of this string.
* <p>
* If the length of the argument string is <code>0</code>, then this
* <code>String</code> object is returned. Otherwise, a new
* <code>String</code> object is created, representing a character sequence
* that is the concatenation of the character sequence represented by this
* <code>String</code> object and the character sequence represented by the
* argument string.
* <p>
* Examples: <blockquote>
*
* <pre>
* "cares".concat("s") returns "caress"
* "to".concat("get").concat("her") returns "together"
* </pre>
*
* </blockquote>
*
* @param str
* the <code>String</code> that is concatenated to the end of
* this <code>String</code>.
* @return a string that represents the concatenation of this object's
* characters followed by the string argument's characters.
* @throws IllegalArgumentException
* if the languages are different
*/
public LangString concat(LangString str) {
String concat = toString().concat(str.toString());
// check for same lang tag
String l1 = getLang();
String l2 = str.getLang();
if (l1.equalsIgnoreCase(l2))
return new LangString(concat, l1);
// check for semantic subset
if (str.matchesLang(l1))
return new LangString(concat, l1);
if (matchesLang(l2))
return new LangString(concat, l2);
// use common prefix
String prefix = l1.length() < l2.length() ? l1 : l2;
String other = l1.length() < l2.length() ? l2 : l1;
String common = "";
int i = prefix.indexOf('-');
for (; i >= 0; i = prefix.indexOf('-', i + 1)) {
String substring = prefix.substring(0, i + 1);
if (!substring.equalsIgnoreCase(other.substring(0, i + 1)))
break;
common = substring;
}
if (common.length() < 1)
throw new IllegalArgumentException("Different languages cannot be concatenated: " + l1 + " and " + l2);
return new LangString(concat, common);
}
/**
* Concatenates the specified string to the end of this string.
* <p>
* If the length of the argument string is <code>0</code>, then this
* <code>String</code> object is returned. Otherwise, a new
* <code>String</code> object is created, representing a character sequence
* that is the concatenation of the character sequence represented by this
* <code>String</code> object and the character sequence represented by the
* argument string.
* <p>
* Examples: <blockquote>
*
* <pre>
* "cares".concat("s") returns "caress"
* "to".concat("get").concat("her") returns "together"
* </pre>
*
* </blockquote>
*
* @param str
* the <code>String</code> that is concatenated to the end of
* this <code>String</code>.
* @return a string that represents the concatenation of this object's
* characters followed by the string argument's characters.
*/
public LangString concat(String str) {
return new LangString(toString().concat(str), getLang());
}
/**
* Returns a new character sequence that is a subsequence of this sequence.
*
* <p>
* An invocation of this method of the form
*
* <blockquote>
*
* <pre>
* str.subSequence(begin, end)
* </pre>
*
* </blockquote>
*
* behaves in exactly the same way as the invocation
*
* <blockquote>
*
* <pre>
* str.substring(begin, end)
* </pre>
*
* </blockquote>
*
* This method is defined so that the <tt>String</tt> class can implement
* the {@link CharSequence} interface.
* </p>
*
* @param start
* the begin index, inclusive.
* @param end
* the end index, exclusive.
* @return the specified subsequence.
*
* @throws IndexOutOfBoundsException
* if <tt>beginIndex</tt> or <tt>endIndex</tt> are negative, if
* <tt>endIndex</tt> is greater than <tt>length()</tt>, or if
* <tt>beginIndex</tt> is greater than <tt>startIndex</tt>
*/
public LangString subSequence(int start, int end) {
return new LangString(toString().substring(start, end), getLang());
}
/**
* Returns a new string that is a substring of this string. The substring
* begins with the character at the specified index and extends to the end
* of this string.
* <p>
* Examples: <blockquote>
*
* <pre>
* "unhappy".substring(2) returns "happy"
* "Harbison".substring(3) returns "bison"
* "emptiness".substring(9) returns "" (an empty string)
* </pre>
*
* </blockquote>
*
* @param beginIndex
* the beginning index, inclusive.
* @return the specified substring.
* @exception IndexOutOfBoundsException
* if <code>beginIndex</code> is negative or larger than the
* length of this <code>String</code> object.
*/
public LangString substring(int beginIndex) {
return new LangString(toString().substring(beginIndex), getLang());
}
/**
* Returns a new string that is a substring of this string. The substring
* begins at the specified <code>beginIndex</code> and extends to the
* character at index <code>endIndex - 1</code>. Thus the length of the
* substring is <code>endIndex-beginIndex</code>.
* <p>
* Examples: <blockquote>
*
* <pre>
* "hamburger".substring(4, 8) returns "urge"
* "smiles".substring(1, 5) returns "mile"
* </pre>
*
* </blockquote>
*
* @param beginIndex
* the beginning index, inclusive.
* @param endIndex
* the ending index, exclusive.
* @return the specified substring.
* @exception IndexOutOfBoundsException
* if the <code>beginIndex</code> is negative, or
* <code>endIndex</code> is larger than the length of this
* <code>String</code> object, or <code>beginIndex</code> is
* larger than <code>endIndex</code>.
*/
public LangString substring(int beginIndex, int endIndex) {
return new LangString(toString().substring(beginIndex, endIndex),
getLang());
}
/**
* Returns a new string resulting from replacing all occurrences of
* <code>oldChar</code> in this string with <code>newChar</code>.
* <p>
* If the character <code>oldChar</code> does not occur in the character
* sequence represented by this <code>String</code> object, then a reference
* to this <code>String</code> object is returned. Otherwise, a new
* <code>String</code> object is created that represents a character
* sequence identical to the character sequence represented by this
* <code>String</code> object, except that every occurrence of
* <code>oldChar</code> is replaced by an occurrence of <code>newChar</code>.
* <p>
* Examples: <blockquote>
*
* <pre>
* "mesquite in your cellar".replace('e', 'o')
* returns "mosquito in your collar"
* "the war of baronets".replace('r', 'y')
* returns "the way of bayonets"
* "sparring with a purple porpoise".replace('p', 't')
* returns "starring with a turtle tortoise"
* "JonL".replace('q', 'x') returns "JonL" (no change)
* </pre>
*
* </blockquote>
*
* @param oldChar
* the old character.
* @param newChar
* the new character.
* @return a string derived from this string by replacing every occurrence
* of <code>oldChar</code> with <code>newChar</code>.
*/
public LangString replace(char oldChar, char newChar) {
return new LangString(toString().replace(oldChar, newChar), getLang());
}
/**
* Replaces the first substring of this string that matches the given <a
* href="../util/regex/Pattern.html#sum">regular expression</a> with the
* given replacement.
*
* <p>
* An invocation of this method of the form <i>str</i>
* <tt>.replaceFirst(</tt><i>regex</i><tt>,</tt> <i>repl</i><tt>)</tt>
* yields exactly the same result as the expression
*
* <blockquote><tt>
* {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#compile
* compile}(</tt><i>regex</i><tt>).{@link
* java.util.regex.Pattern#matcher(java.lang.CharSequence)
* matcher}(</tt><i>str</i><tt>).{@link java.util.regex.Matcher#replaceFirst
* replaceFirst}(</tt><i>repl</i><tt>)</tt></blockquote>
*
* <p>
* Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in the
* replacement string may cause the results to be different than if it were
* being treated as a literal replacement string; see
* {@link java.util.regex.Matcher#replaceFirst}. Use
* {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
* meaning of these characters, if desired.
*
* @param regex
* the regular expression to which this string is to be matched
* @param replacement
* the string to be substituted for the first match
*
* @return The resulting <tt>String</tt>
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*/
public LangString replaceFirst(String regex, String replacement) {
return new LangString(toString().replaceFirst(regex, replacement),
getLang());
}
/**
* Replaces each substring of this string that matches the given <a
* href="../util/regex/Pattern.html#sum">regular expression</a> with the
* given replacement.
*
* <p>
* An invocation of this method of the form <i>str</i><tt>.replaceAll(</tt>
* <i>regex</i><tt>,</tt> <i>repl</i><tt>)</tt> yields exactly the same
* result as the expression
*
* <blockquote><tt>
* {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#compile
* compile}(</tt><i>regex</i><tt>).{@link
* java.util.regex.Pattern#matcher(java.lang.CharSequence)
* matcher}(</tt><i>str</i><tt>).{@link java.util.regex.Matcher#replaceAll
* replaceAll}(</tt><i>repl</i><tt>)</tt></blockquote>
*
* <p>
* Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in the
* replacement string may cause the results to be different than if it were
* being treated as a literal replacement string; see
* {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. Use
* {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
* meaning of these characters, if desired.
*
* @param regex
* the regular expression to which this string is to be matched
* @param replacement
* the string to be substituted for each match
*
* @return The resulting <tt>String</tt>
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*/
public LangString replaceAll(String regex, String replacement) {
return new LangString(toString().replaceAll(regex, replacement),
getLang());
}
/**
* Replaces each substring of this string that matches the literal target
* sequence with the specified literal replacement sequence. The replacement
* proceeds from the beginning of the string to the end, for example,
* replacing "aa" with "b" in the string "aaa" will result in "ba" rather
* than "ab".
*
* @param target
* The sequence of char values to be replaced
* @param replacement
* The replacement sequence of char values
* @return The resulting string
* @throws NullPointerException
* if <code>target</code> or <code>replacement</code> is
* <code>null</code>.
*/
public LangString replace(CharSequence target, CharSequence replacement) {
return new LangString(toString().replace(target, replacement),
getLang());
}
/**
* Splits this string around matches of the given <a
* href="../util/regex/Pattern.html#sum">regular expression</a>.
*
* <p>
* The array returned by this method contains each substring of this string
* that is terminated by another substring that matches the given expression
* or is terminated by the end of the string. The substrings in the array
* are in the order in which they occur in this string. If the expression
* does not match any part of the input then the resulting array has just
* one element, namely this string.
*
* <p>
* The <tt>limit</tt> parameter controls the number of times the pattern is
* applied and therefore affects the length of the resulting array. If the
* limit <i>n</i> is greater than zero then the pattern will be applied at
* most <i>n</i> - 1 times, the array's length will be no greater
* than <i>n</i>, and the array's last entry will contain all input beyond
* the last matched delimiter. If <i>n</i> is non-positive then the pattern
* will be applied as many times as possible and the array can have any
* length. If <i>n</i> is zero then the pattern will be applied as many
* times as possible, the array can have any length, and trailing empty
* strings will be discarded.
*
* <p>
* The string <tt>"boo:and:foo"</tt>, for example, yields the following
* results with these parameters:
*
* <blockquote>
* <table cellpadding=1 cellspacing=0 summary="Split example showing regex, limit, and result">
* <tr>
* <th>Regex</th>
* <th>Limit</th>
* <th>Result</th>
* </tr>
* <tr>
* <td align=center>:</td>
* <td align=center>2</td>
* <td><tt>{ "boo", "and:foo" }</tt></td>
* </tr>
* <tr>
* <td align=center>:</td>
* <td align=center>5</td>
* <td><tt>{ "boo", "and", "foo" }</tt></td>
* </tr>
* <tr>
* <td align=center>:</td>
* <td align=center>-2</td>
* <td><tt>{ "boo", "and", "foo" }</tt></td>
* </tr>
* <tr>
* <td align=center>o</td>
* <td align=center>5</td>
* <td><tt>{ "b", "", ":and:f", "", "" }</tt></td>
* </tr>
* <tr>
* <td align=center>o</td>
* <td align=center>-2</td>
* <td><tt>{ "b", "", ":and:f", "", "" }</tt></td>
* </tr>
* <tr>
* <td align=center>o</td>
* <td align=center>0</td>
* <td><tt>{ "b", "", ":and:f" }</tt></td>
* </tr>
* </table>
* </blockquote>
*
* <p>
* An invocation of this method of the form <i>str.</i><tt>split(</tt>
* <i>regex</i><tt>,</tt> <i>n</i><tt>)</tt> yields the same result as
* the expression
*
* <blockquote> {@link java.util.regex.Pattern}.
* {@link java.util.regex.Pattern#compile compile}<tt>(</tt><i>regex</i>
* <tt>)</tt>.
* {@link java.util.regex.Pattern#split(java.lang.CharSequence,int) split}
* <tt>(</tt><i>str</i><tt>,</tt> <i>n</i><tt>)</tt> </blockquote>
*
*
* @param regex
* the delimiting regular expression
*
* @param limit
* the result threshold, as described above
*
* @return the array of strings computed by splitting this string around
* matches of the given regular expression
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*/
public LangString[] split(String regex, int limit) {
String[] split = toString().split(regex, limit);
LangString[] result = new LangString[split.length];
for (int i = 0; i < split.length; i++) {
result[i] = new LangString(split[i], getLang());
}
return result;
}
/**
* Splits this string around matches of the given <a
* href="../util/regex/Pattern.html#sum">regular expression</a>.
*
* <p>
* This method works as if by invoking the two-argument
* {@link #split(String, int) split} method with the given expression and a
* limit argument of zero. Trailing empty strings are therefore not included
* in the resulting array.
*
* <p>
* The string <tt>"boo:and:foo"</tt>, for example, yields the following
* results with these expressions:
*
* <blockquote>
* <table cellpadding=1 cellspacing=0 summary="Split examples showing regex and result">
* <tr>
* <th>Regex</th>
* <th>Result</th>
* </tr>
* <tr>
* <td align=center>:</td>
* <td><tt>{ "boo", "and", "foo" }</tt></td>
* </tr>
* <tr>
* <td align=center>o</td>
* <td><tt>{ "b", "", ":and:f" }</tt></td>
* </tr>
* </table>
* </blockquote>
*
*
* @param regex
* the delimiting regular expression
*
* @return the array of strings computed by splitting this string around
* matches of the given regular expression
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*/
public LangString[] split(String regex) {
return split(regex, 0);
}
/**
* Converts all of the characters in this <code>String</code> to lower case
* using the rules of the given <code>Locale</code>. Case mapping is based
* on the Unicode Standard version specified by the
* {@link java.lang.Character Character} class. Since case mappings are not
* always 1:1 char mappings, the resulting <code>String</code> may be a
* different length than the original <code>String</code>.
* <p>
* Examples of lowercase mappings are in the following table:
* <table border="1" summary="Lowercase mapping examples showing language code of locale, upper case, lower case, and description">
* <tr>
* <th>Language Code of Locale</th>
* <th>Upper Case</th>
* <th>Lower Case</th>
* <th>Description</th>
* </tr>
* <tr>
* <td>tr (Turkish)</td>
* <td>\u0130</td>
* <td>\u0069</td>
* <td>capital letter I with dot above -> small letter i</td>
* </tr>
* <tr>
* <td>tr (Turkish)</td>
* <td>\u0049</td>
* <td>\u0131</td>
* <td>capital letter I -> small letter dotless i</td>
* </tr>
* <tr>
* <td>(all)</td>
* <td>French Fries</td>
* <td>french fries</td>
* <td>lowercased all chars in String</td>
* </tr>
* <tr>
* <td>(all)</td>
* <td><img src="doc-files/capiota.gif" alt="capiota"><img
* src="doc-files/capchi.gif" alt="capchi"> <img
* src="doc-files/captheta.gif" alt="captheta"><img
* src="doc-files/capupsil.gif" alt="capupsil"> <img
* src="doc-files/capsigma.gif" alt="capsigma"></td>
* <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif"
* alt="chi"> <img src="doc-files/theta.gif" alt="theta"><img
* src="doc-files/upsilon.gif" alt="upsilon"> <img
* src="doc-files/sigma1.gif" alt="sigma"></td>
* <td>lowercased all chars in String</td>
* </tr>
* </table>
*
* @return the <code>String</code>, converted to lowercase.
* @see java.lang.String#toLowerCase()
* @see java.lang.String#toUpperCase()
* @see java.lang.String#toUpperCase(Locale)
*/
public LangString toLowerCase() {
return new LangString(toString().toLowerCase(getLocale()), getLang());
}
/**
* Converts all of the characters in this <code>String</code> to upper case
* using the rules of the given <code>Locale</code>. Case mapping is based
* on the Unicode Standard version specified by the
* {@link java.lang.Character Character} class. Since case mappings are not
* always 1:1 char mappings, the resulting <code>String</code> may be a
* different length than the original <code>String</code>.
* <p>
* Examples of locale-sensitive and 1:M case mappings are in the following
* table.
* <p>
* <table border="1" summary="Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.">
* <tr>
* <th>Language Code of Locale</th>
* <th>Lower Case</th>
* <th>Upper Case</th>
* <th>Description</th>
* </tr>
* <tr>
* <td>tr (Turkish)</td>
* <td>\u0069</td>
* <td>\u0130</td>
* <td>small letter i -> capital letter I with dot above</td>
* </tr>
* <tr>
* <td>tr (Turkish)</td>
* <td>\u0131</td>
* <td>\u0049</td>
* <td>small letter dotless i -> capital letter I</td>
* </tr>
* <tr>
* <td>(all)</td>
* <td>\u00df</td>
* <td>\u0053 \u0053</td>
* <td>small letter sharp s -> two letters: SS</td>
* </tr>
* <tr>
* <td>(all)</td>
* <td>Fahrvergnügen</td>
* <td>FAHRVERGNÜGEN</td>
* <td></td>
* </tr>
* </table>
*
* @return the <code>String</code>, converted to uppercase.
* @see java.lang.String#toUpperCase()
* @see java.lang.String#toLowerCase()
* @see java.lang.String#toLowerCase(Locale)
*/
public LangString toUpperCase() {
return new LangString(toString().toUpperCase(getLocale()), getLang());
}
/**
* Returns a copy of the string, with leading and trailing whitespace
* omitted.
* <p>
* If this <code>String</code> object represents an empty character
* sequence, or the first and last characters of character sequence
* represented by this <code>String</code> object both have codes greater
* than <code>'\u0020'</code> (the space character), then a reference to
* this <code>String</code> object is returned.
* <p>
* Otherwise, if there is no character with a code greater than
* <code>'\u0020'</code> in the string, then a new <code>String</code>
* object representing an empty string is created and returned.
* <p>
* Otherwise, let <i>k</i> be the index of the first character in the string
* whose code is greater than <code>'\u0020'</code>, and let <i>m</i> be
* the index of the last character in the string whose code is greater than
* <code>'\u0020'</code>. A new <code>String</code> object is created,
* representing the substring of this string that begins with the character
* at index <i>k</i> and ends with the character at index <i>m</i>-that is,
* the result of <code>this.substring(<i>k</i>, <i>m</i>+1)</code>.
* <p>
* This method may be used to trim whitespace (as defined above) from the
* beginning and end of a string.
*
* @return A copy of this string with leading and trailing white space
* removed, or this string if it has no leading or trailing white
* space.
*/
public LangString trim() {
return new LangString(toString().trim(), getLang());
}
//////////////////////////////// Delegate toString() ////////////////////////////////
/**
* Returns the length of this string.
* The length is equal to the number of <a href="Character.html#unicode">Unicode
* code units</a> in the string.
*
* @return the length of the sequence of characters represented by this
* object.
*/
public int length() {
return toString().length();
}
/**
* Returns the <code>char</code> value at the
* specified index. An index ranges from <code>0</code> to
* <code>length() - 1</code>. The first <code>char</code> value of the sequence
* is at index <code>0</code>, the next at index <code>1</code>,
* and so on, as for array indexing.
*
* <p>If the <code>char</code> value specified by the index is a
* <a href="Character.html#unicode">surrogate</a>, the surrogate
* value is returned.
*
* @param index the index of the <code>char</code> value.
* @return the <code>char</code> value at the specified index of this string.
* The first <code>char</code> value is at index <code>0</code>.
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is negative or not less than the length of this
* string.
*/
public char charAt(int index) {
return toString().charAt(index);
}
/**
* Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>.
*
* @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise
* <tt>false</tt>
*/
public final boolean isEmpty() {
return toString().isEmpty();
}
/**
* Compares this string to the specified {@code CharSequence}. The result
* is {@code true} if and only if this {@code LangString} represents the same
* sequence of char values as the specified sequence.
*
* @param cs
* The sequence to compare this {@code LangString} against
*
* @return {@code true} if this {@code LangString} represents the same
* sequence of char values as the specified sequence, {@code
* false} otherwise
*/
public final boolean contentEquals(CharSequence cs) {
return toString().contentEquals(cs);
}
/**
* Tests if two string regions are equal.
* <p>
* A substring of this <tt>String</tt> object is compared to a substring
* of the argument other. The result is true if these substrings
* represent identical character sequences. The substring of this
* <tt>String</tt> object to be compared begins at index <tt>toffset</tt>
* and has length <tt>len</tt>. The substring of other to be compared
* begins at index <tt>ooffset</tt> and has length <tt>len</tt>. The
* result is <tt>false</tt> if and only if at least one of the following
* is true:
* <ul><li><tt>toffset</tt> is negative.
* <li><tt>ooffset</tt> is negative.
* <li><tt>toffset+len</tt> is greater than the length of this
* <tt>String</tt> object.
* <li><tt>ooffset+len</tt> is greater than the length of the other
* argument.
* <li>There is some nonnegative integer <i>k</i> less than <tt>len</tt>
* such that:
* <tt>this.charAt(toffset+<i>k</i>) != other.charAt(ooffset+<i>k</i>)</tt>
* </ul>
*
* @param toffset the starting offset of the subregion in this string.
* @param other the string argument.
* @param ooffset the starting offset of the subregion in the string
* argument.
* @param len the number of characters to compare.
* @return <code>true</code> if the specified subregion of this string
* exactly matches the specified subregion of the string argument;
* <code>false</code> otherwise.
*/
public final boolean regionMatches(int toffset, CharSequence other, int ooffset, int len) {
return toString().regionMatches(toffset, other.toString(), ooffset, len);
}
/**
* Tests if two string regions are equal.
* <p>
* A substring of this <tt>String</tt> object is compared to a substring
* of the argument <tt>other</tt>. The result is <tt>true</tt> if these
* substrings represent character sequences that are the same, ignoring
* case if and only if <tt>ignoreCase</tt> is true. The substring of
* this <tt>String</tt> object to be compared begins at index
* <tt>toffset</tt> and has length <tt>len</tt>. The substring of
* <tt>other</tt> to be compared begins at index <tt>ooffset</tt> and
* has length <tt>len</tt>. The result is <tt>false</tt> if and only if
* at least one of the following is true:
* <ul><li><tt>toffset</tt> is negative.
* <li><tt>ooffset</tt> is negative.
* <li><tt>toffset+len</tt> is greater than the length of this
* <tt>String</tt> object.
* <li><tt>ooffset+len</tt> is greater than the length of the other
* argument.
* <li><tt>ignoreCase</tt> is <tt>false</tt> and there is some nonnegative
* integer <i>k</i> less than <tt>len</tt> such that:
* <blockquote><pre>
* this.charAt(toffset+k) != other.charAt(ooffset+k)
* </pre></blockquote>
* <li><tt>ignoreCase</tt> is <tt>true</tt> and there is some nonnegative
* integer <i>k</i> less than <tt>len</tt> such that:
* <blockquote><pre>
* Character.toLowerCase(this.charAt(toffset+k)) !=
Character.toLowerCase(other.charAt(ooffset+k))
* </pre></blockquote>
* and:
* <blockquote><pre>
* Character.toUpperCase(this.charAt(toffset+k)) !=
* Character.toUpperCase(other.charAt(ooffset+k))
* </pre></blockquote>
* </ul>
*
* @param ignoreCase if <code>true</code>, ignore case when comparing
* characters.
* @param toffset the starting offset of the subregion in this
* string.
* @param other the string argument.
* @param ooffset the starting offset of the subregion in the string
* argument.
* @param len the number of characters to compare.
* @return <code>true</code> if the specified subregion of this string
* matches the specified subregion of the string argument;
* <code>false</code> otherwise. Whether the matching is exact
* or case insensitive depends on the <code>ignoreCase</code>
* argument.
*/
public final boolean regionMatches(boolean ignoreCase, int toffset, CharSequence other,
int ooffset, int len) {
return toString().regionMatches(ignoreCase, toffset, other.toString(), ooffset, len);
}
/**
* Tests if the substring of this string beginning at the
* specified index starts with the specified prefix.
*
* @param prefix the prefix.
* @param toffset where to begin looking in this string.
* @return <code>true</code> if the character sequence represented by the
* argument is a prefix of the substring of this object starting
* at index <code>toffset</code>; <code>false</code> otherwise.
* The result is <code>false</code> if <code>toffset</code> is
* negative or greater than the length of this
* <code>String</code> object; otherwise the result is the same
* as the result of the expression
* <pre>
* this.substring(toffset).startsWith(prefix)
* </pre>
*/
public final boolean startsWith(CharSequence prefix, int toffset) {
return toString().startsWith(prefix.toString(), toffset);
}
/**
* Tests if this string starts with the specified prefix.
*
* @param prefix the prefix.
* @return <code>true</code> if the character sequence represented by the
* argument is a prefix of the character sequence represented by
* this string; <code>false</code> otherwise.
* Note also that <code>true</code> will be returned if the
* argument is an empty string or is equal to this
* <code>String</code> object as determined by the
* {@link #equals(Object)} method.
*/
public final boolean startsWith(CharSequence prefix) {
return toString().startsWith(prefix.toString());
}
/**
* Tests if this string ends with the specified suffix.
*
* @param suffix the suffix.
* @return <code>true</code> if the character sequence represented by the
* argument is a suffix of the character sequence represented by
* this object; <code>false</code> otherwise. Note that the
* result will be <code>true</code> if the argument is the
* empty string or is equal to this <code>String</code> object
* as determined by the {@link #equals(Object)} method.
*/
public final boolean endsWith(CharSequence suffix) {
return toString().endsWith(suffix.toString());
}
/**
* Returns the index within this string of the first occurrence of
* the specified character. If a character with value
* <code>ch</code> occurs in the character sequence represented by
* this <code>String</code> object, then the index (in Unicode
* code units) of the first such occurrence is returned. For
* values of <code>ch</code> in the range from 0 to 0xFFFF
* (inclusive), this is the smallest value <i>k</i> such that:
* <blockquote><pre>
* this.charAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. For other values of <code>ch</code>, it is the
* smallest value <i>k</i> such that:
* <blockquote><pre>
* this.codePointAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. In either case, if no such character occurs in this
* string, then <code>-1</code> is returned.
*
* @param ch a character (Unicode code point).
* @return the index of the first occurrence of the character in the
* character sequence represented by this object, or
* <code>-1</code> if the character does not occur.
*/
public final int indexOf(int ch) {
return toString().indexOf(ch);
}
/**
* Returns the index within this string of the first occurrence of
* the specified character. If a character with value
* <code>ch</code> occurs in the character sequence represented by
* this <code>String</code> object, then the index (in Unicode
* code units) of the first such occurrence is returned. For
* values of <code>ch</code> in the range from 0 to 0xFFFF
* (inclusive), this is the smallest value <i>k</i> such that:
* <blockquote><pre>
* this.charAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. For other values of <code>ch</code>, it is the
* smallest value <i>k</i> such that:
* <blockquote><pre>
* this.codePointAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. In either case, if no such character occurs in this
* string, then <code>-1</code> is returned.
*
* @param ch a character (Unicode code point).
* @return the index of the first occurrence of the character in the
* character sequence represented by this object, or
* <code>-1</code> if the character does not occur.
*/
public final int indexOf(int ch, int fromIndex) {
return toString().indexOf(ch, fromIndex);
}
/**
* Returns the index within this string of the last occurrence of
* the specified character. For values of <code>ch</code> in the
* range from 0 to 0xFFFF (inclusive), the index (in Unicode code
* units) returned is the largest value <i>k</i> such that:
* <blockquote><pre>
* this.charAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. For other values of <code>ch</code>, it is the
* largest value <i>k</i> such that:
* <blockquote><pre>
* this.codePointAt(<i>k</i>) == ch
* </pre></blockquote>
* is true. In either case, if no such character occurs in this
* string, then <code>-1</code> is returned. The
* <code>String</code> is searched backwards starting at the last
* character.
*
* @param ch a character (Unicode code point).
* @return the index of the last occurrence of the character in the
* character sequence represented by this object, or
* <code>-1</code> if the character does not occur.
*/
public final int lastIndexOf(int ch) {
return toString().lastIndexOf(ch);
}
/**
* Returns the index within this string of the last occurrence of
* the specified character, searching backward starting at the
* specified index. For values of <code>ch</code> in the range
* from 0 to 0xFFFF (inclusive), the index returned is the largest
* value <i>k</i> such that:
* <blockquote><pre>
* (this.charAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
* </pre></blockquote>
* is true. For other values of <code>ch</code>, it is the
* largest value <i>k</i> such that:
* <blockquote><pre>
* (this.codePointAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
* </pre></blockquote>
* is true. In either case, if no such character occurs in this
* string at or before position <code>fromIndex</code>, then
* <code>-1</code> is returned.
*
* <p>All indices are specified in <code>char</code> values
* (Unicode code units).
*
* @param ch a character (Unicode code point).
* @param fromIndex the index to start the search from. There is no
* restriction on the value of <code>fromIndex</code>. If it is
* greater than or equal to the length of this string, it has
* the same effect as if it were equal to one less than the
* length of this string: this entire string may be searched.
* If it is negative, it has the same effect as if it were -1:
* -1 is returned.
* @return the index of the last occurrence of the character in the
* character sequence represented by this object that is less
* than or equal to <code>fromIndex</code>, or <code>-1</code>
* if the character does not occur before that point.
*/
public final int lastIndexOf(int ch, int fromIndex) {
return toString().lastIndexOf(ch, fromIndex);
}
/**
* Returns the index within this string of the first occurrence of the
* specified substring. The integer returned is the smallest value
* <i>k</i> such that:
* <blockquote><pre>
* this.startsWith(str, <i>k</i>)
* </pre></blockquote>
* is <code>true</code>.
*
* @param str any string.
* @return if the string argument occurs as a substring within this
* object, then the index of the first character of the first
* such substring is returned; if it does not occur as a
* substring, <code>-1</code> is returned.
*/
public final int indexOf(CharSequence str) {
return toString().indexOf(str.toString());
}
/**
* Returns the index within this string of the first occurrence of the
* specified substring, starting at the specified index. The integer
* returned is the smallest value <tt>k</tt> for which:
* <blockquote><pre>
* k >= Math.min(fromIndex, this.length()) && this.startsWith(str, k)
* </pre></blockquote>
* If no such value of <i>k</i> exists, then -1 is returned.
*
* @param str the substring for which to search.
* @param fromIndex the index from which to start the search.
* @return the index within this string of the first occurrence of the
* specified substring, starting at the specified index.
*/
public final int indexOf(CharSequence str, int fromIndex) {
return toString().indexOf(str.toString(), fromIndex);
}
/**
* Returns the index within this string of the rightmost occurrence
* of the specified substring. The rightmost empty string "" is
* considered to occur at the index value <code>this.length()</code>.
* The returned index is the largest value <i>k</i> such that
* <blockquote><pre>
* this.startsWith(str, k)
* </pre></blockquote>
* is true.
*
* @param str the substring to search for.
* @return if the string argument occurs one or more times as a substring
* within this object, then the index of the first character of
* the last such substring is returned. If it does not occur as
* a substring, <code>-1</code> is returned.
*/
public final int lastIndexOf(CharSequence str) {
return toString().lastIndexOf(str.toString());
}
/**
* Returns the index within this string of the last occurrence of the
* specified substring, searching backward starting at the specified index.
* The integer returned is the largest value <i>k</i> such that:
* <blockquote><pre>
* k <= Math.min(fromIndex, this.length()) && this.startsWith(str, k)
* </pre></blockquote>
* If no such value of <i>k</i> exists, then -1 is returned.
*
* @param str the substring to search for.
* @param fromIndex the index to start the search from.
* @return the index within this string of the last occurrence of the
* specified substring.
*/
public final int lastIndexOf(CharSequence str, int fromIndex) {
return toString().lastIndexOf(str.toString(), fromIndex);
}
/**
* Tells whether or not this string matches the given <a
* href="../util/regex/Pattern.html#sum">regular expression</a>.
*
* <p> An invocation of this method of the form
* <i>str</i><tt>.matches(</tt><i>regex</i><tt>)</tt> yields exactly the
* same result as the expression
*
* <blockquote><tt> {@link java.util.regex.Pattern}.{@link
* java.util.regex.Pattern#matches(String,CharSequence)
* matches}(</tt><i>regex</i><tt>,</tt> <i>str</i><tt>)</tt></blockquote>
*
* @param regex
* the regular expression to which this string is to be matched
*
* @return <tt>true</tt> if, and only if, this string matches the
* given regular expression
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*/
public final boolean matches(String regex) {
return toString().matches(regex);
}
/**
* Returns true if and only if this string contains the specified
* sequence of char values.
*
* @param s the sequence to search for
* @return true if this string contains <code>s</code>, false otherwise
* @throws NullPointerException if <code>s</code> is <code>null</code>
*/
public final boolean contains(CharSequence s) {
return toString().contains(s);
}
}