/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.lang3; import java.util.ArrayList; import java.util.List; /** * <p>Operations on {@link java.lang.String} that are * {@code null} safe.</p> * * <ul> * <li><b>IsEmpty/IsBlank</b> * - checks if a String contains text</li> * <li><b>Trim/Strip</b> * - removes leading and trailing whitespace</li> * <li><b>Equals</b> * - compares two strings null-safe</li> * <li><b>startsWith</b> * - check if a String starts with a prefix null-safe</li> * <li><b>endsWith</b> * - check if a String ends with a suffix null-safe</li> * <li><b>IndexOf/LastIndexOf/Contains</b> * - null-safe index-of checks * <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b> * - index-of any of a set of Strings</li> * <li><b>ContainsOnly/ContainsNone/ContainsAny</b> * - does String contains only/none/any of these characters</li> * <li><b>Substring/Left/Right/Mid</b> * - null-safe substring extractions</li> * <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b> * - substring extraction relative to other strings</li> * <li><b>Split/Join</b> * - splits a String into an array of substrings and vice versa</li> * <li><b>Remove/Delete</b> * - removes part of a String</li> * <li><b>Replace/Overlay</b> * - Searches a String and replaces one String with another</li> * <li><b>Chomp/Chop</b> * - removes the last part of a String</li> * <li><b>AppendIfMissing</b> * - appends a suffix to the end of the String if not present</li> * <li><b>PrependIfMissing</b> * - prepends a prefix to the start of the String if not present</li> * <li><b>LeftPad/RightPad/Center/Repeat</b> * - pads a String</li> * <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b> * - changes the case of a String</li> * <li><b>CountMatches</b> * - counts the number of occurrences of one String in another</li> * <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b> * - checks the characters in a String</li> * <li><b>DefaultString</b> * - protects against a null input String</li> * <li><b>Reverse/ReverseDelimited</b> * - reverses a String</li> * <li><b>Abbreviate</b> * - abbreviates a string using ellipsis</li> * <li><b>Difference</b> * - compares Strings and reports on their differences</li> * <li><b>LevenshteinDistance</b> * - the number of changes needed to change one String into another</li> * </ul> * * <p>The {@code StringUtils} class defines certain words related to * String handling.</p> * * <ul> * <li>null - {@code null}</li> * <li>empty - a zero-length string ({@code ""})</li> * <li>space - the space character ({@code ' '}, char 32)</li> * <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li> * <li>trim - the characters <= 32 as in {@link String#trim()}</li> * </ul> * * <p>{@code StringUtils} handles {@code null} input Strings quietly. * That is to say that a {@code null} input will return {@code null}. * Where a {@code boolean} or {@code int} is being returned * details vary by method.</p> * * <p>A side effect of the {@code null} handling is that a * {@code NullPointerException} should be considered a bug in * {@code StringUtils}.</p> * * <p>Methods in this class give sample code to explain their operation. * The symbol {@code *} is used to indicate any input including {@code null}.</p> * * <p>#ThreadSafe#</p> * @see java.lang.String * @since 1.0 * @version $Id$ */ /** * Simplied Version. * * @author Roque Pinel * */ // @Immutable public class StringUtils { /** * <p> * Splits the provided text into an array, separator specified. This is an * alternative to using StringTokenizer. * </p> * * <p> * The separator is not included in the returned String array. Adjacent * separators are treated as one separator. For more control over the split * use the StrTokenizer class. * </p> * * <p> * A {@code null} input String returns {@code null}. * </p> * * <pre> * StringUtils.split(null, *) = null * StringUtils.split("", *) = [] * StringUtils.split("a.b.c", '.') = ["a", "b", "c"] * StringUtils.split("a..b.c", '.') = ["a", "b", "c"] * StringUtils.split("a:b:c", '.') = ["a:b:c"] * StringUtils.split("a b c", ' ') = ["a", "b", "c"] * </pre> * * @param str * the String to parse, may be null * @param separatorChar * the character used as the delimiter * @return an array of parsed Strings, {@code null} if null String input * @since 2.0 */ public static String[] split(final String str, final char separatorChar) { return splitWorker(str, separatorChar, false); } /** * <p>Splits the provided text into an array, separator specified, * preserving all tokens, including empty tokens created by adjacent * separators. This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as separators for empty tokens. * For more control over the split use the StrTokenizer class.</p> * * <p>A {@code null} input String returns {@code null}.</p> * * <pre> * StringUtils.splitPreserveAllTokens(null, *) = null * StringUtils.splitPreserveAllTokens("", *) = [] * StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"] * StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"] * StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""] * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", "", ""] * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", a", "b", "c"] * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "", a", "b", "c"] * StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", a", "b", "c", ""] * </pre> * * @param str the String to parse, may be {@code null} * @param separatorChar the character used as the delimiter, * {@code null} splits on whitespace * @return an array of parsed Strings, {@code null} if null String input * @since 2.1 */ public static String[] splitPreserveAllTokens(final String str, final char separatorChar) { return splitWorker(str, separatorChar, true); } /** * Performs the logic for the {@code split} and * {@code splitPreserveAllTokens} methods that do not return a maximum array * length. * * @param str * the String to parse, may be {@code null} * @param separatorChar * the separate character * @param preserveAllTokens * if {@code true}, adjacent separators are treated as empty * token separators; if {@code false}, adjacent separators are * treated as one separator. * @return an array of parsed Strings, {@code null} if null String input */ private static String[] splitWorker(final String str, final char separatorChar, final boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) if (str == null) { return null; } final int len = str.length(); if (len == 0) { return new String[0]; } final List list = new ArrayList(); int i = 0, start = 0; boolean match = false; boolean lastMatch = false; while (i < len) { if (str.charAt(i) == separatorChar) { if (match || preserveAllTokens) { list.add(str.substring(start, i)); match = false; lastMatch = true; } start = ++i; continue; } lastMatch = false; match = true; i++; } if (match || preserveAllTokens && lastMatch) { list.add(str.substring(start, i)); } return (String[]) list.toArray(new String[list.size()]); } }