package batch.internal.support;

import java.util.ArrayList;
import java.util.List;

/**
 * Splits a single line of text into tokens along a configurable delimiter character.
 *
 * <p>
 * A field may be wrapped in {@link #DEFAULT_QUOTE_CHARACTER quote characters}; delimiters that appear inside a
 * quoted section are kept as part of the token, and the surrounding quotes are removed from the returned token.
 * Quote characters that appear inside a token are not unescaped.
 */
public class DelimitedLineTokenizer {

    /**
     * Convenient constant for the common case of a tab delimiter.
     */
    public static final char DELIMITER_TAB = '\t';

    /**
     * Convenient constant for the common case of a comma delimiter.
     */
    public static final char DELIMITER_COMMA = ',';

    /**
     * Value returned for empty or null input.
     */
    private static final String[] EMPTY_STRINGS = new String[] {};

    /**
     * Convenient constant for the common case of a " quote character.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The delimiter character used when reading input.
     */
    private final char delimiter;

    /**
     * Create a new instance of the {@link DelimitedLineTokenizer} class for the common case where the delimiter
     * is a {@link #DELIMITER_COMMA comma}.
     *
     * @see #DelimitedLineTokenizer(char)
     * @see #DELIMITER_COMMA
     */
    public DelimitedLineTokenizer() {
        this(DELIMITER_COMMA);
    }

    /**
     * Create a new instance of the {@link DelimitedLineTokenizer} class.
     *
     * @param delimiter the desired delimiter
     */
    public DelimitedLineTokenizer(char delimiter) {
        this.delimiter = delimiter;
    }

    /**
     * Yields the tokens resulting from the splitting of the supplied {@code line} along the configured
     * delimiter.
     *
     * <p>
     * Does not include the delimiter in the returned token array. Empty tokens are returned as empty strings,
     * never {@code null}; in particular a line that ends with a delimiter yields a trailing empty token, whether
     * or not the preceding field was quoted.
     *
     * <p>
     * A delimiter inside a quoted section does not split the line. A token that both starts and ends with a
     * quote character has that surrounding pair removed; any other token (including one with an unbalanced
     * quote) is returned unchanged. If a quote is never closed, the remainder of the line belongs to that token.
     *
     * @param line the line to be tokenised (can be {@code null})
     * @return the resulting tokens; an empty {@code String[]} if the supplied {@code line} is {@code null} or
     * zero length, and a single-element array if the line contains no unquoted delimiter
     */
    public String[] tokenize(String line) {
        if (line == null || line.length() == 0) {
            return EMPTY_STRINGS;
        }

        char[] chars = line.toCharArray();
        List<String> tokens = new ArrayList<String>();
        boolean inQuoted = false;
        int tokenStart = 0;

        for (int i = 0; i < chars.length; i++) {
            char currentChar = chars[i];
            // The delimiter test comes first so that a delimiter outside quotes always wins,
            // even if a subclass treats the same character as a quote.
            if (isDelimiterCharacter(currentChar) && !inQuoted) {
                tokens.add(extractToken(chars, tokenStart, i));
                tokenStart = i + 1;
            } else if (isQuoteCharacter(currentChar)) {
                inQuoted = !inQuoted;
            }
        }

        // Whatever follows the last cut is the final token; after a trailing delimiter this is "".
        tokens.add(extractToken(chars, tokenStart, chars.length));

        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Builds the token held in {@code chars[start, end)}, dropping one surrounding pair of quote characters
     * when the range both starts and ends with a quote.
     *
     * @param chars the characters of the line being tokenised
     * @param start index of the first character of the token (inclusive)
     * @param end index just past the last character of the token (exclusive)
     * @return the token text, never {@code null}
     */
    private String extractToken(char[] chars, int start, int end) {
        int length = end - start;
        // Require at least two characters so a lone quote is never read as an opening/closing pair.
        if (length >= 2 && isQuoteCharacter(chars[start]) && isQuoteCharacter(chars[end - 1])) {
            return new String(chars, start + 1, length - 2);
        }
        return new String(chars, start, length);
    }

    /**
     * Is the supplied character the delimiter character?
     *
     * @param c the character to be checked
     * @return {@code true} if the supplied character is the delimiter character
     * @see DelimitedLineTokenizer#DelimitedLineTokenizer(char)
     */
    private boolean isDelimiterCharacter(char c) {
        return c == this.delimiter;
    }

    /**
     * Is the supplied character a quote character?
     *
     * @param c the character to be checked
     * @return {@code true} if the supplied character is a quote character
     * @see #DEFAULT_QUOTE_CHARACTER
     */
    protected boolean isQuoteCharacter(char c) {
        return c == DEFAULT_QUOTE_CHARACTER;
    }
}