/* * ==================================================================== * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. * */ package org.apache.http.message; import java.util.BitSet; import org.apache.http.annotation.Immutable; import org.apache.http.util.CharArrayBuffer; /** * Low level parser for header field elements. The parsing routines of this class are designed * to produce near zero intermediate garbage and make no intermediate copies of input data. * <p> * This class is immutable and thread safe. * * @since 4.4 */ @Immutable public class TokenParser { public static BitSet INIT_BITSET(final int ... b) { final BitSet bitset = new BitSet(); for (final int aB : b) { bitset.set(aB); } return bitset; } /** US-ASCII CR, carriage return (13) */ public static final char CR = '\r'; /** US-ASCII LF, line feed (10) */ public static final char LF = '\n'; /** US-ASCII SP, space (32) */ public static final char SP = ' '; /** US-ASCII HT, horizontal-tab (9) */ public static final char HT = '\t'; /** Double quote */ public static final char DQUOTE = '\"'; /** Backward slash / escape character */ public static final char ESCAPE = '\\'; public static boolean isWhitespace(final char ch) { return ch == SP || ch == HT || ch == CR || ch == LF; } public static final TokenParser INSTANCE = new TokenParser(); /** * Extracts from the sequence of chars a token terminated with any of the given delimiters * discarding semantically insignificant whitespace characters. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the token * is not delimited by any character. */ public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) { final StringBuilder dst = new StringBuilder(); boolean whitespace = false; while (!cursor.atEnd()) { final char current = buf.charAt(cursor.getPos()); if (delimiters != null && delimiters.get(current)) { break; } else if (isWhitespace(current)) { skipWhiteSpace(buf, cursor); whitespace = true; } else { if (whitespace && dst.length() > 0) { dst.append(' '); } copyContent(buf, cursor, delimiters, dst); whitespace = false; } } return dst.toString(); } /** * Extracts from the sequence of chars a value which can be enclosed in quote marks and * terminated with any of the given delimiters discarding semantically insignificant * whitespace characters. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is not delimited by any character. */ public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) { final StringBuilder dst = new StringBuilder(); boolean whitespace = false; while (!cursor.atEnd()) { final char current = buf.charAt(cursor.getPos()); if (delimiters != null && delimiters.get(current)) { break; } else if (isWhitespace(current)) { skipWhiteSpace(buf, cursor); whitespace = true; } else if (current == DQUOTE) { if (whitespace && dst.length() > 0) { dst.append(' '); } copyQuotedContent(buf, cursor, dst); whitespace = false; } else { if (whitespace && dst.length() > 0) { dst.append(' '); } copyUnquotedContent(buf, cursor, delimiters, dst); whitespace = false; } } return dst.toString(); } /** * Skips semantically insignificant whitespace characters and moves the cursor to the closest * non-whitespace character. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer */ public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if (!isWhitespace(current)) { break; } else { pos++; } } cursor.updatePos(pos); } /** * Transfers content into the destination buffer until a whitespace character or any of * the given delimiters is encountered. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is delimited by a whitespace only. * @param dst destination buffer */ public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters, final StringBuilder dst) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) { break; } else { pos++; dst.append(current); } } cursor.updatePos(pos); } /** * Transfers content into the destination buffer until a whitespace character, a quote, * or any of the given delimiters is encountered. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is delimited by a whitespace or a quote only. * @param dst destination buffer */ public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters, final StringBuilder dst) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if ((delimiters != null && delimiters.get(current)) || isWhitespace(current) || current == DQUOTE) { break; } else { pos++; dst.append(current); } } cursor.updatePos(pos); } /** * Transfers content enclosed with quote marks into the destination buffer. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param dst destination buffer */ public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor, final StringBuilder dst) { if (cursor.atEnd()) { return; } int pos = cursor.getPos(); int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); char current = buf.charAt(pos); if (current != DQUOTE) { return; } pos++; indexFrom++; boolean escaped = false; for (int i = indexFrom; i < indexTo; i++, pos++) { current = buf.charAt(i); if (escaped) { if (current != DQUOTE && current != ESCAPE) { dst.append(ESCAPE); } dst.append(current); escaped = false; } else { if (current == DQUOTE) { pos++; break; } if (current == ESCAPE) { escaped = true; } else if (current != CR && current != LF) { dst.append(current); } } } cursor.updatePos(pos); } }