/* * $Id$ * * Copyright 2006, The jCoderZ.org Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * Neither the name of the jCoderZ.org Project nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.jcoderz.commons.logging; import java.text.FieldPosition; import java.text.Format; import java.text.ParsePosition; import java.util.Iterator; import java.nio.CharBuffer; import java.util.NoSuchElementException; /** * This Formatter formats the whitespace of a string, The space char * <code>'\u0020'</code> is left untouched, all other whitespace chars are * replaced and compressed by exactly one space char in a row. It extends the * Format type in a unsymmetric way: A formatted String cannot be parsed in a * way that the result is equal to the source string. * If allocated with a sub format, first the sub format is used for formatting, * the rewsult will be formatted by this. * In addition to the Format implementation it offers static access methods * for the format functionality. * */ public final class WhitespaceFormat extends Format { static final char PRESERVED_CHAR = '\u0020'; private final Format mSubFormat; private static final class WhitespaceIterator implements Iterator { private final CharBuffer mBuffer; /** * Constructs a white space iterator fo the supplied text. Skips over * initial whitespace. * * @param text The text which should be read line by line. */ private WhitespaceIterator (final CharBuffer buffer) { mBuffer = buffer.duplicate(); skip(); } /** {@inheritDoc} */ public void remove () { throw new UnsupportedOperationException(); } /** {@inheritDoc} */ public boolean hasNext () { return mBuffer.remaining() > 0; } /** * Gets the next character sequence up to the next whitespace char * (excluding), which is not the space character <code>' '</code>. * The character sequence being returned by this will never contain a * whitespace char but the space char <code>'\u0020'</code>. * * @see java.util.Iterator#next() */ public Object next () { if (mBuffer.remaining() <= 0) { throw new NoSuchElementException(); } boolean wsFound = false; boolean postWsFound = false; int startOfWs = 0; int endOfWs = 0; // CHECKME: on some platforms slice does not behave as stated in SDK // api, mBuffer is simply duplicated. final CharBuffer rc = mBuffer.slice(); for (int i = 0; i < mBuffer.remaining() && ! (wsFound && postWsFound); ++i) { final char c = mBuffer.charAt(i); if (Character.isWhitespace(c) && ! (c == PRESERVED_CHAR)) { if (! wsFound) { startOfWs = i; } endOfWs = i; wsFound = true; postWsFound = false; } else { postWsFound = true; } } setPositions(rc, wsFound, postWsFound, startOfWs, endOfWs); return rc; } /** * Sets the positions and limits of supplied buffer and internal buffer * for the result of a next call. * * @param rc This buffer is returned to the <code>next()</code> caller. * @param wsFound Flag denoting whether whitespace to replace has been * found. * @param postWsFound Flag for denoting whether chars after whitespace * have been found. * @param startOfWs The start index of whitespace chars to replace. * @param endOfWs THe end index of teh whitespace chars to replace. */ private void setPositions ( final CharBuffer rc, final boolean wsFound, final boolean postWsFound, final int startOfWs, final int endOfWs) { if (wsFound) { // CHECKME: if slice does not work as said in the api, then position // is > 0 and idx has to be added to the position to get the new // limit if (rc.position() > 0) { rc.limit(rc.position() + startOfWs); } else { rc.limit(startOfWs); } if (! postWsFound) { mBuffer.position(mBuffer.limit()); } else { mBuffer.position(mBuffer.position() + endOfWs + 1); } } else { mBuffer.position(mBuffer.limit()); } } /** * Skips over initial whitespace, which is not * {@linkplain WhitespaceFormat#PRESERVED_CHAR} */ private void skip () { boolean wsFound = false; boolean postWsFound = false; boolean first = true; int endOfWs = 0; for (int i = 0; i < mBuffer.remaining() && ((wsFound ^ postWsFound) || first); ++i) { first = false; final char c = mBuffer.charAt(i); if (Character.isWhitespace(c) && ! (c == PRESERVED_CHAR)) { wsFound = true; endOfWs = i; } else { postWsFound = wsFound; } } if (wsFound && ! postWsFound) { // only whitespace found mBuffer.position(mBuffer.limit()); } else if (wsFound) { // found chars after whitespace, so the following is correct mBuffer.position(mBuffer.position() + endOfWs); } } } /** * Creates a new instance of this with no sub format. */ public WhitespaceFormat () { this(null); } /** * Creates a new instance of this with the supplied sub format. * * @param subFormat The sub format to use for first step formatting of an * object. This will be used for parsing an object as well. Might be null. */ public WhitespaceFormat (final Format subFormat) { mSubFormat = subFormat; } /** * Replaces and reduces whitespace in the supplied message. The resulting * string will only have <code>'\u0020'</code> as white space. Any such * character in the source string is left untouched, all other whitespace * characters are replaced by <code>'\u0020'</code>, but with only one in a * row, so, for example, a sequence of 2 line separators will be replaced * by one <code>'\u0020'</code>. * * @param message The message in which to find and replace white space. * * @return String with replaced and reduced white space */ public static String format (final String message) { return format(CharBuffer.wrap(message)).toString(); } /** * Replaced and reduces whitespace in the supplied character buffer. * * @see #format(String) * @param message The message buffer in which to find and replace white * space. * * @return CharBuffer with replaced and reduced white space. This might be * <code>message</code> if it does not contain whitespace to replace. */ public static CharBuffer format (final CharBuffer message) { final WhitespaceIterator iter = new WhitespaceIterator(message); CharBuffer rc = null; boolean isFirst = true; boolean flip = false; while (iter.hasNext()) { final CharBuffer cb = (CharBuffer) iter.next(); if (! (isFirst || (rc == null))) { rc.put(PRESERVED_CHAR); rc.put(cb); } else if (isFirst) { isFirst = false; if ((cb.limit() == message.limit()) && (cb.position() == message.position())) { rc = message.duplicate(); } else { rc = CharBuffer.allocate(message.limit()); rc.put(cb); flip = true; } } else { // should never occur throw new RuntimeException("More than one string parts and no " + "target buffer is allocated"); } } if (flip) { rc.flip(); } return rc; } /** * If a sub format is set, it delegates parsing to the sub format. If no * subformat is set, it takes the source string until the first whitespace * char is found, which is not {@link #PRESERVED_CHAR}. * * @see java.text.Format#parseObject(java.lang.String, java.text.ParsePosition) */ public Object parseObject (final String source, final ParsePosition pos) { Object rc; if (mSubFormat == null) { int i = pos.getIndex(); final int len = source.length(); boolean endFound = false; while (i < len && ! endFound) { final char c = source.charAt(i); if (Character.isWhitespace(c) && c != PRESERVED_CHAR) { endFound = true; } else { ++i; } } if (endFound) { rc = source.substring(pos.getIndex(), i); pos.setIndex(i); } else { rc = source.substring(pos.getIndex()); pos.setIndex(len); } } else { rc = mSubFormat.parseObject(source, pos); } return rc; } /** * If a sub format is set, it uses this to format the object and compresses * the whitespace within the result. * If no sub format is set, it expects a String object and compresses the * whitespace on that. * * @param obj The object to format. * @param toAppendTo The string buffer where to append to the formatted * object. * @param pos The field position for formatting. * * @return StringBuffer with formatted objects. * * @see java.text.Format#format(java.lang.Object, java.lang.StringBuffer, java.text.FieldPosition) */ public StringBuffer format ( final Object obj, final StringBuffer toAppendTo, final FieldPosition pos) { if (mSubFormat == null) { if (! (obj instanceof String)) { throw new IllegalArgumentException("Supplied object to be formatted" + " must be a String but is " + obj.getClass().getName() + ": " + obj); } toAppendTo.append(format((String) obj)); if (pos != null) { pos.setBeginIndex(0); pos.setEndIndex(0); } } else { StringBuffer sb = new StringBuffer(); sb = mSubFormat.format(obj, sb, pos); toAppendTo.append(WhitespaceFormat.format(sb.toString())); } return toAppendTo; } }