/**
*
*/
package ecologylab.bigsemantics.html.utils;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import ecologylab.generic.StringBuilderBaseUtils;
import ecologylab.generic.StringTools;
/**
*
*
* @author andruid
*/
public class StringBuilderUtils extends StringBuilderBaseUtils
{
/**
* Trim whitespace off of the buffer referred to through pointers in the TdNode.
* If result is not empty, then decode the resulting text into a StringBuilder.
* <p/>
* For the result, an existing StringBuilder will be cleared (reset), or,
* if result == null on entry, a new StringBuilder is acquired from the pool.
* <p/>
* If there is a non-null result, it is up to the caller to call release(result) when it is no longer needed.
*
* @param result The StringBuilder we're using. Either the one passed in (and perhaps modified), if there was one,
* or a new one, if null was passed in and there are actually chars to decode after trim.
* @param childNode TdNode of source. Supplies byte array of characters, and start and end points.
*
* @return null if no work to do because length < minLength after trim, or a buffer with the decoded result.
*/
public static StringBuilder trimAndDecodeUTF8(StringBuilder result, Node childNode)
{
return trimAndDecodeUTF8(result, childNode, 0, false);
}
/**
* Trim whitespace off of the buffer referred to through pointers in the TdNode.
* If result is longer than minLength, then decode the resulting text into a StringBuilder.
* <p/>
* For the result, an existing StringBuilder will be cleared (reset), or,
* if result == null on entry, a new StringBuilder is acquired from the pool.
* <p/>
* If there is a non-null result, it is up to the caller to call release(result) when it is no longer needed.
*
* @param result The StringBuilder we're using. Either the one passed in (and perhaps modified), if there was one,
* or a new one, if null was passed in and there are actually chars to decode after trim.
* @param childNode TdNode of source. Supplies byte array of characters, and start and end points.
* @param minLength A threshold that is applied to decide if resulting chars should be decoded to result.
* For unconditional decode, set to 0.
*
* @return null if no work to do because length < minLength after trim, or a buffer with the decoded result.
*/
public static StringBuilder trimAndDecodeUTF8(StringBuilder result, Node childNode, int minLength)
{
return trimAndDecodeUTF8(result, childNode, minLength, false);
}
/**
* Trim whitespace off of the buffer referred to through pointers in the TdNode.
* If result is longer than minLength, then decode the resulting text into a StringBuilder.
* <p/>
* If appendNoClear is true, then append the new text to the old StringBuilder passed in as result (concatenate).
* Otherwise, an existing StringBuilder will be cleared (reset).
* <p/>
* If result == null on entry, a new StringBuilder is acquired from the pool.
* It is up to the caller to call release(result) when it is no longer needed.
*
* @param result The StringBuilder we're using. Either the one passed in (and perhaps modified), if there was one,
* or a new one, if null was passed in and there are actually chars to decode after trim.
* @param childNode TdNode of source. Supplies byte array of characters, and start and end points.
* @param minLength A threshold that is applied to decide if resulting chars should be decoded to result.
* For unconditional decode, set to 0.
* @param appendNoClear If true, we append to prior results without clearing them.
*
* @return null if no work to do because length < minLength after trim, or a buffer with the decoded result.
*/
//FIXME - textarray(), start(), end()
public static StringBuilder trimAndDecodeUTF8(StringBuilder result, Node childNode, int minLength, boolean appendNoClear)
{
byte[] textarray = null;
if (childNode instanceof Text)
{
textarray = childNode.getNodeValue().getBytes();
}
else
textarray = childNode.getNodeName().getBytes();
// int start = childNode.start();
// int end = childNode.end();
//
// // trim in place
// while (Character.isWhitespace((char) textarray[start]) && (start < end))
// {
// start++;
// }
// while (Character.isWhitespace((char) textarray[end - 1]) && (start < end))
// {
// end--;
// }
int length = textarray.length;
if (length > minLength)
{
if (!((length >= 4) && (textarray[0] == '<') &&
(textarray[1] == '!') && (textarray[2] == '-') && (textarray[3] == '-')))
{
if (result == null)
result = acquire();
else if (!appendNoClear)
StringTools.clear(result);
StringTools.decodeUTF8(result, textarray, 0, length);
}
}
return result;
}
}