/*
 * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/SubstringFunction.java,v 1.16 2006/02/05 21:47:41 elharo Exp $
 * $Revision: 1.16 $
 * $Date: 2006/02/05 21:47:41 $
 *
 * ====================================================================
 *
 * Copyright 2000-2002 bob mcwhirter & James Strachan.
 * All rights reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   * Neither the name of the Jaxen Project nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Jaxen Project and was originally
 * created by bob mcwhirter <bob@werken.com> and
 * James Strachan <jstrachan@apache.org>.  For more information on the
 * Jaxen Project, please see <http://www.jaxen.org/>.
 *
 */

package org.orbeon.jaxen.function;

import org.orbeon.jaxen.Context;
import org.orbeon.jaxen.Function;
import org.orbeon.jaxen.FunctionCallException;
import org.orbeon.jaxen.Navigator;

import java.util.List;

/**
 * <p>
 * <b>4.2</b>
 * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
 * </p>
 *
 * <blockquote src="http://www.w3.org/TR/xpath">
 * <p>The <b>substring</b> function returns the
 * substring of the first argument starting at the position specified in
 * the second argument with length specified in the third argument. For
 * example,
 * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
 * If the third argument is not specified, it returns the substring
 * starting at the position specified in the second argument and
 * continuing to the end of the string. For example,
 * <code>substring("12345",2)</code> returns <code>"2345"</code>.
 * </p>
 *
 * <p>
 * More precisely, each character in the string (see <a
 * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
 * numeric position: the position of the first character is 1, the
 * position of the second character is 2 and so on.
 * </p>
 *
 * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
 * which the <code>String.substring</code> method treats the position
 * of the first character as 0.</blockquote>
 *
 * <p>
 * The returned substring contains those characters for which the
 * position of the character is greater than or equal to the rounded
 * value of the second argument and, if the third argument is specified,
 * less than the sum of the rounded value of the second argument and the
 * rounded value of the third argument; the comparisons and addition
 * used for the above follow the standard IEEE 754 rules; rounding is
 * done as if by a call to the <b><a href="#function-round">round</a></b>
 * function. The following examples illustrate various unusual cases:
 * </p>
 *
 * <ul>
 *
 * <li>
 * <p>
 * <code>substring("12345", 1.5, 2.6)</code> returns
 * <code>"234"</code>
 * </p>
 * </li>
 *
 * <li>
 * <p>
 * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
 * </p>
 * </li>
 *
 * <li>
 * <p>
 * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
 * </p>
 * </li>
 *
 * <li>
 * <p>
 * <code>substring("12345", 1, 0 div 0)</code> returns
 * <code>""</code>
 * </p>
 * </li>
 *
 * <li>
 * <p>
 * <code>substring("12345", -42, 1 div 0)</code> returns
 * <code>"12345"</code>
 * </p>
 * </li>
 *
 * <li>
 * <p>
 * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
 * <code>""</code>
 * </p>
 * </li>
 *
 * </ul>
 * </blockquote>
 *
 * @author bob mcwhirter (bob @ werken.com)
 *
 * @see <a href="http://www.w3.org/TR/xpath#function-substring"
 *      target="_top">Section 4.2 of the XPath Specification</a>
 */
public class SubstringFunction implements Function
{

    /** First UTF-16 code unit of the high (leading) surrogate range, U+D800. */
    private static final int MIN_HIGH_SURROGATE = 0xD800;

    /** Last UTF-16 code unit of the high (leading) surrogate range, U+DBFF. */
    private static final int MAX_HIGH_SURROGATE = 0xDBFF;

    /** First UTF-16 code unit of the low (trailing) surrogate range, U+DC00. */
    private static final int MIN_LOW_SURROGATE = 0xDC00;

    /** Last UTF-16 code unit of the low (trailing) surrogate range, U+DFFF. */
    private static final int MAX_LOW_SURROGATE = 0xDFFF;

    /**
     * Create a new <code>SubstringFunction</code> object.
     */
    public SubstringFunction() {}

    /**
     * Returns a substring of an XPath string-value by character index.
     *
     * <p>
     * The range is computed in <code>double</code> arithmetic, as the XPath
     * specification requires, and is only narrowed to <code>int</code> after
     * it has been clamped to the string. Infinite or very large start and
     * length arguments therefore cannot wrap around.
     * </p>
     *
     * @param context the context at the point in the
     *         expression when the function is called
     * @param args a list that contains two or three items: the string,
     *         the 1-based start position and, optionally, the length
     *
     * @return a <code>String</code> containing the specified character
     *         subsequence of the string-value of the first argument;
     *         the empty string if the selected range is empty or if the
     *         start or the length is NaN
     *
     * @throws FunctionCallException if <code>args</code> has more than three
     *         or fewer than two items
     */
    public Object call(Context context,
                       List args) throws FunctionCallException
    {
        final int argc = args.size();
        if (argc < 2 || argc > 3) {
            throw new FunctionCallException( "substring() requires two or three arguments." );
        }

        final Navigator nav = context.getNavigator();

        final String str = StringFunction.evaluate(args.get(0), nav);
        // The spec doesn't really address this case
        if (str == null) {
            return "";
        }

        // Length in XPath characters; this may be smaller than str.length(),
        // which counts UTF-16 code units.
        final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav)).intValue();
        if (stringLength == 0) {
            return "";
        }

        final Double d1 = NumberFunction.evaluate(args.get(1), nav);
        if (d1.isNaN()) {
            return "";
        }
        // 1-based position of the first selected character; may be infinite.
        final double first = RoundFunction.evaluate(d1, nav).doubleValue();

        // 1-based position just past the last selected character. Without a
        // length argument the selection runs to the end of the string.
        double limit = Double.POSITIVE_INFINITY;
        if (argc == 3) {
            final Double d2 = NumberFunction.evaluate(args.get(2), nav);
            if (d2.isNaN()) {
                return "";
            }
            // May yield NaN (-Infinity + Infinity); extract() treats that as empty.
            limit = first + RoundFunction.evaluate(d2, nav).doubleValue();
        }

        return extract(str, stringLength, first, limit);
    }

    /**
     * Selects the characters whose 1-based position <code>p</code> satisfies
     * <code>first &lt;= p &lt; limit</code>.
     *
     * @param str the source string
     * @param stringLength the length of <code>str</code> in XPath characters
     * @param first the rounded start position; may be infinite
     * @param limit the exclusive end position; may be infinite or NaN
     *
     * @return the selected characters, or the empty string if there are none
     */
    private static String extract(String str, int stringLength, double first, double limit)
    {
        // The negated comparisons are deliberate: they are also true when
        // limit is NaN, which must select nothing.
        if (!(limit > first) || !(limit > 1) || first > stringLength) {
            return "";
        }

        // Both casts are safe: at this point the cast operands are known to
        // lie within [1, stringLength].
        final int start = (first < 1) ? 0 : (int) first - 1;
        final int end = (limit > stringLength) ? stringLength : (int) limit - 1;

        if (stringLength == str.length()) {
            // easy case; no surrogate pairs
            return str.substring(start, end);
        }
        return unicodeSubstring(str, start, end);
    }

    /**
     * Extracts a substring addressed in characters rather than UTF-16 code
     * units, counting each surrogate pair as a single character.
     *
     * @param s the source string
     * @param start zero-based index of the first character to include
     * @param end zero-based index just past the last character to include
     *
     * @return the characters in the range <code>[start, end)</code>
     */
    private static String unicodeSubstring(String s, int start, int end)
    {
        final int from = advance(s, 0, start);
        final int to = advance(s, from, end - start);
        return s.substring(from, to);
    }

    /**
     * Moves forward over <code>count</code> characters, starting at the given
     * code-unit offset, and returns the resulting code-unit offset. The walk
     * stops at the end of the string rather than running past it.
     *
     * @param s the string to walk
     * @param offset the code-unit offset to start from
     * @param count the number of characters to skip
     *
     * @return the code-unit offset reached
     */
    private static int advance(String s, int offset, int count)
    {
        final int length = s.length();
        int index = offset;
        for (int remaining = count; remaining > 0 && index < length; remaining--) {
            final char c = s.charAt(index++);
            // Only a genuine high surrogate followed by a low surrogate forms
            // a pair. BMP characters from U+E000 upwards are ordinary
            // single-unit characters and must not swallow their successor.
            if (isHighSurrogate(c) && index < length && isLowSurrogate(s.charAt(index))) {
                index++;
            }
        }
        return index;
    }

    /** Tells whether <code>c</code> is a UTF-16 high (leading) surrogate. */
    private static boolean isHighSurrogate(char c)
    {
        return c >= MIN_HIGH_SURROGATE && c <= MAX_HIGH_SURROGATE;
    }

    /** Tells whether <code>c</code> is a UTF-16 low (trailing) surrogate. */
    private static boolean isLowSurrogate(char c)
    {
        return c >= MIN_LOW_SURROGATE && c <= MAX_LOW_SURROGATE;
    }
}