/*
 * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
 * $Revision: 1.10 $
 * $Date: 2006/02/05 21:47:41 $
 *
 * ====================================================================
 *
 * Copyright 2000-2002 bob mcwhirter & James Strachan.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   * Neither the name of the Jaxen Project nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Jaxen Project and was originally
 * created by bob mcwhirter <bob@werken.com> and
 * James Strachan <jstrachan@apache.org>.  For more information on the
 * Jaxen Project, please see <http://www.jaxen.org/>.
 *
 * $Id: TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
 */

package org.orbeon.jaxen.function;

import org.orbeon.jaxen.Context;
import org.orbeon.jaxen.Function;
import org.orbeon.jaxen.FunctionCallException;
import org.orbeon.jaxen.Navigator;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * <b>4.2</b>
 * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
 * </p>
 *
 * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
 * <p>
 * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
 * returns the first argument string with occurrences of characters in
 * the second argument string replaced by the character at the
 * corresponding position in the third argument string. For example,
 * <code>translate("bar","abc","ABC")</code> returns the string
 * <code>BAr</code>. If there is a character in the second argument
 * string with no character at a corresponding position in the third
 * argument string (because the second argument string is longer than
 * the third argument string), then occurrences of that character in the
 * first argument string are removed. For example,
 * <code>translate("--aaa--","abc-","ABC")</code> returns
 * <code>"AAA"</code>. If a character occurs more than once in the
 * second argument string, then the first occurrence determines the
 * replacement character. If the third argument string is longer than
 * the second argument string, then excess characters are ignored.
 * </p>
 *
 * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
 * sufficient solution for case conversion in all languages. A future
 * version of XPath may provide additional functions for case
 * conversion.</blockquote>
 *
 * </blockquote>
 *
 * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
 *
 * @see <a href="http://www.w3.org/TR/xpath#function-translate"
 *      target="_top">Section 4.2 of the XPath Specification</a>
 */
public class TranslateFunction implements Function {

    /* The translation is done through a HashMap. Performance tip (for anyone
     * who needs to improve the performance of this particular function):
     * Cache the HashMaps, once they are constructed. */

    /**
     * Create a new <code>TranslateFunction</code> object.
     */
    public TranslateFunction() {}

    /**
     * Returns a copy of the first argument in which
     * characters found in the second argument are replaced by
     * corresponding characters from the third argument.
     *
     * @param context the context at the point in the
     *         expression when the function is called
     * @param args a list that contains exactly three items
     *
     * @return a <code>String</code> built from <code>args.get(0)</code>
     *     in which occurrences of characters in <code>args.get(1)</code>
     *     are replaced by the corresponding characters in <code>args.get(2)</code>
     *
     * @throws FunctionCallException if <code>args</code> does not have exactly three items
     */
    public Object call(Context context,
                       List args) throws FunctionCallException {
        if ( args.size() == 3 ) {
            return evaluate( args.get(0),
                             args.get(1),
                             args.get(2),
                             context.getNavigator() );
        }

        throw new FunctionCallException( "translate() requires three arguments." );
    }

    /**
     * Returns a copy of <code>strArg</code> in which
     * characters found in <code>fromArg</code> are replaced by
     * corresponding characters from <code>toArg</code>.
     * If necessary each argument is first converted to its string-value
     * as if by the XPath <code>string()</code> function.
     *
     * <p>
     * Characters are compared as whole Unicode characters: a well-formed
     * surrogate pair is treated as a single character when it is looked up,
     * replaced or removed.
     * </p>
     *
     * @param strArg the base string
     * @param fromArg the characters to be replaced
     * @param toArg the characters they will be replaced by
     * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
     *
     * @return a copy of <code>strArg</code> in which
     *     characters found in <code>fromArg</code> are replaced by
     *     corresponding characters from <code>toArg</code>
     *
     * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
     *     that is, if a high surrogate is not immediately followed by a low surrogate
     */
    public static String evaluate(Object strArg,
                                  Object fromArg,
                                  Object toArg,
                                  Navigator nav) throws FunctionCallException {
        String inStr = StringFunction.evaluate( strArg, nav );
        String fromStr = StringFunction.evaluate( fromArg, nav );
        String toStr = StringFunction.evaluate( toArg, nav );

        // Build the mapping: character -> replacement, or character -> null
        // when the character has no counterpart in toStr and must be removed.
        Map characterMap = new HashMap();
        String[] fromCharacters = toUnicodeCharacters( fromStr );
        String[] toCharacters = toUnicodeCharacters( toStr );
        int fromLen = fromCharacters.length;
        int toLen = toCharacters.length;
        for ( int i = 0; i < fromLen; i++ ) {
            String cFrom = fromCharacters[i];
            if ( characterMap.containsKey( cFrom ) ) {
                // Only the first occurrence of a character determines its replacement
                continue;
            }

            if ( i < toLen ) {
                // Will change
                characterMap.put( cFrom, toCharacters[i] );
            }
            else {
                // Will delete
                characterMap.put( cFrom, null );
            }
        }

        // Process the input string through the map
        StringBuffer outStr = new StringBuffer( inStr.length() );
        String[] inCharacters = toUnicodeCharacters( inStr );
        int inLen = inCharacters.length;
        for ( int i = 0; i < inLen; i++ ) {
            String cIn = inCharacters[i];
            // containsKey is required in addition to get: a null value means
            // "delete", whereas an absent key means "copy unchanged".
            if ( characterMap.containsKey( cIn ) ) {
                String cTo = (String) characterMap.get( cIn );
                if ( cTo != null ) {
                    outStr.append( cTo );
                }
            }
            else {
                outStr.append( cIn );
            }
        }

        return outStr.toString();
    }

    /**
     * Splits a string into an array with one entry per Unicode character.
     * A high surrogate immediately followed by a low surrogate becomes a
     * single two-<code>char</code> entry; every other <code>char</code>
     * (including a low surrogate that is not preceded by a high surrogate)
     * becomes a one-<code>char</code> entry.
     *
     * @param s the string to split
     *
     * @return the characters of <code>s</code>, in order
     *
     * @throws FunctionCallException if a high surrogate is followed by something
     *     other than a low surrogate, or is the last <code>char</code> of the string
     */
    private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
        int length = s.length();
        // Upper bound on the number of characters; trimmed below if pairs were found
        String[] result = new String[length];
        int count = 0;
        for ( int i = 0; i < length; i++ ) {
            char c1 = s.charAt( i );
            if ( isHighSurrogate( c1 ) ) {
                // Explicit bounds check instead of relying on charAt() to throw
                if ( i + 1 >= length ) {
                    throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
                }
                char c2 = s.charAt( i + 1 );
                if ( !isLowSurrogate( c2 ) ) {
                    throw new FunctionCallException("Mismatched surrogate pair in translate function");
                }
                result[count] = String.valueOf( new char[] { c1, c2 } );
                i++; // the low surrogate has been consumed as part of the pair
            }
            else {
                result[count] = String.valueOf( c1 );
            }
            count++;
        }

        if ( count == result.length ) {
            return result;
        }

        // trim array
        String[] trimmed = new String[count];
        System.arraycopy( result, 0, trimmed, 0, count );
        return trimmed;
    }

    /**
     * Tells whether <code>c</code> lies in the high (leading) surrogate
     * range <code>0xD800</code>-<code>0xDBFF</code>.
     */
    private static boolean isHighSurrogate(char c) {
        return c >= 0xD800 && c <= 0xDBFF;
    }

    /**
     * Tells whether <code>c</code> lies in the low (trailing) surrogate
     * range <code>0xDC00</code>-<code>0xDFFF</code>.
     */
    private static boolean isLowSurrogate(char c) {
        return c >= 0xDC00 && c <= 0xDFFF;
    }

}