/* eXist Native XML Database
* Copyright (C) 2006-2009, The eXist Project
* http://exist-db.org/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/
package org.exist.xquery.functions.fn;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.dom.QName;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Dependency;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import java.text.Normalizer;
import java.util.Locale;
/**
* Implements fn:normalize-unicode()
*
* @author perig
*
*/
public class FunNormalizeUnicode extends Function {
protected static final Logger logger = LogManager.getLogger(FunNormalizeUnicode.class);
protected static final String FUNCTION_DESCRIPTION_0_PARAM =
"Returns the value of the context item normalized according to the " +
"nomalization form \"NFC\"\n\n";
protected static final String FUNCTION_DESCRIPTION_1_PARAM =
"Returns the value of $arg normalized according to the " +
"normalization criteria for a normalization form identified " +
"by the value of $normalization-form. The effective value of " +
"the $normalization-form is computed by removing leading and " +
"trailing blanks, if present, and converting to upper case.\n\n" +
"If the value of $arg is the empty sequence, returns the zero-length string.\n\n" +
"See [Character Model for the World Wide Web 1.0: Normalization] " +
"for a description of the normalization forms.\n\n" +
"- If the effective value of $normalization-form is \"NFC\", then the value " +
"returned by the function is the value of $arg in Unicode Normalization Form C (NFC).\n" +
"- If the effective value of $normalization-form is \"NFD\", then the value " +
"returned by the function is the value of $arg in Unicode Normalization Form D (NFD).\n" +
"- If the effective value of $normalization-form is \"NFKC\", then the value " +
"returned by the function is the value of $arg in Unicode Normalization Form KC (NFKC).\n" +
"- If the effective value of $normalization-form is \"NFKD\", then the value " +
"returned by the function is the value of $arg in Unicode Normalization Form KD (NFKD).\n" +
"- If the effective value of $normalization-form is \"FULLY-NORMALIZED\", then the value " +
"returned by the function is the value of $arg in the fully normalized form.\n" +
"- If the effective value of $normalization-form is the zero-length string, " +
"no normalization is performed and $arg is returned.\n\n" +
"Conforming implementations must support normalization form \"NFC\" and may " +
"support normalization forms \"NFD\", \"NFKC\", \"NFKD\", \"FULLY-NORMALIZED\". " +
"They may also support other normalization forms with implementation-defined semantics. " +
"If the effective value of the $normalization-form is other than one of the values " +
"supported by the implementation, then an error is raised [err:FOCH0003].";
protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The unicode string to normalize");
protected static final FunctionParameterSequenceType NF_PARAM = new FunctionParameterSequenceType("normalization-form", Type.STRING, Cardinality.ONE, "The normalization form");
protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.ONE, "the normalized text");
public final static FunctionSignature signatures [] = {
new FunctionSignature(
new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
FUNCTION_DESCRIPTION_0_PARAM,
new SequenceType[] { ARG_PARAM },
RETURN_TYPE
),
new FunctionSignature (
new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
FUNCTION_DESCRIPTION_1_PARAM,
new SequenceType[] { ARG_PARAM, NF_PARAM },
RETURN_TYPE
)
};
public FunNormalizeUnicode(XQueryContext context, FunctionSignature signature) {
super(context, signature);
}
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
if (context.getProfiler().isEnabled()) {
context.getProfiler().start(this);
context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
if (contextSequence != null)
{context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);}
if (contextItem != null)
{context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());}
}
if (contextItem != null)
{contextSequence = contextItem.toSequence();}
Sequence result;
final Sequence s1 = getArgument(0).eval(contextSequence);
if (s1.isEmpty())
{result = StringValue.EMPTY_STRING;}
else {
String newNormalizationForm = "NFC";
if (getArgumentCount() > 1)
{newNormalizationForm = getArgument(1).eval(contextSequence).getStringValue().toUpperCase().trim();}
//TODO : handle the "FULLY-NORMALIZED" string...
if ("".equals(newNormalizationForm))
{result = new StringValue(s1.getStringValue());}
else {
try {
Normalizer.Form form = Normalizer.Form.valueOf(newNormalizationForm);
result = new StringValue(Normalizer.normalize(s1.getStringValue(), form));
} catch (IllegalArgumentException e) {
throw new XPathException(this, ErrorCodes.FOCH0003, "Unknown normalization form: " +
newNormalizationForm);
}
}
}
if (context.getProfiler().isEnabled())
{context.getProfiler().end(this, "", result);}
return result;
}
}