/*
* @(#)$Id: UnicodeUtil.java,v 1.7 2001/07/31 22:40:06 Bear Exp $
*
* Copyright 2001 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the proprietary information of Sun Microsystems, Inc.
* Use is subject to license terms.
*
*/
package com.sun.msv.datatype.xsd;
/**
* Unicode-related utility functions.
*
* @author <a href="mailto:kohsuke.kawaguchi@eng.sun.com">Kohsuke KAWAGUCHI</a>
*/
public class UnicodeUtil {

    /** Lowest code unit of the high (leading) surrogate range, inclusive. */
    private static final int HIGH_SURROGATE_MIN = 0xD800;

    /** Lowest code unit of the low (trailing) surrogate range, inclusive. */
    private static final int LOW_SURROGATE_MIN = 0xDC00;

    /** Highest code unit of the low (trailing) surrogate range, inclusive. */
    private static final int LOW_SURROGATE_MAX = 0xDFFF;

    /**
     * Counts the number of "characters" in a Unicode string.
     *
     * <p>
     * "Character" here is defined by http://www.w3.org/TR/REC-xml#NT-Char.
     * In practice the only work this method does on top of
     * {@link String#length()} is to count each surrogate pair
     * (a high surrogate immediately followed by a low surrogate) once
     * instead of twice.
     *
     * <p>
     * Strings coming from an XML parser never contain unpaired surrogates,
     * but this method is also safe on strings that do: a surrogate that is
     * not part of a well-formed pair is counted as one character on its own
     * rather than being silently dropped from the count.
     *
     * @param str
     *      the string to measure; must not be null.
     * @return
     *      the number of characters in <code>str</code>, where a surrogate
     *      pair counts as one.
     * @throws NullPointerException
     *      if <code>str</code> is null.
     */
    public static int countLength( String str ) {
        final int len = str.length();
        int count = 0;

        for( int i = 0; i < len; i++ ) {
            // every iteration starts on a new character, so it always counts
            count++;

            // Only when a high surrogate is really followed by a low
            // surrogate do the two code units form a single character;
            // in that case consume the low half as well.
            if( isHighSurrogate(str.charAt(i))
             && i + 1 < len
             && isLowSurrogate(str.charAt(i + 1)) ) {
                i++;
            }
        }
        return count;
    }

    /** Tests whether the code unit is in the high surrogate range 0xD800-0xDBFF. */
    private static boolean isHighSurrogate( char ch ) {
        return HIGH_SURROGATE_MIN <= ch && ch < LOW_SURROGATE_MIN;
    }

    /** Tests whether the code unit is in the low surrogate range 0xDC00-0xDFFF. */
    private static boolean isLowSurrogate( char ch ) {
        return LOW_SURROGATE_MIN <= ch && ch <= LOW_SURROGATE_MAX;
    }
}