/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.complexscripts.util;

import org.apache.fop.util.CharUtilities;

/**
 * <p>UTF32 related utilities.</p>
 *
 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
 */
public final class UTF32 {

    private UTF32() {
    }

    /**
     * Convert Java string (UTF-16) to a Unicode scalar array (UTF-32).
     * Note that if there are any non-BMP encoded characters present in the
     * input, then the number of entries in the output array will be less
     * than the number of elements in the input string, because each
     * well-formed surrogate pair is combined into a single scalar value.
     * An isolated (unpaired) surrogate is either replaced by
     * <code>substitution</code> or reported by an exception, depending on
     * <code>errorOnSubstitution</code>.
     * @param s input string
     * @param substitution value to substitute for ill-formed surrogate
     * @param errorOnSubstitution throw runtime exception (IllegalArgumentException) in
     * case this argument is true and a substitution would be attempted
     * @return output scalar array
     * @throws IllegalArgumentException if substitution required and errorOnSubstitution
     *   is not false
     */
    public static Integer[] toUTF32(String s, int substitution, boolean errorOnSubstitution)
        throws IllegalArgumentException {
        int n = s.length();
        if (n == 0) {
            return new Integer[0];
        }
        // Worst case (no surrogate pairs) needs one output entry per input char.
        Integer[] sa = new Integer[n];
        int k = 0;
        for (int i = 0; i < n; i++) {
            int c = s.charAt(i);
            if ((c >= 0xD800) && (c < 0xE000)) {
                if (c < 0xDC00) {
                    // High (leading) surrogate: must be followed by a low surrogate.
                    // A missing successor is modelled as 0, which is never a low surrogate.
                    int s2 = ((i + 1) < n) ? s.charAt(i + 1) : 0;
                    if ((s2 >= 0xDC00) && (s2 < 0xE000)) {
                        c = ((c - 0xD800) << 10) + (s2 - 0xDC00) + 0x10000;
                        i++; // the low surrogate has been consumed as well
                    } else if (errorOnSubstitution) {
                        throw new IllegalArgumentException(
                            "isolated high (leading) surrogate");
                    } else {
                        c = substitution;
                    }
                } else if (errorOnSubstitution) {
                    // Low (trailing) surrogate that was not preceded by a high surrogate.
                    throw new IllegalArgumentException(
                        "isolated low (trailing) surrogate");
                } else {
                    c = substitution;
                }
            }
            sa[k++] = c;
        }
        if (k == n) {
            return sa;
        }
        // At least one surrogate pair was merged: trim the array to the entries used.
        Integer[] na = new Integer[k];
        System.arraycopy(sa, 0, na, 0, k);
        return na;
    }

    /**
     * Convert a Unicode scalar array (UTF-32) to a Java string (UTF-16).
     * Values in the range U+0000..U+FFFF (excluding surrogates) are emitted as a
     * single UTF-16 code unit; values in the range U+10000..U+10FFFF are emitted
     * as a surrogate pair.
     * @param sa input scalar array
     * @return output (UTF-16) string
     * @throws IllegalArgumentException if an input scalar value is illegal,
     *   e.g., a surrogate or out of range (negative or greater than 0x10FFFF)
     */
    public static String fromUTF32(Integer[] sa) throws IllegalArgumentException {
        StringBuilder sb = new StringBuilder(sa.length);
        for (int s : sa) {
            if (s < 0) {
                // A negative value is not a scalar value; without this check the
                // (char) cast below would silently truncate it to 16 bits.
                throw new IllegalArgumentException(
                    "illegal scalar value " + s + "; out of range for UTF-16");
            } else if (s < 0x10000) {
                // BMP range is 0x0000..0xFFFF inclusive; U+FFFF must be handled here,
                // otherwise (s - 0x10000) goes negative in the supplementary branch
                // and yields the surrogate pair of U+10FFFF.
                if ((s < 0xD800) || (s > 0xDFFF)) {
                    sb.append((char) s);
                } else {
                    // NOTE(review): substring(2, length - 1) drops the first two and the
                    // last character of the helper's result; presumably that result is
                    // an NCR such as "&#x...;" - confirm that the resulting message does
                    // not end up with a doubled 'x' after the "0x" prefix.
                    String ncr = CharUtilities.charToNCRef(s);
                    throw new IllegalArgumentException(
                        "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
                        + "; cannot be UTF-16 surrogate");
                }
            } else if (s < 0x110000) {
                // Supplementary plane: split the 20-bit offset into two 10-bit halves.
                int offset = s - 0x10000;
                sb.append((char) (((offset >> 10) & 0x3FF) + 0xD800));
                sb.append((char) ((offset & 0x3FF) + 0xDC00));
            } else {
                String ncr = CharUtilities.charToNCRef(s);
                throw new IllegalArgumentException(
                    "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
                    + "; out of range for UTF-16");
            }
        }
        return sb.toString();
    }

}