/** ******************************************************************************* * Copyright (C) 1996-2006, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * ******************************************************************************* */ /** * A JNI interface for ICU converters. * * * @author Ram Viswanadha, IBM */ package com.ibm.icu4jni.charset; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import com.ibm.icu4jni.common.ErrorCode; // BEGIN android-removed // import com.ibm.icu4jni.converters.NativeConverter; // END android-removed public final class CharsetEncoderICU extends CharsetEncoder { private static final int INPUT_OFFSET = 0, OUTPUT_OFFSET = 1, INVALID_CHARS = 2, INPUT_HELD = 3, LIMIT = 4; /* data is 3 element array where * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written * data[INVALID_CHARS] = number of invalid chars * data[INPUT_HELD] = number of input chars held in the converter's state */ private int[] data = new int[LIMIT]; /* handle to the ICU converter that is opened */ private long converterHandle=0; private char[] input = null; private byte[] output = null; // BEGIN android-added private char[] allocatedInput = null; private byte[] allocatedOutput = null; // END android-added // These instance variables are // always assigned in the methods // before being used. This class // inhrently multithread unsafe // so we dont have to worry about // synchronization private int inEnd; private int outEnd; private int ec; private int savedInputHeldLen; private int onUnmappableInput = NativeConverter.STOP_CALLBACK;; private int onMalformedInput = NativeConverter.STOP_CALLBACK;; /** * Construcs a new encoder for the given charset * @param cs for which the decoder is created * @param cHandle the address of ICU converter * @param replacement the substitution bytes * @stable ICU 2.4 */ public CharsetEncoderICU(Charset cs, long cHandle, byte[] replacement) { super( cs, (float) NativeConverter.getAveBytesPerChar(cHandle), (float) NativeConverter.getMaxBytesPerChar(cHandle), replacement); byte[] sub = replacement(); // The default callback action on unmappable input // or malformed input is to ignore so we set ICU converter // callback to stop and report the error ec = NativeConverter.setCallbackEncode( cHandle, onMalformedInput, onUnmappableInput, sub, sub.length); converterHandle = cHandle; if (ErrorCode.isFailure(ec)) { throw ErrorCode.getException(ec); } } /** * Sets this encoders replacement string. Substitutes the string in output if an * umappable or illegal sequence is encountered * @param newReplacement to replace the error chars with * @stable ICU 2.4 */ protected void implReplaceWith(byte[] newReplacement) { if (converterHandle != 0) { if (newReplacement.length > NativeConverter.getMaxBytesPerChar(converterHandle)) { throw new IllegalArgumentException("Number of replacement Bytes are greater than max bytes per char"); } ec = NativeConverter.setSubstitutionBytes(converterHandle, newReplacement, newReplacement.length); if (ErrorCode.isFailure(ec)) { throw ErrorCode.getException(ec); } } } /** * Sets the action to be taken if an illegal sequence is encountered * @param newAction action to be taken * @exception IllegalArgumentException * @stable ICU 2.4 */ protected void implOnMalformedInput(CodingErrorAction newAction) { onMalformedInput = NativeConverter.STOP_CALLBACK; if (newAction.equals(CodingErrorAction.IGNORE)) { onMalformedInput = NativeConverter.SKIP_CALLBACK; } else if (newAction.equals(CodingErrorAction.REPLACE)) { onMalformedInput = NativeConverter.SUBSTITUTE_CALLBACK; } byte[] sub = replacement(); ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length); if (ErrorCode.isFailure(ec)) { throw ErrorCode.getException(ec); } } /** * Sets the action to be taken if an illegal sequence is encountered * @param newAction action to be taken * @exception IllegalArgumentException * @stable ICU 2.4 */ protected void implOnUnmappableCharacter(CodingErrorAction newAction) { onUnmappableInput = NativeConverter.STOP_CALLBACK; if (newAction.equals(CodingErrorAction.IGNORE)) { onUnmappableInput = NativeConverter.SKIP_CALLBACK; } else if (newAction.equals(CodingErrorAction.REPLACE)) { onUnmappableInput = NativeConverter.SUBSTITUTE_CALLBACK; } byte[] sub = replacement(); ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length); if (ErrorCode.isFailure(ec)) { throw ErrorCode.getException(ec); } } /** * Flushes any characters saved in the converter's internal buffer and * resets the converter. * @param out action to be taken * @return result of flushing action and completes the decoding all input. * Returns CoderResult.UNDERFLOW if the action succeeds. * @stable ICU 2.4 */ protected CoderResult implFlush(ByteBuffer out) { try { data[OUTPUT_OFFSET] = getArray(out); ec = NativeConverter.flushCharToByte(converterHandle,/* Handle to ICU Converter */ output, /* output array of chars */ outEnd, /* output index+1 to be written */ data /* contains data, inOff,outOff */ ); /* If we don't have room for the output, throw an exception*/ if (ErrorCode.isFailure(ec)) { if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { return CoderResult.OVERFLOW; }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling if(data[INPUT_OFFSET]>0){ return CoderResult.malformedForLength(data[INPUT_OFFSET]); } }else { ErrorCode.getException(ec); } } return CoderResult.UNDERFLOW; } finally { setPosition(out); implReset(); } } /** * Resets the from Unicode mode of converter * @stable ICU 2.4 */ protected void implReset() { NativeConverter.resetCharToByte(converterHandle); data[INPUT_OFFSET] = 0; data[OUTPUT_OFFSET] = 0; data[INVALID_CHARS] = 0; data[INPUT_HELD] = 0; savedInputHeldLen = 0; } /** * Encodes one or more chars. The default behaviour of the * converter is stop and report if an error in input stream is encountered. * To set different behaviour use @see CharsetEncoder.onMalformedInput() * @param in buffer to decode * @param out buffer to populate with decoded result * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding * action succeeds or more input is needed for completing the decoding action. * @stable ICU 2.4 */ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { if (!in.hasRemaining()) { return CoderResult.UNDERFLOW; } data[INPUT_OFFSET] = getArray(in); data[OUTPUT_OFFSET]= getArray(out); data[INPUT_HELD] = 0; // BEGIN android-added data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors. // END android added try { /* do the conversion */ ec = NativeConverter.encode(converterHandle,/* Handle to ICU Converter */ input, /* input array of bytes */ inEnd, /* last index+1 to be converted */ output, /* output array of chars */ outEnd, /* output index+1 to be written */ data, /* contains data, inOff,outOff */ false /* donot flush the data */ ); if (ErrorCode.isFailure(ec)) { /* If we don't have room for the output return error */ if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { return CoderResult.OVERFLOW; } else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) { return CoderResult.unmappableForLength(data[INVALID_CHARS]); } else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) { // in.position(in.position() - 1); return CoderResult.malformedForLength(data[INVALID_CHARS]); } } return CoderResult.UNDERFLOW; } finally { /* save state */ setPosition(in); setPosition(out); } } /** * Ascertains if a given Unicode character can * be converted to the target encoding * * @param c the character to be converted * @return true if a character can be converted * @stable ICU 2.4 * */ public boolean canEncode(char c) { return canEncode((int) c); } /** * Ascertains if a given Unicode code point (32bit value for handling surrogates) * can be converted to the target encoding. If the caller wants to test if a * surrogate pair can be converted to target encoding then the * responsibility of assembling the int value lies with the caller. * For assembling a code point the caller can use UTF16 class of ICU4J and do something like: * <pre> * while(i<mySource.length){ * if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){ * if(UTF16.isTrailSurrogate(mySource[i+1])){ * int temp = UTF16.charAt(mySource,i,i+1,0); * if(!((CharsetEncoderICU) myConv).canEncode(temp)){ * passed=false; * } * i++; * i++; * } * } * } * </pre> * or * <pre> * String src = new String(mySource); * int i,codepoint; * boolean passed = false; * while(i<src.length()){ * codepoint = UTF16.charAt(src,i); * i+= (codepoint>0xfff)? 2:1; * if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){ * passed = false; * } * } * </pre> * * @param codepoint Unicode code point as int value * @return true if a character can be converted * @obsolete ICU 2.4 * @deprecated ICU 3.4 */ public boolean canEncode(int codepoint) { return NativeConverter.canEncode(converterHandle, codepoint); } /** * Releases the system resources by cleanly closing ICU converter opened * @exception Throwable exception thrown by super class' finalize method * @stable ICU 2.4 */ protected void finalize() throws Throwable { NativeConverter.closeConverter(converterHandle); super.finalize(); converterHandle=0; } //------------------------------------------ // private utility methods //------------------------------------------ private final int getArray(ByteBuffer out) { if(out.hasArray()){ // BEGIN android-changed: take arrayOffset into account output = out.array(); outEnd = out.arrayOffset() + out.limit(); return out.arrayOffset() + out.position(); // END android-changed }else{ outEnd = out.remaining(); // BEGIN android-added if (allocatedOutput == null || (outEnd > allocatedOutput.length)) { allocatedOutput = new byte[outEnd]; } output = allocatedOutput; // END android-added //since the new // buffer start position // is 0 return 0; } } private final int getArray(CharBuffer in) { if(in.hasArray()){ // BEGIN android-changed: take arrayOffset into account input = in.array(); inEnd = in.arrayOffset() + in.limit(); return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number fo bytes held in previous conversion*/ // END android-changed }else{ inEnd = in.remaining(); // BEGIN android-added if (allocatedInput == null || (inEnd > allocatedInput.length)) { allocatedInput = new char[inEnd]; } input = allocatedInput; // END android-added // save the current position int pos = in.position(); in.get(input,0,inEnd); // reset the position in.position(pos); // the start position // of the new buffer // is whatever is savedInputLen return savedInputHeldLen; } } private final void setPosition(ByteBuffer out) { if (out.hasArray()) { // in getArray method we accessed the // array backing the buffer directly and wrote to // it, so just just set the position and return. // This is done to avoid the creation of temp array. // BEGIN android-changed: take arrayOffset into account out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset()); // END android-changed } else { out.put(output, 0, data[OUTPUT_OFFSET]); } // BEGIN android-added // release reference to output array, which may not be ours output = null; // END android-added } private final void setPosition(CharBuffer in){ // BEGIN android-removed // // was there input held in the previous invocation of encodeLoop // // that resulted in output in this invocation? // if(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0){ // int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen; // in.position(len); // savedInputHeldLen = data[INPUT_HELD]; // }else{ // in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen); // savedInputHeldLen = data[INPUT_HELD]; // in.position(in.position() - savedInputHeldLen); // } // END android-removed // BEGIN android-added // Slightly rewired original code to make it cleaner. Also // added a fix for the problem where input charatcers got // lost when invalid characters were encountered. Not sure // what happens when data[INVALID_CHARS] is > 1, though, // since we never saw that happening. int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen; len -= data[INVALID_CHARS]; // Otherwise position becomes wrong. in.position(len); savedInputHeldLen = data[INPUT_HELD]; // was there input held in the previous invocation of encodeLoop // that resulted in output in this invocation? if(!(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0)){ in.position(in.position() - savedInputHeldLen); } // END android-added // BEGIN android-added // release reference to input array, which may not be ours input = null; // END android-added } }