/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tomcat.util.buf; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Locale; import java.util.Map; import org.apache.tomcat.util.res.StringManager; /** * NIO based character decoder. */ public class B2CConverter { private static final StringManager sm = StringManager.getManager(B2CConverter.class); private static final Map<String, Charset> encodingToCharsetCache = new HashMap<>(); // Protected so unit tests can use it protected static final int LEFTOVER_SIZE = 9; static { for (Charset charset: Charset.availableCharsets().values()) { encodingToCharsetCache.put( charset.name().toLowerCase(Locale.ENGLISH), charset); for (String alias : charset.aliases()) { encodingToCharsetCache.put( alias.toLowerCase(Locale.ENGLISH), charset); } } } /** * Obtain the Charset for the given encoding * * @param enc The name of the encoding for the required charset * * @return The Charset corresponding to the requested encoding * * @throws UnsupportedEncodingException If the requested Charset is not * available */ public static Charset getCharset(String enc) throws UnsupportedEncodingException { // Encoding names should all be ASCII String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH); Charset charset = encodingToCharsetCache.get(lowerCaseEnc); if (charset == null) { // Pre-population of the cache means this must be invalid throw new UnsupportedEncodingException( sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc)); } return charset; } private final CharsetDecoder decoder; private ByteBuffer bb = null; private CharBuffer cb = null; /** * Leftover buffer used for incomplete characters. */ private final ByteBuffer leftovers; public B2CConverter(Charset charset) { this(charset, false); } public B2CConverter(Charset charset, boolean replaceOnError) { byte[] left = new byte[LEFTOVER_SIZE]; leftovers = ByteBuffer.wrap(left); CodingErrorAction action; if (replaceOnError) { action = CodingErrorAction.REPLACE; } else { action = CodingErrorAction.REPORT; } // Special case. Use the Apache Harmony based UTF-8 decoder because it // - a) rejects invalid sequences that the JVM decoder does not // - b) fails faster for some invalid sequences if (charset.equals(StandardCharsets.UTF_8)) { decoder = new Utf8Decoder(); } else { decoder = charset.newDecoder(); } decoder.onMalformedInput(action); decoder.onUnmappableCharacter(action); } /** * Reset the decoder state. */ public void recycle() { decoder.reset(); leftovers.position(0); } /** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output * @param endOfInput Is this all of the available data * * @throws IOException If the conversion can not be completed */ public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength()); } else { // Initialize the byte buffer bb.limit(bc.getEnd()); bb.position(bc.getStart()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd()); } else { // Initialize the char buffer cb.limit(cc.getBuffer().length); cb.position(cc.getEnd()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = cb.position(); // Loop until one char is decoded or there is a decoder error do { leftovers.put(bc.substractB()); leftovers.flip(); result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } bb.position(bc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk, if // this continues the char buffer will get resized bc.setOffset(bb.position()); cc.setEnd(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setOffset(bb.position()); cc.setEnd(cb.position()); // Put leftovers in the leftovers byte buffer if (bc.getLength() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(bc.getLength()); bc.substract(leftovers.array(), 0, bc.getLength()); } } } /** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output * @param ic byte input channel * @param endOfInput Is this all of the available data * * @throws IOException If the conversion can not be completed */ public void convert(ByteBuffer bc, CharBuffer cc, ByteChunk.ByteInputChannel ic, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.array())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.array(), bc.arrayOffset() + bc.position(), bc.remaining()); } else { // Initialize the byte buffer bb.limit(bc.limit()); bb.position(bc.position()); } if ((cb == null) || (cb.array() != cc.array())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.array(), cc.limit(), cc.capacity() - cc.limit()); } else { // Initialize the char buffer cb.limit(cc.capacity()); cb.position(cc.limit()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = cb.position(); // Loop until one char is decoded or there is a decoder error do { byte chr; if (bc.remaining() == 0) { int n = ic.realReadBytes(); chr = n < 0 ? -1 : bc.get(); } else { chr = bc.get(); } leftovers.put(chr); leftovers.flip(); result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } bb.position(bc.position()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk, if // this continues the char buffer will get resized bc.position(bb.position()); cc.limit(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.position(bb.position()); cc.limit(cb.position()); // Put leftovers in the leftovers byte buffer if (bc.remaining() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(bc.remaining()); bc.get(leftovers.array(), 0, bc.remaining()); } } } public Charset getCharset() { return decoder.charset(); } }