/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.ngrinder.common.util;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.apache.commons.lang.StringUtils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.Charset;

import static org.ngrinder.common.util.ExceptionUtils.processException;

/**
 * Automatic encoding detection utility.
 *
 * @author JunHo Yoon
 * @since 3.0
 */
public abstract class EncodingUtils {

	/** Lowest detector confidence at which a detected charset is trusted over the default. */
	private static final int MINIMAL_CONFIDENCE_LEVEL = 70;

	/**
	 * Decode the byte array using the encoding chosen by
	 * {@link #detectEncoding(byte[], String)}.
	 *
	 * @param data
	 *            byte array to decode
	 * @param defaultEncoding
	 *            the encoding to use when no encoding can be detected reliably
	 * @return decoded string
	 * @throws IOException
	 *             occurs when the decoding fails, e.g. the chosen encoding name is not
	 *             supported
	 */
	public static String getAutoDecodedString(byte[] data, String defaultEncoding) throws IOException {
		return new String(data, detectEncoding(data, defaultEncoding));
	}

	/**
	 * Detect the encoding of the given data.
	 *
	 * The detected charset is returned only when the detector reports a confidence of at
	 * least {@value #MINIMAL_CONFIDENCE_LEVEL} and the running JVM supports that charset.
	 * In every other case, including when the detector finds no match at all, the
	 * default encoding is returned.
	 *
	 * @param data
	 *            byte array to inspect
	 * @param defaultEncoding
	 *            the encoding to return when no encoding can be detected reliably
	 * @return detected encoding name, or {@code defaultEncoding} when the detection is not
	 *         reliable
	 * @throws IOException
	 *             declared by the signature and kept so existing callers stay source
	 *             compatible
	 */
	public static String detectEncoding(byte[] data, String defaultEncoding) throws IOException {
		CharsetDetector detector = new CharsetDetector();
		detector.setText(data);
		CharsetMatch cm = detector.detect();
		// detect() returns null when no charset matches; fall back instead of failing with an NPE.
		if (cm == null) {
			return defaultEncoding;
		}
		String estimatedEncoding = cm.getName();
		// Check the confidence first so that a low-confidence guess is never passed to
		// Charset.isSupported at all.
		boolean isReliable = cm.getConfidence() >= MINIMAL_CONFIDENCE_LEVEL
				&& Charset.isSupported(estimatedEncoding);
		return isReliable ? estimatedEncoding : defaultEncoding;
	}

	/**
	 * Encode the given path with UTF-8.
	 *
	 * "/" is not encoded; everything between separators is encoded with
	 * {@link URLEncoder}, so its form-encoding rules apply (e.g. a space becomes "+").
	 * Each run between separators is encoded as a whole so that characters outside the
	 * Basic Multilingual Plane (surrogate pairs) are encoded as one UTF-8 sequence
	 * instead of being split into two unencodable halves.
	 *
	 * @param path
	 *            path to encode, must not be null
	 * @return encoded path
	 */
	public static String encodePathWithUTF8(String path) {
		try {
			int length = path.length();
			StringBuilder result = new StringBuilder(length);
			int segmentStart = 0;
			for (int i = 0; i <= length; i++) {
				boolean atEnd = (i == length);
				if (!atEnd && path.charAt(i) != '/') {
					continue;
				}
				// Flush the pending run of non-separator characters in one call.
				if (i > segmentStart) {
					result.append(URLEncoder.encode(path.substring(segmentStart, i), "UTF-8"));
				}
				if (!atEnd) {
					result.append('/');
				}
				segmentStart = i + 1;
			}
			return result.toString();
		} catch (UnsupportedEncodingException e) {
			throw processException(e);
		}
	}
}