/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.runtime.value; import java.io.InputStreamReader; import java.io.Reader; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import org.apache.hadoop.io.InputBuffer; import org.apache.hadoop.io.Text; /** * Utilities for {@link StringOption}. * @since 0.8.0 * @version 0.9.1 */ public final class StringOptionUtil { /** * The internal text encoding. */ public static final Charset ENCODING = StandardCharsets.UTF_8; static final ThreadLocal<DecoderBuffer> DECODER_POOL = ThreadLocal.withInitial(DecoderBuffer::new); private static final ThreadLocal<char[]> CHAR_ARRAY_BUFFERS = ThreadLocal.withInitial(() -> new char[512]); private static final int CHAR_ARRAY_PADDING = 16; private StringOptionUtil() { return; } /** * Returns the number of code-points in the given {@link StringOption}. * If the object does represent neither {@code null} nor a valid character string, this operation may raise an * error or return a wrong count. * @param option the target object * @return the number of code-points in this object, or {@code 0} if the object represents {@code null} * @throws NullPointerException if the {@link StringOption} is/represents {@code null} * @since 0.9.1 */ public static int countCodePoints(StringOption option) { Text text = option.get(); byte[] bytes = text.getBytes(); int len = text.getLength(); int index = 0; int count = 0; while (index < len) { byte b = bytes[index]; if ((b & 0b1000_0000) == 0) { index += 1; } else if ((b & 0b1110_0000) == 0b1100_0000) { index += 2; } else if ((b & 0b1111_0000) == 0b1110_0000) { index += 3; } else if ((b & 0b1111_1000) == 0b1111_0000) { index += 4; } else if ((b & 0b1111_1100) == 0b1111_1000) { index += 5; } else if ((b & 0b1111_1110) == 0b1111_1100) { index += 6; } else { break; } count++; } if (index != len) { throw new IllegalStateException(option.toString()); } return count; } /** * Returns a {@link Reader} to read the text contents in the {@link StringOption}. * @param option the target {@link StringOption} * @return the created reader * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static Reader asReader(StringOption option) { Text text = option.get(); InputBuffer buffer = new InputBuffer(); buffer.reset(text.getBytes(), 0, text.getLength()); return new InputStreamReader(buffer, ENCODING); } /** * Trims the leading/trailing classical whitespace characters in the {@link StringOption}. * This only removes the following characters: * <ul> * <li> {@code "\t" (HT:U+0009)} </li> * <li> {@code "\n" (LF:U+000a)} </li> * <li> {@code "\r" (CR:U+000d)} </li> * <li> {@code " " (SP:U+0020)} </li> * </ul> * This directly modifies the target {@link StringOption}. * @param option the target {@link StringOption} * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static void trim(StringOption option) { Text text = option.get(); byte[] bytes = text.getBytes(); int length = text.getLength(); int start = 0; int last = length - 1; for (; start <= last; start++) { if (isTrimTarget(bytes[start]) == false) { break; } } for (; last >= start; last--) { if (isTrimTarget(bytes[last]) == false) { break; } } if (start == 0 && last == length - 1) { return; } text.set(bytes, start, last + 1 - start); } private static boolean isTrimTarget(byte b) { switch (b) { case '\t': case '\n': case '\r': case ' ': return true; default: return false; } } /** * Appends the text in the second {@link StringOption} into the first one. * This directly modifies the first {@link StringOption}. * @param target the append target * @param contents the text contents to be appended * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static void append(StringOption target, StringOption contents) { Text text = contents.get(); append(target, text); } /** * Appends the text in the second {@link StringOption} into the first one. * This directly modifies the first {@link StringOption}. * @param target the append target * @param contents the text contents to be appended * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static void append(StringOption target, String contents) { Text buffer = StringOption.BUFFER_POOL.get(); buffer.set(contents); append(target, buffer); } private static void append(StringOption target, Text text) { target.get().append(text.getBytes(), 0, text.getLength()); } /** * Appends the text in the given {@link StringOption} into the {@link StringBuilder}. * @param target the append target * @param contents the text contents to be appended * @throws NullPointerException if the {@link StringOption} is/represents {@code null} * @since 0.9.1 */ public static void append(StringBuilder target, StringOption contents) { CharBuffer buffer = DECODER_POOL.get().decode(contents.get()); target.append(buffer); } /** * Parses the given {@link StringOption} which may represent a {@code int} value. * @param contents the text contents * @return the parsed value * @throws NullPointerException if the {@link StringOption} is/represents {@code null} * @throws IllegalArgumentException if the character sequence is wrong * @since 0.9.1 */ public static int parseInt(StringOption contents) { CharBuffer buffer = DECODER_POOL.get().decode(contents.get()); if (buffer.hasRemaining() == false) { throw invalidNumber(contents); } boolean negative = false; char first = buffer.get(0); if (first < '0') { if (first == '+') { buffer.get(); } else if (first == '-') { buffer.get(); negative = true; } } int negativeResult = 0; while (buffer.hasRemaining()) { char c = buffer.get(); int column = Character.digit(c, 10); if (column < 0) { throw invalidNumber(contents); } // check overflow if (negativeResult < (Integer.MIN_VALUE / 10)) { throw invalidNumber(contents); } negativeResult *= 10; // check overflow if (negativeResult < (Integer.MIN_VALUE | column)) { throw invalidNumber(contents); } negativeResult -= column; } if (negative) { return negativeResult; } else { if (negativeResult == Integer.MIN_VALUE) { throw invalidNumber(contents); } return -negativeResult; } } /** * Parses the given {@link StringOption} which may represent a {@code long} value. * @param contents the text contents * @return the parsed value * @throws NullPointerException if the {@link StringOption} is/represents {@code null} * @throws IllegalArgumentException if the character sequence is wrong * @since 0.9.1 */ public static long parseLong(StringOption contents) { CharBuffer buffer = DECODER_POOL.get().decode(contents.get()); if (buffer.hasRemaining() == false) { throw invalidNumber(contents); } boolean negative = false; char first = buffer.get(0); if (first < '0') { if (first == '+') { buffer.get(); } else if (first == '-') { buffer.get(); negative = true; } } long negativeResult = 0; while (buffer.hasRemaining()) { char c = buffer.get(); int column = Character.digit(c, 10); if (column < 0) { throw invalidNumber(contents); } // check overflow if (negativeResult < (Long.MIN_VALUE / 10)) { throw invalidNumber(contents); } negativeResult *= 10; // check overflow if (negativeResult < (Long.MIN_VALUE | column)) { throw invalidNumber(contents); } negativeResult -= column; } if (negative) { return negativeResult; } else { if (negativeResult == Long.MIN_VALUE) { throw invalidNumber(contents); } return -negativeResult; } } /** * Parses the given {@link StringOption} which may represent a decimal value. * @param contents the text contents * @return the parsed decimal value * @throws NullPointerException if the {@link StringOption} is/represents {@code null} * @throws IllegalArgumentException if the character sequence is wrong * @since 0.9.1 */ public static BigDecimal parseDecimal(StringOption contents) { CharBuffer buffer = DECODER_POOL.get().decode(contents.get()); if (buffer.hasRemaining() == false) { throw invalidNumber(contents); } int length = buffer.remaining(); if (buffer.hasArray()) { char[] array = buffer.array(); int offsetInArray = buffer.position() + buffer.arrayOffset(); return new BigDecimal(array, offsetInArray, length); } else { char[] cbuf = borrowCharArrayBuf(length); buffer.get(cbuf, 0, length); return new BigDecimal(cbuf, 0, length); } } private static NumberFormatException invalidNumber(StringOption contents) { return new NumberFormatException(contents.toString()); } private static char[] borrowCharArrayBuf(int length) { char[] cs = CHAR_ARRAY_BUFFERS.get(); if (cs.length < length) { cs = new char[Math.max(length, cs.length + CHAR_ARRAY_PADDING)]; CHAR_ARRAY_BUFFERS.set(cs); } return cs; } private static final class DecoderBuffer { private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); private CharBuffer charBuffer; DecoderBuffer() { return; } CharBuffer decode(Text text) { if (charBuffer == null || charBuffer.capacity() < text.getLength() * 2) { int newCapacity = Math.max(1024, (int) (text.getLength() * 2.5)); charBuffer = CharBuffer.allocate(newCapacity); } charBuffer.clear(); ByteBuffer bytes = ByteBuffer.wrap(text.getBytes(), 0, text.getLength()); CoderResult result = decoder.decode(bytes, charBuffer, true); if (result.isOverflow() || result.isError()) { try { result.throwException(); } catch (CharacterCodingException e) { throw new IllegalArgumentException(e); } } charBuffer.flip(); return charBuffer; } } }