/* * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package com.sun.max.unsafe; import java.io.*; import sun.misc.*; import com.sun.max.lang.*; import com.sun.max.memory.*; import com.sun.max.util.*; import com.sun.max.vm.*; import com.sun.max.vm.hosted.*; /** * Utilities for converting between Java strings and C strings (encoded as UTF8 bytes). */ public final class CString { /** * Denotes an error for methods that cannot denote eror in the result. */ public static boolean parseError; private CString() { } /** * Determines the length of a NULL terminated C string located in natively {@link Memory#allocate(Size) allocated} memory. * @param cString the string for which to get the length * @return the length */ public static Size length(Pointer cString) { Pointer p = cString; while (p.readByte(0) != (byte) 0) { p = p.plus(1); } return p.minus(cString).asSize(); } /** * Gets the byte at given index in C string with bounds check. * * @param cString the C string * @param length length of C string (@see length) * @param index index of byte to get * @return -1 if the index is out of range or the byte at the index */ public static int getByte(Pointer cString, Size length, Offset index) { if (index.lessThan(length.asOffset())) { return cString.readByte(index); } return -1; } /** * Converts a NULL terminated C string located in natively allocated memory to a Java string. */ public static String utf8ToJava(Pointer cString) throws Utf8Exception { final int n = length(cString).toInt(); final byte[] bytes = new byte[n]; Memory.readBytes(cString, n, bytes); return Utf8.utf8ToString(false, bytes); } /** * Creates a NULL terminated C string (in natively allocated} memory) from a Java string. * The returned C string must be deallocated by {@link Memory#deallocate(Address)} when finished with. */ public static Pointer utf8FromJava(String string) { final byte[] utf8 = Utf8.stringToUtf8(string); final Pointer cString = Memory.mustAllocate(utf8.length + 1); Pointer p = cString; for (byte utf8Char : utf8) { p.writeByte(0, utf8Char); p = p.plus(1); } p.writeByte(0, (byte) 0); return cString; } public static byte[] read(InputStream stream) throws IOException { final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); while (true) { final int ch = stream.read(); if (ch < 0) { throw new IOException(); } buffer.write(ch); if (ch == 0) { return buffer.toByteArray(); } } } /** * Fills a given buffer with a zero-terminated sequence of bytes from a source buffer. * * @param source the byte array containing the source bytes * @param start the start offset in {@code source} of the bytes to be written (inclusive) * @param end the end offset of {@code source} of the bytes to be written (exclusive) * @param buffer a pointer to the beginning of the buffer * @param bufferSize the size of the buffer * @return an index into the next byte to be written which is start <= result <= end */ public static int writeBytes(byte[] source, int start, int end, Pointer buffer, int bufferSize) { final int n = Math.min(bufferSize - 1, end - start); for (int i = 0; i < n; i++) { buffer.writeByte(i, source[start + i]); } buffer.writeByte(n, (byte) 0); return start + n; } private static void setWord(long address, int index, long value, boolean unsafe) { if (unsafe) { WithoutAccessCheck.unsafe.putAddress(address + Word.size() * index, value); } else { Pointer.fromLong(address).setWord(index, Address.fromLong(value)); } } private static void writeByte(long address, int offset, int value, boolean unsafe) { if (unsafe) { WithoutAccessCheck.unsafe.putByte(address + offset, (byte) value); } else { Pointer.fromLong(address).writeByte(offset, (byte) value); } } /** * Fills a given buffer with the bytes in the UTF8 representation of a string following by a terminating zero. The * maximum number of bytes written to the buffer is limited to the number of leading characters of {@code string} * that can be completely encoded in {@code bufferSize - 2} bytes. * * @param string the String to write to the buffer * @param buffer a pointer to the beginning of the buffer * @param bufferSize the size of the buffer * @param unsafe specifies if {@link Unsafe} should be used instead of boxed memory * @return a pointer to the position in the buffer following the terminating zero character */ public static long writeUtf8(final String string, final long buffer, final int bufferSize, boolean unsafe) { int position = 0; final int endPosition = bufferSize - 1; for (int i = 0; i < string.length(); i++) { final char ch = string.charAt(i); if ((ch >= 0x0001) && (ch <= 0x007F)) { if (position >= endPosition) { break; } writeByte(buffer, position++, ch, unsafe); } else if (ch > 0x07FF) { if (position + 2 >= endPosition) { break; } writeByte(buffer, position++, (byte) (0xe0 | (byte) (ch >> 12)), unsafe); writeByte(buffer, position++, (byte) (0x80 | ((ch & 0xfc0) >> 6)), unsafe); writeByte(buffer, position++, (byte) (0x80 | (ch & 0x3f)), unsafe); } else { if (position + 1 >= endPosition) { break; } writeByte(buffer, position++, (byte) (0xc0 | (byte) (ch >> 6)), unsafe); writeByte(buffer, position++, (byte) (0x80 | (ch & 0x3f)), unsafe); } } writeByte(buffer, position, 0, unsafe); return buffer + position + 1; } /** * Fills a given buffer with the bytes in the UTF8 representation of a string following by a terminating zero. The * maximum number of bytes written to the buffer is limited to the number of leading characters of {@code string} * that can be completely encoded in {@code bufferSize - 2} bytes. * * @param chars the characters to write to the buffer * @param start the index of the character in {@code string} from which to start copying * @param buffer a pointer to the beginning of the buffer * @param bufferSize the size of the buffer * @return the number of characters from {@code string} written to the buffer */ public static int writePartialUtf8(final char[] chars, final int start, final Pointer buffer, final int bufferSize) { int position = 0; final int endPosition = bufferSize - 1; int i = start; while (i < chars.length) { final char ch = chars[i]; if ((ch >= 0x0001) && (ch <= 0x007F)) { if (position >= endPosition) { break; } buffer.writeByte(position++, (byte) ch); } else if (ch > 0x07FF) { if (position + 2 >= endPosition) { break; } buffer.writeByte(position++, (byte) (0xe0 | (byte) (ch >> 12))); buffer.writeByte(position++, (byte) (0x80 | ((ch & 0xfc0) >> 6))); buffer.writeByte(position++, (byte) (0x80 | (ch & 0x3f))); } else { if (position + 1 >= endPosition) { break; } buffer.writeByte(position++, (byte) (0xc0 | (byte) (ch >> 6))); buffer.writeByte(position++, (byte) (0x80 | (ch & 0x3f))); } i++; } buffer.writeByte(position, (byte) 0); return i; } /** * Fills a given buffer with the bytes in the UTF8 representation of a string following by a terminating zero. The * maximum number of bytes written to the buffer is limited to the number of leading characters of {@code string} * that can be completely encoded in {@code bufferSize - 2} bytes. * * @param string the String to write to the buffer * @param start the index of the character in {@code string} from which to start copying * @param length the number of characters in {@code string} to copy * @param buffer a pointer to the beginning of the buffer * @param bufferSize the size of the buffer * @return the number of characters written to the buffer including the terminating zero */ public static int writePartialUtf8(final String string, final int start, int length, final Pointer buffer, final int bufferSize) { int position = 0; final int endPosition = bufferSize - 1; int i = start; int end = i + length; while (i < end) { final char ch = string.charAt(i); if ((ch >= 0x0001) && (ch <= 0x007F)) { if (position >= endPosition) { break; } buffer.writeByte(position++, (byte) ch); } else if (ch > 0x07FF) { if (position + 2 >= endPosition) { break; } buffer.writeByte(position++, (byte) (0xe0 | (byte) (ch >> 12))); buffer.writeByte(position++, (byte) (0x80 | ((ch & 0xfc0) >> 6))); buffer.writeByte(position++, (byte) (0x80 | (ch & 0x3f))); } else { if (position + 1 >= endPosition) { break; } buffer.writeByte(position++, (byte) (0xc0 | (byte) (ch >> 6))); buffer.writeByte(position++, (byte) (0x80 | (ch & 0x3f))); } i++; } buffer.writeByte(position++, (byte) 0); return position; } public static byte[] toByteArray(Pointer start, int length) { final byte[] buffer = new byte[length]; for (int i = 0; i < length; i++) { buffer[i] = start.getByte(i); } return buffer; } /** * Copies an array of Java strings into an array of C strings. The memory for the C string array and each * element in the array is allocated in one memory chunk. The C string array is first in the chunk, followed by 0 if * {@code appendNullDelimiter == true}, followed by {@code strings.length} null terminated C strings. De-allocating * the memory for the buffer is the responsibility of the caller. * * @param strings an array of Java strings * @param appendNullDelimiter {@code true} if a null delimiter character '\0' should be appended * @param unsafe specifies if {@link Unsafe} should be used instead of boxed memory * @return a buffer that can be cast to the C type {@code char**} and used as the first argument to a C * {@code main} function */ public static long utf8ArrayFromStringArray(String[] strings, boolean appendNullDelimiter, boolean unsafe) { final int nullDelimiter = appendNullDelimiter ? 1 : 0; final int pointerArraySize = Word.size() * (strings.length + nullDelimiter); int bufferSize = pointerArraySize; final int[] utf8Lengths = new int[strings.length]; for (int i = 0; i < strings.length; ++i) { final String s = strings[i]; final int utf8Length = Utf8.utf8Length(s); utf8Lengths[i] = utf8Length; bufferSize += utf8Length + 1; } long buffer = unsafe ? WithoutAccessCheck.unsafe.allocateMemory(bufferSize) : Memory.mustAllocate(bufferSize).toLong(); long stringPointer = buffer + pointerArraySize; for (int i = 0; i < strings.length; ++i) { final String s = strings[i]; setWord(buffer, i, stringPointer, unsafe); stringPointer = CString.writeUtf8(s, stringPointer, utf8Lengths[i] + 1, unsafe); } if (appendNullDelimiter) { setWord(buffer, strings.length, 0L, unsafe); } return buffer; } public static boolean equals(Pointer cstring, String string) { if (cstring.isZero()) { return false; } for (int i = 0; i < string.length(); i++) { final byte ch = cstring.getByte(i); if (ch == 0 || ch != string.charAt(i)) { return false; } } return cstring.getByte(string.length()) == 0; } public static boolean equals(Pointer cstring1, Pointer cstring2) { if (cstring1.isZero() || cstring2.isZero()) { return false; } int i; for (i = 0; i < CString.length(cstring1).toInt(); i++) { final byte ch1 = cstring1.getByte(i); final byte ch2 = cstring2.getByte(i); if (ch2 == 0 || ch1 != ch2) { return false; } } return cstring2.getByte(i) == 0; } /** * Determines if a given C string starts with a given prefix. * * @param cstring the C string to test * @param prefix the prefix to test against * @return {@code true} if {@code cstring} starts with {@code prefix} */ public static boolean startsWith(Pointer cstring, String prefix) { if (cstring.isZero()) { return false; } for (int i = 0; i < prefix.length(); i++) { final byte ch = cstring.getByte(i); if (ch == 0 || ch != prefix.charAt(i)) { return false; } } return true; } /** * Determines if a given C string ends with a given suffix. * * @param cstring the C string to test * @param suffix the suffix to test against * @return {@code true} if {@code cstring} starts with {@code suffix} */ public static boolean endsWith(Pointer cstring, String suffix) { if (cstring.isZero()) { return false; } int sl = suffix.length(); if (sl == 0) { return true; } int csl = CString.length(cstring).toInt(); if (sl > csl) { return false; } for (int i = sl - 1; i >= 0; i--) { final byte ch = cstring.getByte(csl - 1); if (ch != suffix.charAt(i)) { return false; } csl--; } return true; } /** * Append a (UTF8) {@link String} to a C string. * @param cstring * @param string * @return new C string or {@link Pointer#isZero()} if can't allocate. */ public static Pointer append(Pointer cstring, String string) { Size csl = CString.length(cstring); int sl = string.length(); Pointer result = Memory.allocate(csl.plus(sl).plus(1)); if (result.isZero()) { return result; } Memory.copyBytes(cstring, result, csl); for (int i = 0; i < sl; i++) { result.setByte(i + csl.toInt(), (byte) string.charAt(i)); } result.setByte(csl.toInt() + sl, (byte) 0); return result; } /** * Append two C strings. * @param cstring1 * @param cstring2 * @return new C string or {@link Pointer#isZero()} if can't allocate. */ public static Pointer appendCString(Pointer cstring1, Pointer cstring2) { Size csl1 = CString.length(cstring1); Size csl2 = CString.length(cstring2); Size nl = csl1.plus(csl2); Pointer result = Memory.allocate(nl.plus(1)); if (result.isZero()) { return result; } Memory.copyBytes(cstring1, result, csl1); Memory.copyBytes(cstring2, result.plus(csl1), csl2); result.setByte(nl.toInt(), (byte) 0); return result; } /** * Copies a C string. * * @param cstring * @return new C string or {@link Pointer#isZero()} if can't allocate. */ public static Pointer copy(Pointer cstring) { Size length = CString.length(cstring); Pointer result = Memory.allocate(length.plus(1)); if (result.isZero()) { return result; } Memory.copyBytes(cstring, result, length); result.setByte(length.toInt(), (byte) 0); return result; } /** * Chop suffix from C string. Assert {@code cstring.endsWith(suffix)} * @param cstring * @param suffix * @return newly allocated C string without suffix */ public static Pointer chopSuffix(Pointer cstring, String suffix) { Size csl = CString.length(cstring); int sl = suffix.length(); if (csl.toInt() < sl) { return cstring; } Size nl = csl.minus(sl); Pointer result = Memory.allocate(nl); if (result.isZero()) { return result; } Memory.copyBytes(cstring, result, nl); result.setByte(nl.toInt(), (byte) 0); return result; } /** * Parse a size specification nX, where X := {K, M, G, T, P, k, m, g, t, p}. * * For backwards compatibility with HotSpot, * lower case letters shall have the same respective meaning as the upper case ones, * even though their non-colloquialized definitions would suggest otherwise. * * @param p a pointer to the C string * @param length the maximum length of the C string * @param startIndex the starting index into the C string pointed to by the first argument * @return the scaled value or -1 if error */ public static long parseScaledValue(Pointer p, Size length, int startIndex) { long result = 0L; boolean done = false; int index = startIndex; while (index < length.toInt()) { if (done) { // having any additional characters is an error return -1L; } final int character = getByte(p, length, Offset.fromInt(index)); index++; if ('0' <= character && character <= '9') { result *= 10; result += character - '0'; } else { done = true; switch (character) { case 'K': case 'k': { result *= Longs.K; break; } case 'M': case 'm': { result *= Longs.M; break; } case 'G': case 'g': { result *= Longs.G; break; } case 'T': case 't': { result *= Longs.T; break; } case 'P': case 'p': { result *= Longs.P; break; } default: { // illegal character return -1L; } } } } return result; } public static int parseInt(Pointer pointer) { int result; parseError = false; Pointer ptr = pointer; if ((char) ptr.getByte(0) == '-') { ptr = ptr.plus(1); } result = parseUnsignedInt(ptr); if (result < 0) { parseError = true; return -1; } else { return ptr == pointer ? result : -result; } } public static int parseUnsignedInt(Pointer pointer) { int result = 0; Pointer ptr = pointer; while (true) { final char ch = (char) ptr.getByte(); if (ch == 0) { break; } if (ch >= '0' && ch <= '9') { result *= 10; result += ch - '0'; } else { return -1; } ptr = ptr.plus(1); } return result; } public static long parseUnsignedLong(String string) { long result = 0L; for (int i = 0; i < string.length(); i++) { final char ch = string.charAt(i); if (ch >= '0' && ch <= '9') { result *= 10L; result += string.charAt(i) - '0'; } else { return -1L; } } return result; } /** * Parses a given C string as a floating value. * * @param cstring the C string to parse * @return the value of {@code cstring} as a float or {@link Float#NaN} if {@code cstring} does not contain a valid * float value */ public static float parseFloat(Pointer cstring) { if (MaxineVM.isHosted()) { try { return Float.parseFloat(utf8ToJava(cstring)); } catch (Exception e) { return Float.NaN; } } // Defer to native code so that all the FloatingDecimal logic does not // have to be in the VM boot image. return MaxineVM.native_parseFloat(cstring, Float.NaN); } }