/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.j2objc.nio.charset;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/*-[
#include "com/google/j2objc/nio/charset/IconvCharsetDecoder.h"
#include "com/google/j2objc/nio/charset/IconvCharsetEncoder.h"
#include "java/io/UnsupportedEncodingException.h"
#include "java/lang/System.h"
]-*/
/**
* iOS native charset support.
*
* @author Tom Ball
*/
public class IOSCharset extends Charset {
// Address of this charset's entry in the native iosCharsets table (a CharsetInfo*),
// carried as a long so it can be stored in a Java field.
private final long charsetInfo;
// Replacement byte sequence copied out of the native CharsetInfo entry at construction
// time; handed to every encoder created by newEncoder().
private final byte[] replacementBytes;
// Registry of charsets keyed by canonical Java name; filled by the native addEncoding().
// Declared before DEFAULT_CHARSET because static initializers run in textual order and
// getDefaultCharset() may write to this map.
private static Map<String, IOSCharset> encodings = new HashMap<String, IOSCharset>();
// Default charset, resolved once during class initialization by getDefaultCharset().
public static final IOSCharset DEFAULT_CHARSET = getDefaultCharset();
/**
 * Builds a charset backed by one native {@code CharsetInfo} record.
 *
 * @param canonicalName the canonical Java name of the charset
 * @param aliases alternative names for the charset
 * @param info address of the native {@code CharsetInfo} record, carried as a long
 */
private IOSCharset(String canonicalName, String[] aliases, long info) {
  super(canonicalName, aliases);
  this.charsetInfo = info;
  // Copy the replacement bytes out of the native record once, up front.
  this.replacementBytes = createReplacementBytes(info);
}
/**
 * Copies the replacement bytes of a native {@code CharsetInfo} record into a new byte array.
 *
 * @param infoP address of the native {@code CharsetInfo} record, carried as a long
 * @return a byte array holding the record's replacement bytes
 */
private static native byte[] createReplacementBytes(long infoP) /*-[
  const CharsetInfo *record = (const CharsetInfo *)infoP;
  const jbyte *bytes = record->replacementBytes;
  return [IOSByteArray arrayWithBytes:bytes count:record->replacementBytesCount];
]-*/;
/**
 * Returns the {@code NSStringEncoding} value stored in this charset's native
 * {@code CharsetInfo} record.
 */
public native long nsEncoding() /*-[
  const CharsetInfo *record = (const CharsetInfo *)self->charsetInfo_;
  return record->nsEncoding;
]-*/;
/**
 * Reports whether this charset contains the given charset.
 *
 * <p>The {@link Charset#contains(Charset)} contract states that every charset contains
 * itself, so a charset equal to this one yields {@code true}. No other containment
 * relationships are tracked by this class, so every other argument, including
 * {@code null}, yields {@code false}.
 *
 * @param charset the charset to test
 * @return {@code true} if {@code charset} equals this charset, {@code false} otherwise
 */
@Override
public boolean contains(Charset charset) {
  return charset != null && equals(charset);
}
/**
 * Creates a new {@code IconvCharsetEncoder} for this charset, configured from the native
 * {@code CharsetInfo} record: average and maximum bytes per char, this charset's
 * replacement bytes, and the address of the iconv name passed as a long.
 */
@Override
public native CharsetEncoder newEncoder() /*-[
  const CharsetInfo *record = (const CharsetInfo *)self->charsetInfo_;
  jfloat averageBytes = record->averageBytesPerChar;
  jfloat maxBytes = record->maxBytesPerChar;
  jlong iconvName = (jlong)record->iconvName;
  return create_ComGoogleJ2objcNioCharsetIconvCharsetEncoder_initWithJavaNioCharsetCharset_withFloat_withFloat_withByteArray_withLong_(
      self, averageBytes, maxBytes, self->replacementBytes_, iconvName);
]-*/;
/**
 * Creates a new {@code IconvCharsetDecoder} for this charset, configured from the native
 * {@code CharsetInfo} record: average and maximum chars per byte, and the address of the
 * iconv name passed as a long.
 */
@Override
public native CharsetDecoder newDecoder() /*-[
  const CharsetInfo *record = (const CharsetInfo *)self->charsetInfo_;
  jfloat averageChars = record->averageCharsPerByte;
  jfloat maxChars = record->maxCharsPerByte;
  jlong iconvName = (jlong)record->iconvName;
  return create_ComGoogleJ2objcNioCharsetIconvCharsetDecoder_initWithJavaNioCharsetCharset_withFloat_withFloat_withLong_(
      self, averageChars, maxChars, iconvName);
]-*/;
/**
 * Returns the canonical names of all registered charsets.
 *
 * <p>The result is a read-only view of the registry's key set. The view is unmodifiable so
 * that callers cannot remove charsets from the shared registry through it.
 *
 * @return an unmodifiable set of canonical charset names
 */
public static Set<String> getAvailableCharsetNames() {
  return Collections.unmodifiableSet(getEncodings().keySet());
}
/**
 * Looks up a charset by canonical name or alias, ignoring case.
 *
 * @param charsetName the requested charset name; must not be {@code null}
 * @return the matching charset, or {@code null} if no registered charset has that
 *     canonical name or alias
 * @throws NullPointerException if {@code charsetName} is {@code null}
 */
public static Charset charsetForName(String charsetName) {
  Map<String, IOSCharset> registry = getEncodings();

  // Canonical names are registered in upper case. Upper-case the request with a fixed
  // locale so the result does not depend on the default locale (for example, Turkish
  // maps 'i' to a dotted capital I, which would never match "ISO-8859-1").
  IOSCharset match = registry.get(charsetName.toUpperCase(Locale.ROOT));
  if (match != null) {
    return match;
  }

  // Not a canonical name: fall back to scanning every charset's aliases.
  for (IOSCharset candidate : registry.values()) {
    for (String alias : candidate.aliases()) {
      if (alias.equalsIgnoreCase(charsetName)) {
        return candidate;
      }
    }
  }
  return null;
}
/*-[
// Static description of one supported charset. Entries of this type are listed in the
// iosCharsets table using positional initializers, so the field order here must not change
// without updating that table.
typedef struct {
// Foundation encoding constant; returned by nsEncoding().
NSStringEncoding nsEncoding;
// Name handed (as a long) to the iconv-based encoder and decoder.
const char *iconvName;
// Canonical Java charset name; also the key used in the encodings map.
NSString *javaName;
// Alias names for the charset, and how many entries the aliases array holds.
NSString **aliases;
unsigned aliasCount;
// Values passed to the encoder created by newEncoder().
jfloat averageBytesPerChar;
jfloat maxBytesPerChar;
// Values passed to the decoder created by newDecoder().
jfloat averageCharsPerByte;
jfloat maxCharsPerByte;
// Replacement byte sequence and its length; copied by createReplacementBytes().
const jbyte *replacementBytes;
unsigned replacementBytesCount;
} CharsetInfo;
// Alias tables, one per charset. Each table is referenced from the iosCharsets table
// together with a hand-written element count, so adding or removing an alias here requires
// updating the matching count there.
//
// NOTE: ascii_aliases lists "ANSI_X3.4-1968" and "ANSI_X3.4-1986" twice, and
// iso8859_aliases lists "ISO8859-1" twice; the counts in iosCharsets include the duplicates.
static const NSString *utf8_aliases[] = { @"unicode-1-1-utf-8", @"UTF8" };
static const NSString *ascii_aliases[] = {
@"cp367", @"ascii7", @"ISO646-US", @"646", @"csASCII", @"us", @"iso_646.irv:1983",
@"ISO_646.irv:1991", @"IBM367", @"ASCII", @"default", @"ANSI_X3.4-1986",
@"ANSI_X3.4-1968", @"iso-ir-6", @"ANSI_X3.4-1968", @"ANSI_X3.4-1986" };
static const NSString *eucjp_aliases[] = {
@"eucjis", @"Extended_UNIX_Code_Packed_Format_for_Japanese", @"x-eucjp", @"eucjp",
@"csEUCPkdFmtjapanese", @"x-euc-jp", @"euc_jp" };
static const NSString *iso8859_aliases[] = {
@"csISOLatin1", @"latin1", @"IBM-819", @"iso-ir-100", @"8859_1",
@ "ISO_8859-1:1987", @"ISO_8859-1", @"819", @"l1", @"ISO8859-1",
@"IBM819", @"ISO_8859_1", @"ISO8859_1", @"cp819", @"ISO8859-1" };
static const NSString *shiftjis_aliases[] = {
@"x-sjis", @"shift_jis", @"sjis", @"ms_kanji", @"shift-jis", @"csShiftJIS" };
static const NSString *latin2_aliases[] = {
@"csISOLatin2", @"iso-ir-101", @"ibm-912", @"8859_2", @"l2", @"ISO_8859-2",
@"ibm912", @"912", @"ISO8859-2", @"latin2", @"iso8859_2", @"ISO_8859-2:1987", @"cp912" };
static const NSString *utf16_aliases[] = {
@"utf16", @"Unicode", @"UnicodeBig", @"UTF_16", @"unicode" };
static const NSString *win1251_aliases[] = { @"ansi-1251", @"cp5347", @"cp1251" };
static const NSString *win1252_aliases[] = { @"cp1252", @"cp5348" };
static const NSString *win1253_aliases[] = { @"cp5349", @"cp1253" };
static const NSString *win1254_aliases[] = { @"cp5350", @"cp1254" };
static const NSString *win1250_aliases[] = { @"cp1250", @"cp5346" };
static const NSString *iso2022_aliases[] = {
@"jis_encoding", @"csjisencoding", @"jis", @"iso2022jp", @"csISO2022JP" };
static const NSString *macroman_aliases[] = { @"MacRoman" };
static const NSString *utf16be_aliases[] = {
@"X-UTF-16BE", @"UTF_16BE", @"ISO-10646-UCS-2", @"UnicodeBigUnmarked" };
static const NSString *utf16le_aliases[] = {
@"UnicodeLittleUnmarked", @"UTF_16LE", @"X-UTF-16LE" };
static const NSString *utf32_aliases[] = { @"UTF32", @"UTF_32" };
static const NSString *utf32be_aliases[] = { @"X-UTF-32BE", @"UTF_32BE" };
static const NSString *utf32le_aliases[] = { @"X-UTF-32LE", @"UTF_32LE" };
// Replacement byte sequences referenced from the iosCharsets table, written as signed
// jbyte values (so -1 is 0xFF and -3 is 0xFD).
// 63 is ASCII '?'.
static const jbyte ascii_replacement[] = { 63 };
// 0xFF 0xFD / 0xFD 0xFF: U+FFFD in big-endian and little-endian 16-bit form.
static const jbyte utf16be_replacement[] = { -1, -3 };
static const jbyte utf16le_replacement[] = { -3, -1 };
static const jbyte iso2022_replacement[] = { 33, 41 };
// U+FFFD in big-endian and little-endian 32-bit form.
static const jbyte utf32be_replacement[] = { 0, 0, -1, -3 };
static const jbyte utf32le_replacement[] = { -3, -1, 0, 0 };
// Table of supported charsets, using encodings from NSString.h.
//
// Each row lists, in CharsetInfo field order: NSStringEncoding, iconv name, Java canonical
// name, alias array, alias count, average and maximum bytes per char, average and maximum
// chars per byte, replacement bytes, and replacement byte count.
//
// The Java canonical name of every row must be uppercase: it is the key under which
// addEncoding() registers the charset, and charsetForName() upper-cases the requested name
// before looking it up, which is what makes map lookups case-insensitive.
//
// Row 0 (UTF-8) is also used by getDefaultCharset() as the fallback default.
static const CharsetInfo iosCharsets[] = {
{ NSUTF8StringEncoding, "UTF-8", @"UTF-8", utf8_aliases, 2,
1.1f, 3.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSASCIIStringEncoding, "ASCII", @"US-ASCII", ascii_aliases, 16,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSJapaneseEUCStringEncoding, "EUC-JP", @"EUC-JP", eucjp_aliases, 7,
3.0f, 3.0f, 0.5f, 1.0f, ascii_replacement, 1 },
{ NSISOLatin1StringEncoding, "ISO-8859-1", @"ISO-8859-1", iso8859_aliases, 15,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSShiftJISStringEncoding, "SHIFT_JIS", @"SHIFT_JIS", shiftjis_aliases, 6,
2.0f, 2.0f, 0.5f, 1.0f, ascii_replacement, 1 },
{ NSISOLatin2StringEncoding, "ISO-8859-2", @"ISO-8859-2", latin2_aliases, 13,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSUnicodeStringEncoding, "UTF-16", @"UTF-16", utf16_aliases, 5,
2.0f, 4.0f, 0.5f, 1.0f, utf16be_replacement, 2 },
{ NSWindowsCP1251StringEncoding, "CP1251", @"WINDOWS-1251", win1251_aliases, 3,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSWindowsCP1252StringEncoding, "CP1252", @"WINDOWS-1252", win1252_aliases, 2,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSWindowsCP1253StringEncoding, "CP1253", @"WINDOWS-1253", win1253_aliases, 2,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSWindowsCP1254StringEncoding, "CP1254", @"WINDOWS-1254", win1254_aliases, 2,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSWindowsCP1250StringEncoding, "CP1250", @"WINDOWS-1250", win1250_aliases, 2,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSISO2022JPStringEncoding, "ISO-2022-JP", @"ISO-2022-JP", iso2022_aliases, 5,
4.0f, 8.0f, 0.5f, 1.0f, iso2022_replacement, 2 },
{ NSMacOSRomanStringEncoding, "MacRoman", @"X-MACROMAN", macroman_aliases, 1,
1.0f, 1.0f, 1.0f, 1.0f, ascii_replacement, 1 },
{ NSUTF16BigEndianStringEncoding, "UTF-16BE", @"UTF-16BE", utf16be_aliases, 4,
2.0f, 2.0f, 0.5f, 1.0f, utf16be_replacement, 2 },
{ NSUTF16LittleEndianStringEncoding, "UTF-16LE", @"UTF-16LE", utf16le_aliases, 3,
2.0f, 2.0f, 0.5f, 1.0f, utf16le_replacement, 2 },
// "UTF-32" is mapped to NSUTF32BigEndianStringEncoding instead of NSUTF32StringEncoding because
// NSUTF32StringEncoding (strangely) encodes in little endian but decodes in big endian.
// NSUTF32BigEndianStringEncoding is a closer match to Java's "UTF-32".
{ NSUTF32BigEndianStringEncoding, "UTF-32BE", @"UTF-32", utf32_aliases, 2,
4.0f, 4.0f, 0.25f, 1.0f, utf32be_replacement, 4 },
{ NSUTF32BigEndianStringEncoding, "UTF-32BE", @"UTF-32BE", utf32be_aliases, 2,
4.0f, 4.0f, 0.25f, 1.0f, utf32be_replacement, 4 },
{ NSUTF32LittleEndianStringEncoding, "UTF-32LE", @"UTF-32LE", utf32le_aliases, 2,
4.0f, 4.0f, 0.25f, 1.0f, utf32le_replacement, 4 },
};
// Number of rows in iosCharsets, derived from the array size.
static const int numIosCharsets = sizeof(iosCharsets) / sizeof(CharsetInfo);
// Wraps one CharsetInfo record in a new IOSCharset and stores it in the static encodings
// map under the record's Java name, replacing any charset already stored under that name.
// Returns the newly created charset.
static ComGoogleJ2objcNioCharsetIOSCharset *addEncoding(const CharsetInfo *info) {
  NSString *canonicalName = info->javaName;
  IOSObjectArray *aliasArray = [IOSObjectArray arrayWithObjects:info->aliases
                                                          count:info->aliasCount
                                                           type:NSString_class_()];
  // The record's address is handed to the Java constructor as a long.
  ComGoogleJ2objcNioCharsetIOSCharset *charset =
      create_ComGoogleJ2objcNioCharsetIOSCharset_initWithNSString_withNSStringArray_withLong_(
          canonicalName, aliasArray, (jlong)info);
  [ComGoogleJ2objcNioCharsetIOSCharset_encodings putWithId:canonicalName withId:charset];
  return charset;
}
]-*/
/**
 * Resolves the default charset.
 *
 * <p>If the {@code file.encoding} system property is set and names a supported charset,
 * that charset is returned. Otherwise the first entry of the native charset table (UTF-8)
 * is registered via {@code addEncoding} and returned. Note that {@code getEncodings()}
 * registers that same table entry again when it first runs, replacing the map entry
 * created here with a new instance.
 */
private static native IOSCharset getDefaultCharset() /*-[
  NSString *requestedName = JavaLangSystem_getPropertyWithNSString_(@"file.encoding");
  if (requestedName != nil) {
    @try {
      JavaNioCharsetCharset *requested =
          JavaNioCharsetCharset_forNameUEEWithNSString_(requestedName);
      return (ComGoogleJ2objcNioCharsetIOSCharset *)requested;
    }
    @catch (JavaIoUnsupportedEncodingException *unsupported) {
      // Unknown encoding name: fall through to the UTF-8 default below.
    }
  }
  // Default to UTF-8, like the JRE does.
  const CharsetInfo *utf8 = &iosCharsets[0];
  return addEncoding(utf8);
]-*/;
/**
 * Returns the charset registry, keyed by canonical Java name.
 *
 * <p>On the first call every entry of the native charset table is registered through
 * {@code addEncoding}; {@code dispatch_once} ensures that registration runs only once.
 */
private static native Map<String, IOSCharset> getEncodings() /*-[
  static dispatch_once_t registerOnce;
  dispatch_once(&registerOnce, ^{
    const CharsetInfo *end = iosCharsets + numIosCharsets;
    for (const CharsetInfo *entry = iosCharsets; entry != end; ++entry) {
      addEncoding(entry);
    }
  });
  return ComGoogleJ2objcNioCharsetIOSCharset_encodings;
]-*/;
}