/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.gui.resources;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A class for managing charsets. In comparison to the class <code>Charset</code> provided by the Java libraries,
* this class restricts the set of available charset to the most common ones (ordered by frequency of use)
* and it provides consistent and beautified labels.
*
* @author Fabian Prasser
*/
public class Charsets {
/** Name to charset */
private static Map<String, Charset> NAME_TO_CHARSET = new HashMap<>();
/** Default charset */
private static String DEFAULT_CHARSET;
/** List of names of the most commonly used charsets*/
private static String[] MOST_USED_CHARSETS = { "UTF-8",
"ISO-8859-1",
"latin1",
"Windows-1251",
"US-ASCII",
"Shift_JIS",
"Windows-1252",
"GB2312",
"EUC-KR",
"EUC-JP",
"GBK",
"ISO-8859-2",
"ISO-8859-15",
"Windows-1250",
"Windows-1256",
"ISO-8859-9",
"Big5",
"Windows-1254",
"Windows-874",
"US-ASCII",
"TIS-620",
"Windows-1255",
"ISO-8859-7",
"Windows-1253",
"KOI8-R",
"Windows-1257",
"UTF-16",
"UTF-16LE",
"UTF-16BE",
"ksc_5601",
"GB18030",
"Windows-31J",
"ISO-8859-5",
"ISO-8859-8",
"ISO-8859-4",
"ISO-8859-6",
"KOI8-U",
"ISO-2022-JP",
"ISO-8859-13",
"ISO-8859-3",
"Windows-949",
"Big5-HKSCS",
"Windows-1258",
"ISO-8859-11",
"IBM850" };
/** List of objects representing the most commonly used charsets */
private static final String[] AVAILABLE_CHARSETS;
static {
// Prepare
List<String> availableCharsets = new ArrayList<>();
String defaultCharset = null;
// For each name
for (String name : MOST_USED_CHARSETS) {
// Check
Charset charset = null;
try {
charset = Charset.forName(name);
} catch (Exception e) {
// We can live with this
}
// If it exists
if (charset != null) {
// Format name of the charset
name = name.toUpperCase().replace('_', '-').replace(' ', '-');
char[] array = name.toCharArray();
StringBuilder builder = new StringBuilder();
builder.append(array[0]);
for (int i = 1; i < array.length; i++) {
if ((array[i - 1] != '-' && !Character.isDigit(array[i - 1])) && Character.isDigit(array[i])) {
builder.append("-");
}
builder.append(array[i]);
}
// Store
boolean systemDefault = charset.equals(Charset.defaultCharset());
if (systemDefault) {
builder.append(" (").append(Resources.getMessage("Charset.1")).append(")");
defaultCharset = builder.toString();
}
availableCharsets.add(builder.toString());
NAME_TO_CHARSET.put(builder.toString(), charset);
}
}
// Store
AVAILABLE_CHARSETS = availableCharsets.toArray(new String[availableCharsets.size()]);
if (defaultCharset != null) {
DEFAULT_CHARSET = defaultCharset;
} else {
DEFAULT_CHARSET = AVAILABLE_CHARSETS[0];
}
}
/**
* Returns a charset for the given name
* @param name
* @return
*/
public static Charset getCharsetForName(String name) {
return NAME_TO_CHARSET.get(name);
}
/**
* Returns the system's default charset
* @return
*/
public static String getNameOfDefaultCharset() {
return DEFAULT_CHARSET;
}
/**
* Returns a list of available charsets. The list is restricted to the most common charsets.
* @return
*/
public static String[] getNamesOfAvailableCharsets() {
return AVAILABLE_CHARSETS;
}
}