/* This code is part of Freenet. It is distributed under the GNU General
* Public License, version 2 (or at your option any later version). See
* http://www.gnu.org/ for further details of the GPL. */
package freenet.l10n;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.Hashtable;
import java.util.Map;
import freenet.support.io.Closer;
/**
* Provides the content of the ISO639-3 standard for language codes.
* Description of what this standard is (taken from http://www.sil.org/iso639-3/default.asp):
*
* "ISO 639-3 is a code that aims to define three-letter identifiers for all known human languages.
* At the core of ISO 639-3 are the individual languages already accounted for in ISO 639-2.
* The large number of living languages in the initial inventory of ISO 639-3 beyond those already
* included in ISO 639-2 was derived primarily from Ethnologue (15th edition).
* Additional extinct, ancient, historic, and constructed languages have been obtained from Linguist List."
*
* Source of the code tables in here:
* http://www.sil.org/iso639-3/iso-639-3_20100707.tab
*
* @author xor (xor@freenetproject.org)
*/
public final class ISO639_3 {
/**
* A class which represents a language code. It was translated from the example SQL-table-definition on
* http://www.sil.org/iso639-3/download.asp
*
* The quoted texts on the JavaDoc of the member variables are the original comments from the SQL-table-definition.
*
* All members are final, therefore objects of this class do not need to be cloned by clients of ISO639_3.
*/
public static final class LanguageCode implements Comparable<LanguageCode> {
/**
* "The three-letter 639-3 identifier", 3 characters, not null.
*/
public final String id;
/**
* "Equivalent 639-2 identifier of the bibliographic applications code set, if there is one", 3 characters, may be null.
*
*/
public final String part2B;
/**
* "Equivalent 639-2 identifier of the terminology applications code set, if there is one", 3 characters, may be null.
*/
public final String part2T;
/**
* "Equivalent 639-1 identifier, if there is one", 2 characters, may be null.
*/
public final String part1;
public static enum Scope {
Individual,
Macrolanguage,
Special;
private static Scope fromTabFile(String abbreviation) {
if(abbreviation.equals("I")) return Scope.Individual;
else if(abbreviation.equals("M")) return Scope.Macrolanguage;
else if(abbreviation.equals("S")) return Scope.Special;
else throw new IllegalArgumentException("Unknown scope abbreviation: " + abbreviation);
}
};
/**
* The scope of the language, never null.
*/
public final Scope scope;
public static enum Type {
Ancient,
Constructed,
Extinct,
Historical,
Living,
Special;
private static Type fromTabFile(String abbreviation) {
if(abbreviation.equals("A")) return Type.Ancient;
else if(abbreviation.equals("C")) return Type.Constructed;
else if(abbreviation.equals("E")) return Type.Extinct;
else if(abbreviation.equals("H")) return Type.Historical;
else if(abbreviation.equals("L")) return Type.Living;
else if(abbreviation.equals("S")) return Type.Special;
else throw new IllegalArgumentException("Unknwon type abbreviation: " + abbreviation);
}
}
/**
* The type of the language, never null.
*/
public final Type type;
/**
* "Reference language name", never null.
*/
public final String referenceName;
/**
* "Comment relating to one or more of the columns", may be null.
*/
public final String comment;
private LanguageCode(char[] myId, char[] myPart2B, char[] myPart2T, char[] myPart1, Scope myScope, Type myType,
String myReferenceName, String myComment) {
if(myId == null) throw new NullPointerException();
if(myId.length > 3) throw new IllegalArgumentException();
if(myPart2B != null && myPart2B.length > 3) throw new IllegalArgumentException();
if(myPart2T != null && myPart2T.length > 3) throw new IllegalArgumentException();
if(myPart1 != null && myPart1.length > 2) throw new IllegalArgumentException();
if(myScope == null) throw new NullPointerException();
if(myType == null) throw new NullPointerException();
if(myReferenceName == null) throw new NullPointerException();
if(myReferenceName.length() > 150) throw new IllegalArgumentException();
if(myComment != null && myComment.length() > 150) throw new IllegalArgumentException();
id = new String(myId).toLowerCase();
part2B = new String(myPart2B).toLowerCase();
part2T = new String(myPart2T).toLowerCase();
part1 = new String(myPart1).toLowerCase();
scope = myScope;
type = myType;
referenceName = myReferenceName;
comment = myComment;
}
public boolean equals(LanguageCode other) {
return id.equals(other.id);
}
@Override
public boolean equals(Object o) {
if(!(o instanceof LanguageCode))
return false;
return equals((LanguageCode)o);
}
@Override
public int hashCode() {
return id.hashCode();
}
@Override
public int compareTo(LanguageCode o) {
return id.compareTo(o.id);
}
@Override
public String toString() {
return id + " = " + referenceName + " (scope: " + scope + "; type: " + type + ")";
}
}
private static Hashtable<String, LanguageCode> loadFromTabFile() {
final Hashtable<String, LanguageCode> codes = new Hashtable<String, LanguageCode>(7705 * 2);
InputStream in = null;
InputStreamReader isr = null;
BufferedReader br = null;
try {
// Returns null on lookup failures:
in = ISO639_3.class.getClassLoader().getResourceAsStream("freenet/l10n/iso-639-3_20100707.tab");
if (in == null)
throw new RuntimeException("Could not open the language codes resource");
isr = new InputStreamReader(in, "UTF-8");
br = new BufferedReader(isr);
{
String[] headerTokens = br.readLine().split("[\t]");
if(
!headerTokens[0].equals("Id")
|| !headerTokens[1].equals("Part2B")
|| !headerTokens[2].equals("Part2T")
|| !headerTokens[3].equals("Part1")
|| !headerTokens[4].equals("Scope")
|| !headerTokens[5].equals("Language_Type")
|| !headerTokens[6].equals("Ref_Name")
|| !headerTokens[7].equals("Comment")
)
throw new RuntimeException("File header does not match the expected header.");
}
for(String line = br.readLine(); line != null; line = br.readLine()) {
line = line.trim();
if(line.length() == 0)
continue;
final String[] tokens = line.split("[\t]");
if(tokens.length != 8 && tokens.length != 7)
throw new RuntimeException("Line with invalid token amount: " + line);
final LanguageCode newCode = new LanguageCode(
tokens[0].toCharArray(),
tokens[1].toCharArray(),
tokens[2].toCharArray(),
tokens[3].toCharArray(),
LanguageCode.Scope.fromTabFile(tokens[4]),
LanguageCode.Type.fromTabFile(tokens[5]),
tokens[6],
tokens.length==8 ? tokens[7] : null
);
if(codes.put(newCode.id, newCode) != null)
throw new RuntimeException("Duplicate language code: " + newCode);
}
} catch(Exception e) {
throw new RuntimeException(e);
} finally {
Closer.close(br);
Closer.close(isr);
Closer.close(in);
}
return codes;
}
private final Map<String, LanguageCode> allLanguagesCache;
/**
* Constructs a new ISO639_3 and loads the list of languages from the .tab file in the classpath.
* The list is cached for the lifetime of this ISO639_3 object, make sure not to keep the ISO639_3 object alive
* if you only use a small part of all languages.
*
* @throws RuntimeException If the .tab file is not present in the classpath or if parsing fails.
*/
public ISO639_3() {
allLanguagesCache = Collections.unmodifiableMap(loadFromTabFile());
}
/**
* Gets a list of all languages.
*
* @return Returns the map of all ISO639-3 language codes. The key in the returned list is the ID of the language code,
* which is the 3-letter code of ISO639-3. The given map is unmodifiable since it is used for the cache.
*/
public final Map<String, LanguageCode> getLanguages() {
return allLanguagesCache;
}
/**
* Gets a filtered list of languages. The list is cached for the lifetime of this ISO639_3 object.
*
* @param scope Must not be null.
* @param type Must not be null.
* @return Gets a {@link Hashtable} of language codes with the given scope and type. The key in the returned list is the ID
* of the language code, which is the 3-letter code of ISO639-3. The given Hashtable is free for modification.
*/
public final Hashtable<String, LanguageCode> getLanguagesByScopeAndType(LanguageCode.Scope scope, LanguageCode.Type type) {
final Map<String, LanguageCode> all = getLanguages();
final Hashtable<String, LanguageCode> result = new Hashtable<String, LanguageCode>();
for(final LanguageCode c : all.values()) {
if(c.scope.equals(scope) && c.type.equals(type))
result.put(c.id, c); // We do not clone the code because all its fields are final.
}
return result;
}
/**
* @return The special symbolic language code which is supposed to be a category for multiple languages.
*/
public final LanguageCode getMultilingualCode() {
return getLanguages().get("mul");
}
public static void main(String[] args) {
for(LanguageCode c : loadFromTabFile().values()) {
System.out.println(c);
}
}
}