/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
/**
 * A registry for languages, scripts, regions and variants.
 *
 * This is based on the data file <code>language-subtag-registry.txt.gz</code>.
 * This file is retrieved from <a href="http://www.iana.org/assignments/language-subtag-registry">IANA</a>
 * and compressed with gzip.
 *
 * The registry content is static and shared by every instance: it is loaded by the
 * first constructor call that manages to read the data file. As long as the data
 * file has not been read successfully, the registry is empty (never <code>null</code>).
 */
public class LanguageRegistry {

  /**
   * Location of the registry data file, relative to the class path.
   */
  private final static String REGISTRY_RESOURCE =
      "org/wikipediacleaner/api/data/language-subtag-registry.txt.gz";

  /**
   * Line separating two records in the registry data file.
   */
  private final static String RECORD_SEPARATOR = "%%";

  /**
   * Lock object for initialization.
   */
  private final static Object lock = new Object();

  /**
   * True once the data file has been read successfully.
   */
  private static boolean loaded = false;

  /**
   * List of languages (empty until the registry is loaded).
   */
  private static List<LanguageRegistry.Language> languages = Collections.emptyList();

  /**
   * List of scripts (empty until the registry is loaded).
   */
  private static List<LanguageRegistry.Script> scripts = Collections.emptyList();

  /**
   * List of regions (empty until the registry is loaded).
   */
  private static List<LanguageRegistry.Region> regions = Collections.emptyList();

  /**
   * List of variants (empty until the registry is loaded).
   */
  private static List<LanguageRegistry.Variant> variants = Collections.emptyList();

  /**
   * Constructor.
   *
   * Loads the shared registry if it has not been loaded yet.
   */
  public LanguageRegistry() {
    try {
      initializeRegistry();
    } catch (IOException e) {
      // Best effort: the constructor cannot report the failure without changing
      // its signature. The registry simply stays empty, and the next instance
      // created will try to load the data file again.
    }
  }

  /**
   * @return List of all languages (unmodifiable, never null).
   */
  public List<LanguageRegistry.Language> getLanguages() {
    return languages;
  }

  /**
   * @param code Language code.
   * @return Language with the specified code (case insensitive), or null if there is none.
   */
  public LanguageRegistry.Language getLanguage(String code) {
    return findByCode(languages, code);
  }

  /**
   * @param language Language.
   * @return List of all scripts available for the specified language:
   *         every script, except the one suppressed by the language if any.
   */
  public List<LanguageRegistry.Script> getScripts(LanguageRegistry.Language language) {
    String suppressed = (language != null) ? language.getSuppressScript() : null;
    List<LanguageRegistry.Script> tmpScripts = new ArrayList<LanguageRegistry.Script>(scripts.size());
    for (LanguageRegistry.Script script : scripts) {
      if ((suppressed == null) || !suppressed.equalsIgnoreCase(script.getCode())) {
        tmpScripts.add(script);
      }
    }
    return tmpScripts;
  }

  /**
   * @param code Script code.
   * @return Script with the specified code (case insensitive), or null if there is none.
   */
  public LanguageRegistry.Script getScript(String code) {
    return findByCode(scripts, code);
  }

  /**
   * @return List of all regions (unmodifiable, never null).
   */
  public List<LanguageRegistry.Region> getRegions() {
    return regions;
  }

  /**
   * @param code Region code.
   * @return Region with the specified code (case insensitive), or null if there is none.
   */
  public LanguageRegistry.Region getRegion(String code) {
    return findByCode(regions, code);
  }

  /**
   * @param prefix Prefix, or null to retrieve every variant.
   * @return List of variants possible for the prefix (all variants if prefix is null).
   */
  public List<LanguageRegistry.Variant> getVariants(String prefix) {
    List<LanguageRegistry.Variant> tmpVariants = new ArrayList<LanguageRegistry.Variant>();
    for (LanguageRegistry.Variant variant : variants) {
      if ((prefix == null) || variant.isPossibleForPrefix(prefix)) {
        tmpVariants.add(variant);
      }
    }
    return tmpVariants;
  }

  /**
   * @param code Variant code.
   * @return Variant with the specified code (case insensitive), or null if there is none.
   */
  public LanguageRegistry.Variant getVariant(String code) {
    return findByCode(variants, code);
  }

  /**
   * @param <T> Type of element.
   * @param elements Elements to search.
   * @param code Code of the requested element.
   * @return First element with the specified code (case insensitive), or null.
   */
  private static <T extends Element> T findByCode(List<T> elements, String code) {
    if (code == null) {
      return null;
    }
    for (T element : elements) {
      if (code.equalsIgnoreCase(element.getCode())) {
        return element;
      }
    }
    return null;
  }

  /**
   * Initialize registry.
   *
   * Does nothing if the registry is already loaded or if the data file can't be found.
   * The shared lists are replaced only after the whole file has been read.
   *
   * @throws IOException If the data file can't be read.
   */
  private static void initializeRegistry() throws IOException {
    synchronized (lock) {
      if (loaded) {
        return;
      }

      // Open file containing the registry
      URL url = LanguageRegistry.class.getClassLoader().getResource(REGISTRY_RESOURCE);
      if (url == null) {
        return;
      }

      List<LanguageRegistry.Language> tmpLanguages = new ArrayList<LanguageRegistry.Language>();
      List<LanguageRegistry.Script> tmpScripts = new ArrayList<LanguageRegistry.Script>();
      List<LanguageRegistry.Region> tmpRegions = new ArrayList<LanguageRegistry.Region>();
      List<LanguageRegistry.Variant> tmpVariants = new ArrayList<LanguageRegistry.Variant>();

      // The raw stream is closed separately so that it is released even if
      // building the decompressing reader fails.
      InputStream stream = url.openStream();
      try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
            new GZIPInputStream(stream), "UTF8"));
        try {
          String line = null;
          while ((line = reader.readLine()) != null) {
            // Gather every line up to the next separator (or end of file)
            List<String> lines = new ArrayList<String>();
            while ((line != null) && (!RECORD_SEPARATOR.equals(line.trim()))) {
              lines.add(line);
              line = reader.readLine();
            }
            parseRecord(lines, tmpLanguages, tmpScripts, tmpRegions, tmpVariants);
          }
        } finally {
          reader.close();
        }
      } finally {
        stream.close();
      }

      // Publish the results
      Collections.sort(tmpLanguages);
      languages = Collections.unmodifiableList(tmpLanguages);
      Collections.sort(tmpScripts);
      scripts = Collections.unmodifiableList(tmpScripts);
      Collections.sort(tmpRegions);
      regions = Collections.unmodifiableList(tmpRegions);
      Collections.sort(tmpVariants);
      variants = Collections.unmodifiableList(tmpVariants);
      loaded = true;
    }
  }

  /**
   * Analyze one record of the data file and store the matching element.
   *
   * Only records starting with a "Type" line followed by a "Subtag" line are used,
   * and only for the types language, script, region and variant.
   *
   * @param lines Lines of the record.
   * @param tmpLanguages List receiving the languages.
   * @param tmpScripts List receiving the scripts.
   * @param tmpRegions List receiving the regions.
   * @param tmpVariants List receiving the variants.
   */
  private static void parseRecord(
      List<String> lines,
      List<LanguageRegistry.Language> tmpLanguages,
      List<LanguageRegistry.Script> tmpScripts,
      List<LanguageRegistry.Region> tmpRegions,
      List<LanguageRegistry.Variant> tmpVariants) {
    if (!"Type".equals(getElementName(0, lines)) ||
        !"Subtag".equalsIgnoreCase(getElementName(1, lines))) {
      return;
    }
    String type = getElementValue(0, lines);
    String code = getElementValue(1, lines);
    if ("language".equalsIgnoreCase(type)) {
      Language language = parseLanguage(code, lines);
      if (language != null) {
        tmpLanguages.add(language);
      }
    } else if ("script".equalsIgnoreCase(type)) {
      tmpScripts.add(parseScript(code, lines));
    } else if ("region".equalsIgnoreCase(type)) {
      Region region = parseRegion(code, lines);
      if (region != null) {
        tmpRegions.add(region);
      }
    } else if ("variant".equalsIgnoreCase(type)) {
      Variant variant = parseVariant(code, lines);
      if (variant != null) {
        tmpVariants.add(variant);
      }
    }
  }

  /**
   * Handle the fields shared by every type of element.
   *
   * @param element Element being built.
   * @param name Field name.
   * @param value Field value.
   * @return True if the field was a description or a comment.
   */
  private static boolean addCommonField(Element element, String name, String value) {
    if ("Description".equalsIgnoreCase(name)) {
      element.addDescription(value);
      return true;
    }
    if ("Comments".equalsIgnoreCase(name)) {
      element.addComments(value);
      return true;
    }
    return false;
  }

  /**
   * @param code Language code.
   * @param lines Lines of the record.
   * @return Language, or null if it is deprecated or has a special scope.
   */
  private static Language parseLanguage(String code, List<String> lines) {
    Language language = new Language(code);
    boolean shouldKeep = true;
    for (int lineNum = 2; lineNum < lines.size(); lineNum++) {
      String name = getElementName(lineNum, lines);
      String value = getElementValue(lineNum, lines);
      if (addCommonField(language, name, value)) {
        continue;
      }
      if ("Suppress-Script".equalsIgnoreCase(name)) {
        language.setSuppressScript(value);
      } else if ("Scope".equalsIgnoreCase(name)) {
        if ("special".equalsIgnoreCase(value)) {
          shouldKeep = false;
        }
      } else if ("Deprecated".equalsIgnoreCase(name)) {
        shouldKeep = false;
      }
    }
    return shouldKeep ? language : null;
  }

  /**
   * @param code Script code.
   * @param lines Lines of the record.
   * @return Script.
   */
  private static Script parseScript(String code, List<String> lines) {
    // NOTE(review): unlike the other types, a "Deprecated" field does not
    // exclude a script. Kept as is; confirm whether this is intended.
    Script script = new Script(code);
    for (int lineNum = 2; lineNum < lines.size(); lineNum++) {
      addCommonField(script, getElementName(lineNum, lines), getElementValue(lineNum, lines));
    }
    return script;
  }

  /**
   * @param code Region code.
   * @param lines Lines of the record.
   * @return Region, or null if it is deprecated.
   */
  private static Region parseRegion(String code, List<String> lines) {
    Region region = new Region(code);
    boolean shouldKeep = true;
    for (int lineNum = 2; lineNum < lines.size(); lineNum++) {
      String name = getElementName(lineNum, lines);
      String value = getElementValue(lineNum, lines);
      if (addCommonField(region, name, value)) {
        continue;
      }
      if ("Deprecated".equalsIgnoreCase(name)) {
        shouldKeep = false;
      }
    }
    return shouldKeep ? region : null;
  }

  /**
   * @param code Variant code.
   * @param lines Lines of the record.
   * @return Variant, or null if it is deprecated.
   */
  private static Variant parseVariant(String code, List<String> lines) {
    Variant variant = new Variant(code);
    boolean shouldKeep = true;
    for (int lineNum = 2; lineNum < lines.size(); lineNum++) {
      String name = getElementName(lineNum, lines);
      String value = getElementValue(lineNum, lines);
      if (addCommonField(variant, name, value)) {
        continue;
      }
      if ("Prefix".equalsIgnoreCase(name)) {
        variant.addPrefix(value);
      } else if ("Deprecated".equalsIgnoreCase(name)) {
        shouldKeep = false;
      }
    }
    return shouldKeep ? variant : null;
  }

  /**
   * @param lineNum Current line.
   * @param lines Array of lines.
   * @return Index of the colon separating name and value if the line is defining
   *         an element, -1 otherwise (no such line, continuation line starting
   *         with a space, or no name before the colon).
   */
  private static int getSeparatorIndex(int lineNum, List<String> lines) {
    if ((lines == null) || (lineNum < 0) || (lineNum >= lines.size())) {
      return -1;
    }
    String line = lines.get(lineNum);
    if (line.startsWith(" ")) {
      return -1;
    }
    int colonIndex = line.indexOf(':');
    return (colonIndex > 0) ? colonIndex : -1;
  }

  /**
   * @param lineNum Current line.
   * @param lines Array of lines.
   * @return Element name if the line is defining an element, null otherwise.
   */
  private static String getElementName(int lineNum, List<String> lines) {
    int colonIndex = getSeparatorIndex(lineNum, lines);
    if (colonIndex < 0) {
      return null;
    }
    return lines.get(lineNum).substring(0, colonIndex);
  }

  /**
   * @param lineNum Current line.
   * @param lines Array of lines.
   * @return Element value if the line is defining an element, null otherwise.
   *         Following lines starting with a space are appended to the value,
   *         each one trimmed and separated by a single space.
   */
  private static String getElementValue(int lineNum, List<String> lines) {
    int colonIndex = getSeparatorIndex(lineNum, lines);
    if (colonIndex < 0) {
      return null;
    }
    StringBuilder value = new StringBuilder(
        lines.get(lineNum).substring(colonIndex + 1).trim());
    int nextLine = lineNum + 1;
    while ((nextLine < lines.size()) && lines.get(nextLine).startsWith(" ")) {
      value.append(' ').append(lines.get(nextLine).trim());
      nextLine++;
    }
    return value.toString();
  }

  /**
   * Base class for holding information.
   *
   * Elements are ordered by their code.
   */
  public static abstract class Element implements Comparable<Element> {

    /**
     * Code.
     */
    private final String code;

    /**
     * Description (several descriptions are separated by ", ").
     */
    private String description;

    /**
     * Comments (several comments are separated by ", ").
     */
    private String comments;

    /**
     * @param code Code.
     */
    Element(String code) {
      this.code = code;
    }

    /**
     * @return Code.
     */
    public String getCode() {
      return code;
    }

    /**
     * @param desc Description to add to the existing ones.
     */
    void addDescription(String desc) {
      if (this.description == null) {
        this.description = desc;
      } else {
        this.description += ", " + desc;
      }
    }

    /**
     * @return Description, or null if there is none.
     */
    public String getDescription() {
      return description;
    }

    /**
     * @param commentary Comments to add to the existing ones.
     */
    void addComments(String commentary) {
      if (this.comments == null) {
        this.comments = commentary;
      } else {
        this.comments += ", " + commentary;
      }
    }

    /**
     * @return Comments, or null if there are none.
     */
    public String getComments() {
      return comments;
    }

    /**
     * @param o Other element.
     * @return a negative integer, zero, or a positive integer as the code
     *         is less than, equal to, or greater than the code of the specified element.
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(Element o) {
      return code.compareTo(o.code);
    }

    /**
     * @return String representation of the element: the code, followed by
     *         the description if there is one.
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
      if (description != null) {
        return code + " - " + description;
      }
      return code;
    }
  }

  /**
   * Bean for holding information about a language.
   */
  public static class Language extends Element {

    /**
     * Implicit script, non necessary.
     */
    private String suppressScript;

    /**
     * @param code Language code.
     */
    Language(String code) {
      super(code);
    }

    /**
     * @param script Implicit script.
     */
    void setSuppressScript(String script) {
      this.suppressScript = script;
    }

    /**
     * @return Implicit script, or null if there is none.
     */
    String getSuppressScript() {
      return this.suppressScript;
    }
  }

  /**
   * Bean for holding information about a script.
   */
  public static class Script extends Element {

    /**
     * @param code Script code.
     */
    Script(String code) {
      super(code);
    }
  }

  /**
   * Bean for holding information about a region.
   */
  public static class Region extends Element {

    /**
     * @param code Region code.
     */
    Region(String code) {
      super(code);
    }
  }

  /**
   * Bean for holding information about a variant.
   */
  public static class Variant extends Element {

    /**
     * List of possible prefixes (without duplicates).
     */
    private final List<String> prefixes;

    /**
     * @param code Variant code.
     */
    Variant(String code) {
      super(code);
      this.prefixes = new ArrayList<String>();
    }

    /**
     * @param prefix Prefix to add, ignored if already known.
     */
    void addPrefix(String prefix) {
      if (!prefixes.contains(prefix)) {
        prefixes.add(prefix);
      }
    }

    /**
     * @param prefix Prefix.
     * @return True if the variant is possible for the prefix
     *         (exact, case sensitive match with one of the known prefixes).
     */
    boolean isPossibleForPrefix(String prefix) {
      return prefixes.contains(prefix);
    }
  }
}