/*
* Copyright 2015 Skynav, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY SKYNAV, INC. AND ITS CONTRIBUTORS “AS IS” AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL SKYNAV, INC. OR ITS CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.skynav.ttv.verifier.util;
import java.util.Collections;
import java.util.Set;
import com.skynav.ttv.util.Location;
import com.skynav.ttv.util.Reporter;
import com.skynav.ttv.verifier.VerifierContext;
/**
 * Helpers for verifying script values against the ISO 15924 four-letter
 * script identifiers.
 *
 * <p>Identifiers are stored and matched in lowercase form only; matching is
 * case-sensitive. NOTE(review): ISO 15924 conventionally writes codes in title
 * case (e.g. "Latn") - confirm that callers pass lowercase values.</p>
 */
public class Scripts {

    /** Known ISO 15924 script identifiers, in lowercase, sorted alphabetically. */
    private static final String[] iso15924ScriptIdentifiers = new String[] {
        "adlm",                         // Adlam
        "afak",                         // Afaka
        "aghb",                         // Caucasian Albanian
        "ahom",                         // Ahom, Tai Ahom
        "arab",                         // Arabic
        "aran",                         // Arabic (Nastaliq variant)
        "armi",                         // Imperial Aramaic
        "armn",                         // Armenian
        "avst",                         // Avestan
        "bali",                         // Balinese
        "bamu",                         // Bamum
        "bass",                         // Bassa Vah
        "batk",                         // Batak
        "beng",                         // Bengali
        "bhks",                         // Bhaiksuki
        "blis",                         // Blissymbols
        "bopo",                         // Bopomofo
        "brah",                         // Brahmi
        "brai",                         // Braille
        "bugi",                         // Buginese
        "buhd",                         // Buhid
        "cakm",                         // Chakma
        "cans",                         // Unified Canadian Aboriginal Syllabics
        "cari",                         // Carian
        "cham",                         // Cham
        "cher",                         // Cherokee
        "cirt",                         // Cirth
        "copt",                         // Coptic
        "cprt",                         // Cypriot
        "cyrl",                         // Cyrillic
        "cyrs",                         // Cyrillic (Old Church Slavonic variant)
        "deva",                         // Devanagari (Nagari)
        "dsrt",                         // Deseret (Mormon)
        "dupl",                         // Duployan shorthand, Duployan stenography
        "egyd",                         // Egyptian demotic
        "egyh",                         // Egyptian hieratic
        "egyp",                         // Egyptian hieroglyphs
        "elba",                         // Elbasan
        "ethi",                         // Ethiopic (Geʻez)
        "geok",                         // Khutsuri (Asomtavruli and Nuskhuri)
        "geor",                         // Georgian (Mkhedruli)
        "glag",                         // Glagolitic
        "goth",                         // Gothic
        "gran",                         // Grantha
        "grek",                         // Greek
        "gujr",                         // Gujarati
        "guru",                         // Gurmukhi
        "hang",                         // Hangul (Hangŭl, Hangeul)
        "hani",                         // Han (Hanzi, Kanji, Hanja)
        "hano",                         // Hanunoo (Hanunóo)
        "hans",                         // Han (Simplified variant)
        "hant",                         // Han (Traditional variant)
        "hatr",                         // Hatran
        "hebr",                         // Hebrew
        "hira",                         // Hiragana
        "hluw",                         // Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs)
        "hmng",                         // Pahawh Hmong
        "hrkt",                         // Japanese syllabaries (alias for Hiragana + Katakana)
        "hung",                         // Old Hungarian (Hungarian Runic)
        "inds",                         // Indus (Harappan)
        "ital",                         // Old Italic (Etruscan, Oscan, etc.)
        "java",                         // Javanese
        "jpan",                         // Japanese (alias for Han + Hiragana + Katakana)
        "jurc",                         // Jurchen
        "kali",                         // Kayah Li
        "kana",                         // Katakana
        "khar",                         // Kharoshthi
        "khmr",                         // Khmer
        "khoj",                         // Khojki
        "kitl",                         // Khitan large script
        "kits",                         // Khitan small script
        "knda",                         // Kannada
        "kore",                         // Korean (alias for Hangul + Han)
        "kpel",                         // Kpelle
        "kthi",                         // Kaithi
        "lana",                         // Tai Tham (Lanna)
        "laoo",                         // Lao
        "latf",                         // Latin (Fraktur variant)
        "latg",                         // Latin (Gaelic variant)
        "latn",                         // Latin
        "leke",                         // Leke
        "lepc",                         // Lepcha (Róng)
        "limb",                         // Limbu
        "lina",                         // Linear A
        "linb",                         // Linear B
        "lisu",                         // Lisu (Fraser)
        "loma",                         // Loma
        "lyci",                         // Lycian
        "lydi",                         // Lydian
        "mahj",                         // Mahajani
        "mand",                         // Mandaic, Mandaean
        "mani",                         // Manichaean
        "marc",                         // Marchen
        "maya",                         // Mayan hieroglyphs
        "mend",                         // Mende Kikakui
        "merc",                         // Meroitic Cursive
        "mero",                         // Meroitic Hieroglyphs
        "mlym",                         // Malayalam
        "modi",                         // Modi, Moḍī
        "mong",                         // Mongolian
        "moon",                         // Moon (Moon code, Moon script, Moon type)
        "mroo",                         // Mro, Mru
        "mtei",                         // Meitei Mayek (Meithei, Meetei)
        "mult",                         // Multani
        "mymr",                         // Myanmar (Burmese)
        "narb",                         // Old North Arabian (Ancient North Arabian)
        "nbat",                         // Nabataean
        "nkgb",                         // Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba)
        "nkoo",                         // N’Ko
        "nshu",                         // Nüshu
        "ogam",                         // Ogham
        "olck",                         // Ol Chiki (Ol Cemet’, Ol, Santali)
        "orkh",                         // Old Turkic, Orkhon Runic
        "orya",                         // Oriya
        "osge",                         // Osage
        "osma",                         // Osmanya
        "palm",                         // Palmyrene
        "pauc",                         // Pau Cin Hau
        "perm",                         // Old Permic
        "phag",                         // Phags-pa
        "phli",                         // Inscriptional Pahlavi
        "phlp",                         // Psalter Pahlavi
        "phlv",                         // Book Pahlavi
        "phnx",                         // Phoenician
        "plrd",                         // Miao (Pollard)
        "prti",                         // Inscriptional Parthian
        "qaaa",                         // Reserved for private use (start)
        "qabx",                         // Reserved for private use (end)
        "rjng",                         // Rejang (Redjang, Kaganga)
        "roro",                         // Rongorongo
        "runr",                         // Runic
        "samr",                         // Samaritan
        "sara",                         // Sarati
        "sarb",                         // Old South Arabian
        "saur",                         // Saurashtra
        "sgnw",                         // SignWriting
        "shaw",                         // Shavian (Shaw)
        "shrd",                         // Sharada, Śāradā
        "sidd",                         // Siddham, Siddhaṃ, Siddhamātṛkā
        "sind",                         // Khudawadi, Sindhi
        "sinh",                         // Sinhala
        "sora",                         // Sora Sompeng
        "sund",                         // Sundanese
        "sylo",                         // Syloti Nagri
        "syrc",                         // Syriac
        "syre",                         // Syriac (Estrangelo variant)
        "syrj",                         // Syriac (Western variant)
        "syrn",                         // Syriac (Eastern variant)
        "tagb",                         // Tagbanwa
        "takr",                         // Takri, Ṭākrī, Ṭāṅkrī
        "tale",                         // Tai Le
        "talu",                         // New Tai Lue
        "taml",                         // Tamil
        "tang",                         // Tangut
        "tavt",                         // Tai Viet
        "telu",                         // Telugu
        "teng",                         // Tengwar
        "tfng",                         // Tifinagh (Berber)
        "tglg",                         // Tagalog (Baybayin, Alibata)
        "thaa",                         // Thaana
        "thai",                         // Thai
        "tibt",                         // Tibetan
        "tirh",                         // Tirhuta
        "ugar",                         // Ugaritic
        "vaii",                         // Vai
        "visp",                         // Visible Speech
        "wara",                         // Warang Citi (Varang Kshiti)
        "wole",                         // Woleai
        "xpeo",                         // Old Persian
        "xsux",                         // Cuneiform, Sumero-Akkadian
        "yiii",                         // Yi
        "zinh",                         // Code for inherited script
        "zmth",                         // Mathematical notation
        "zsym",                         // Symbols
        "zxxx",                         // Code for unwritten documents
        "zyyy",                         // Code for undetermined script
        "zzzz"                          // Code for uncoded script
    };

    /** Unmodifiable lookup set built once from {@link #iso15924ScriptIdentifiers}. */
    private static final Set<String> scripts;

    static {
        Set<String> s = new java.util.HashSet<String>(iso15924ScriptIdentifiers.length * 2);
        Collections.addAll(s, iso15924ScriptIdentifiers);
        scripts = Collections.unmodifiableSet(s);
    }

    /**
     * Determines whether a value is an acceptable script expression, i.e. a
     * value accepted by {@code Keywords.isNone}, a value accepted by
     * {@code Keywords.isAuto}, or a script identifier accepted by
     * {@link #isScriptIdentifier(String)}. The checks are made in that order.
     *
     * @param value the candidate script expression
     * @param location location of the value (not consulted by this method)
     * @param context verifier context (not consulted by this method)
     * @param outputScript if non-null, element 0 receives {@code value} when
     *        the value is accepted; it is left untouched when the value is rejected
     * @return true if the value is accepted, otherwise false
     */
    public static boolean isScript(String value, Location location, VerifierContext context, String[] outputScript) {
        if (!Keywords.isNone(value) && !Keywords.isAuto(value) && !isScriptIdentifier(value))
            return false;
        if (outputScript != null)
            outputScript[0] = value;
        return true;
    }

    /**
     * Reports a bad script expression as an informational message through the
     * reporter obtained from the verifier context.
     *
     * @param value the offending script expression, included in the message
     * @param location location whose locator is attached to the message
     * @param context verifier context supplying the reporter
     */
    public static void badScript(String value, Location location, VerifierContext context) {
        Reporter reporter = context.getReporter();
        reporter.logInfo(reporter.message(location.getLocator(), "*KEY*", "Bad script expression ''{0}''.", value));
    }

    /**
     * Determines whether a string is a recognized (lowercase) ISO 15924 script
     * identifier: either an entry of the identifier table or a code within the
     * private use range {@code qaaa} through {@code qabx}.
     *
     * @param s the candidate identifier, possibly null
     * @return true if the identifier is recognized, otherwise false (including for null)
     */
    public static boolean isScriptIdentifier(String s) {
        return scripts.contains(s) || isPrivateUseScriptIdentifier(s);
    }

    /**
     * Determines whether a string lies in the ISO 15924 private use range,
     * {@code qaaa} through {@code qabx} inclusive (50 codes).
     */
    private static boolean isPrivateUseScriptIdentifier(String s) {
        if ((s == null) || (s.length() != 4) || !s.startsWith("qa"))
            return false;
        char third = s.charAt(2);
        char fourth = s.charAt(3);
        if (third == 'a')
            return (fourth >= 'a') && (fourth <= 'z');      // qaaa .. qaaz
        else if (third == 'b')
            return (fourth >= 'a') && (fourth <= 'x');      // qaba .. qabx
        else
            return false;
    }

}