/* LanguageSplitter.java - TLang functions for splitting list of languages
 * (e.g. for dropdown menu in spinner or choice box).
 *
 * Copyright (c) 2012 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
 * Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
 */

package wikokit.base.wikt.sql.lang;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikt.sql.TLang;

/** TLang functions for splitting the list of languages into three parts
 * (e.g. for a dropdown menu in a spinner or a choice box).
 *
 * Typical usage: call {@link #splitAllLangTo3parts(int, int)} first, then
 * {@link #mergeArrays()} to get all the parts as one array.
 */
public class LanguageSplitter {

    /** Shared empty array: type token for toArray() and initial value of the parts. */
    private final static TLang[] NULL_TLANG_ARRAY = new TLang[0];

    /** Three parts of languages: (1) the parts are split by the number of
     * entries, (2) languages inside each part are ordered by name.
     * The parts are empty until splitAllLangTo3parts() succeeds.
     */
    TLang[] part1 = NULL_TLANG_ARRAY;
    TLang[] part2 = NULL_TLANG_ARRAY;
    TLang[] part3 = NULL_TLANG_ARRAY;

    /** Exclusive end indexes of part 1 and part 2 in the array sorted by size,
     * they are calculated on the base of border1 and border2.
     */
    int part1_end, part2_end;

    /** Gets the number of languages in the first part. */
    public int getPart1Length () {
        return part1_end;
    }

    /** Gets the exclusive end index of the second part, i.e. the total number
     * of languages in the first and the second parts together
     * (it is not the length of the second part alone).
     */
    public int getPart2Length () {
        return part2_end;
    }

    /** Splits all TLang[] languages of Wiktionary into three parts:
     * (1) languages with the biggest number of entries (&gt;= border1),
     * (2) languages with the average number of entries (&gt;= border2),
     * (3) languages with the small number of entries.
     * Languages without entries and translations are skipped.
     * Each part is sorted by the language name.
     *
     * part1_end and part2_end are boundaries of the parts in the array
     * sorted by size, they are calculated on the base of border1 and border2.
     *
     * @param border1 minimal number of entries of a language in the first part
     * @param border2 minimal number of entries of a language in the second part,
     *                it is expected that border2 &lt;= border1, otherwise
     *                the second part will be empty
     * @return true if success, false if the list of languages is not available.
     */
    public boolean splitAllLangTo3parts (int border1, int border2) {

        Map<Integer, TLang> lang_map = TLang.getAllTLang();
        if(null == lang_map || lang_map.isEmpty()) {
            System.err.println("Error: LanguageSplitter.splitAllLangTo3parts The database is not available.");
            return false;
        }

        TLang[] tlang_array = lang_map.values().toArray(NULL_TLANG_ARRAY);
        List<TLang> nonempty_list = removeEmpty(tlang_array);
        TLang[] sorted_by_size_array = sortLanguageBySize(nonempty_list.toArray(NULL_TLANG_ARRAY));

        part1_end = getIndexInSortedArrayDescOfLastBigElement(sorted_by_size_array, border1);

        // part 2 cannot end before part 1 ends (it happens if border2 > border1)
        part2_end = Math.max(part1_end,
                    getIndexInSortedArrayDescOfLastBigElement(sorted_by_size_array, border2));

        // todo: use Arrays.copyOfRange instead of the local copyOfRange,
        // when Android API level 9 will be the minimal one.
        // The parts are adjacent: each part starts exactly where the previous one ends.
        part1 = sortLanguageByName(copyOfRange(sorted_by_size_array, 0, part1_end));
        part2 = sortLanguageByName(copyOfRange(sorted_by_size_array, part1_end, part2_end));
        part3 = sortLanguageByName(copyOfRange(sorted_by_size_array, part2_end, sorted_by_size_array.length));

        // 1) temporary solution, in order to skip 'index_art-oou' and 'index-de'
        // !remove this line (below), when the new dump will be used.
        // The new length is clamped to zero, since part3 could be shorter than two elements.
        part3 = copyOfRange(part3, 0, Math.max(0, part3.length - 2));

        return true;
    }

    /** Merges all parts created by splitAllLangTo3parts() into one array.
     * So, call splitAllLangTo3parts() before this function,
     * otherwise the result is an empty array.
     */
    public TLang[] mergeArrays () {
        return mergeArrays(part1, part2, part3);
    }

    /** Gets list of text lines in the form:
     * "language name, language code, number of entries, number of translations"
     * (separated by spaces).
     * The first line - "All languages".
     */
    public static List<String> getLanguageNames(TLang[] langs) {

        List<String> lines = new ArrayList<String>(langs.length + 1);
        lines.add("All languages");

        for(TLang tl : langs) {
            LanguageType l = tl.getLanguage();
            lines.add(l.getName() + " " + l.getCode() + " " +
                      tl.getNumberPOS() + " " + tl.getNumberTranslations());
        }
        return lines;
    }

    /** Gets array of text lines in the form:
     * "language name, language code, number of entries, number of translations"
     * (separated by spaces).
     * The first line is a header.
     */
    public static String[] getLangCodeStatistics(TLang[] langs) {

        String[] lines = new String[langs.length + 1];
        lines[0] = "Language, code, entries, translations";

        for(int i=0; i < langs.length; i++) {
            TLang tl = langs[i];
            LanguageType l = tl.getLanguage();

            lines[i+1] = l.getName() + " " + l.getCode() + " " +
                         tl.getNumberPOS() + " " + tl.getNumberTranslations();
        }
        return lines;
    }

    /** Merges three arrays into one array: elements of a, then b, then c. */
    private TLang[] mergeArrays (TLang[] a,TLang[] b,TLang[] c) {

        TLang[] result = new TLang[a.length + b.length + c.length];

        System.arraycopy(a, 0, result, 0,                   a.length);
        System.arraycopy(b, 0, result, a.length,            b.length);
        System.arraycopy(c, 0, result, a.length + b.length, c.length);

        return result;
    }

    /** Gets the number of leading "big" elements of the array sorted in
     * descending order by the number of POS, i.e. gets index i, so that
     * array[0..i-1] &gt;= value and value &gt; array[i].
     * The result is suitable as an exclusive end index for copyOfRange().
     *
     * @return index of the first element which is less than value,
     *         or array.length if there are no such elements
     *         (0 for the empty array)
     */
    private static int getIndexInSortedArrayDescOfLastBigElement (
                                                TLang[] array, int value) {
        for(int i=0; i < array.length; i++) {
            if(array[i].getNumberPOS() < value) {
                return i;
            }
        }
        return array.length; // all elements are big enough
    }

    /** Removes empty elements, i.e. languages with zero number of POS
     * and absent translations. Prints the number of remained and removed
     * languages to the standard output.
     */
    private static List<TLang> removeEmpty(TLang[] langs) {

        List<TLang> result = new ArrayList<TLang>();

        for(TLang l : langs) {
            if(l.getNumberPOS() > 0 || l.getNumberTranslations() > 0) {
                result.add(l);
            }
        }
        int empty_lang = langs.length - result.size();

        System.out.println("Languages with entries: " + result.size() + "; empty languages: " + empty_lang);
        return result;
    }

    /** Sorts languages (in place) by size, i.e. by the number of POS,
     * in the order defined by LanguageSizeComparator (presumably descending,
     * since the result is treated as a descending array - todo: verify).
     *
     * @return the same array langs, which is sorted now
     */
    private static TLang[] sortLanguageBySize(TLang[] langs) {

        Comparator<TLang> by_size = new LanguageSizeComparator();
        Arrays.sort(langs, by_size);
        return langs;
    }

    /** Sorts languages (in place) by name,
     * in the order defined by LanguageNameComparator.
     *
     * @return the same array langs, which is sorted now
     */
    private static TLang[] sortLanguageByName(TLang[] langs) {

        Comparator<TLang> by_name = new LanguageNameComparator();
        Arrays.sort(langs, by_name);
        return langs;
    }

    /** Implementation of the standard "Arrays.copyOfRange" which is available
     * only from android SDK API level 9... (2.3. ...)
     *
     * Todo: erase this func, when change <uses-sdk android:minSdkVersion=from "8" to "9" />
     *
     * @param original the source array
     * @param from     index of the first element to copy (inclusive)
     * @param to       index after the last element to copy (exclusive),
     *                 it may be greater than original.length, then the tail
     *                 of the result is filled by null
     * @return new array with (to - from) elements
     * @throws IllegalArgumentException if from &gt; to
     * @throws IndexOutOfBoundsException if from is negative or
     *                 from &gt; original.length (thrown by System.arraycopy)
     */
    public static TLang[] copyOfRange(TLang[] original, int from, int to) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        TLang[] copy = new TLang[newLength];
        System.arraycopy(original, from, copy, 0,
                         Math.min(original.length - from, newLength));
        return copy;
    }
}