/*
 * $Id$
 *
 * Copyright 2012 Valentyn Kolesnikov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.xmltopdf;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * UkrainianToLatin utility class.
 *
 * <p>Transliterates Ukrainian Cyrillic text into Latin characters. Some letters
 * are written differently at the beginning of a word than elsewhere
 * (for example "Ye" versus "ie"), and the pair "зг" is written "zgh".</p>
 *
 * @author Valentyn Kolesnikov
 * @version $Revision$ $Date$
 */
public final class UkrainianToLatin {

    /**
     * One entry per Ukrainian letter.
     *
     * <p>The constant <em>name</em> carries the Latin spelling: its first half is
     * the form used for the first letter of a word, its second half is the form
     * used for any later letter (for a three-character name such as {@code YiI}
     * the first half is two characters long). The constructor argument holds the
     * upper-case and lower-case Cyrillic letter, in that order.</p>
     */
    private enum Convert {
        AA("Аа"), BB("Бб"), VV("Вв"), HH("Гг"), GG("Ґґ"), DD("Дд"), EE("Ее"), YeIe("Єє"), ZhZh("Жж"), ZZ("Зз"),
        YY("Ии"), II("Іі"), YiI("Її"), YI("Йй"), KK("Кк"), LL("Лл"), MM("Мм"), NN("Нн"), OO("Оо"), PP("Пп"),
        RR("Рр"), SS("Сс"), TT("Тт"), UU("Уу"), FF("Фф"), KhKh("Хх"), TsTs("Цц"), ChCh("Чч"), ShSh("Шш"),
        ShchShch("Щщ"), YuIu("Юю"), YaIa("Яя");

        private final String cyrilic;

        private Convert(String cyrilic) {
            this.cyrilic = cyrilic;
        }

        /**
         * Gets cyrilic.
         * @return the upper-case letter followed by the lower-case letter
         */
        public String getCyrilic() {
            return cyrilic;
        }

        /**
         * Gets the Latin form used when the letter is the first one of a word.
         * @return the first half of the constant name, e.g. "Ye" for {@code YeIe}
         */
        String getLatinAtWordStart() {
            return name().substring(0, splitIndex());
        }

        /**
         * Gets the Latin form used when the letter is not the first one of a word.
         * @return the second half of the constant name, e.g. "Ie" for {@code YeIe}
         */
        String getLatinInsideWord() {
            return name().substring(splitIndex());
        }

        /**
         * Position where the constant name is split into its two halves.
         * Rounds up so that a three-character name splits as 2 + 1.
         */
        private int splitIndex() {
            return (name().length() + 1) / 2;
        }
    }

    /** A letter from {@link Convert} together with the case it was written in. */
    private static final class ConvertCase {
        private final Convert convert;
        private final boolean lowcase;

        ConvertCase(Convert convert, boolean lowcase) {
            this.convert = convert;
            this.lowcase = lowcase;
        }

        Convert getConvert() {
            return convert;
        }

        boolean isLowcase() {
            return lowcase;
        }
    }

    /** Lookup from a single Cyrillic character to its letter and case. */
    private static final Map<Character, ConvertCase> CYR_TO_LAT = new HashMap<Character, ConvertCase>();

    static {
        for (Convert convert : Convert.values()) {
            final String cyrilic = convert.getCyrilic();
            CYR_TO_LAT.put(cyrilic.charAt(0), new ConvertCase(convert, false));
            CYR_TO_LAT.put(cyrilic.charAt(1), new ConvertCase(convert, true));
        }
        // The letter "Ё" is not in the table above; it is converted like "Е".
        CYR_TO_LAT.put('Ё', new ConvertCase(Convert.EE, false));
        CYR_TO_LAT.put('ё', new ConvertCase(Convert.EE, true));
    }

    /**
     * Generates latinic from cyrilic.
     *
     * <p>Spaces and newlines are copied to the result and start a new word.
     * Every other character that is not a known Cyrillic letter (hyphen,
     * apostrophe, comma, digits, Latin letters, ...) is dropped without
     * starting a new word, so the letter after an apostrophe is still
     * converted as a mid-word letter.</p>
     *
     * @param name the text to convert, may be {@code null}
     * @return the converted text, an empty string when {@code name} is {@code null}
     */
    public static String generateLat(String name) {
        if (name == null) {
            return "";
        }
        final StringBuilder result = new StringBuilder();
        final int length = name.length();
        boolean wordStart = true;
        for (int index = 0; index < length; index += 1) {
            final char curChar = name.charAt(index);
            final ConvertCase convertCase = CYR_TO_LAT.get(curChar);
            if (convertCase == null) {
                if (curChar == ' ' || curChar == '\n') {
                    // Word separators are kept; the next letter begins a new word.
                    wordStart = true;
                    result.append(curChar);
                }
                continue;
            }
            ConvertCase nextConvertCase = index + 1 < length ? CYR_TO_LAT.get(name.charAt(index + 1)) : null;
            if (nextConvertCase == null) {
                // No following letter: let the current letter decide the casing on its own.
                nextConvertCase = convertCase;
            }
            final Convert convert = convertCase.getConvert();
            final String latin = wordStart ? convert.getLatinAtWordStart() : convert.getLatinInsideWord();
            appendLatin(result, latin, convertCase, nextConvertCase);
            wordStart = false;
        }
        return result.toString();
    }

    /**
     * Appends the Latin form of one letter in the proper case.
     *
     * <p>A lower-case letter gives lower-case output. An upper-case letter
     * followed by a lower-case letter keeps the capitalised form ("Ye");
     * otherwise the output is fully upper-case ("YE"). Case conversion uses
     * {@link Locale#ROOT} so the result does not depend on the default locale
     * (with a Turkish default locale "I" would otherwise become a dotless "ı").</p>
     *
     * @param result result buffer to store string in latin
     * @param latin Latin form of the current letter as spelled in the enum name
     * @param convertCase current character object
     * @param nextConvertCase next character object
     */
    private static void appendLatin(StringBuilder result, String latin, ConvertCase convertCase,
            ConvertCase nextConvertCase) {
        if (convertCase.isLowcase()) {
            result.append(latin.toLowerCase(Locale.ROOT));
        } else if (nextConvertCase.isLowcase()) {
            result.append(latin);
        } else {
            result.append(latin.toUpperCase(Locale.ROOT));
        }
        // "зг" is written "zgh": insert a "g" between the "z" and the "h" of the next letter.
        if (convertCase.getConvert() == Convert.ZZ && nextConvertCase.getConvert() == Convert.HH) {
            result.append(nextConvertCase.isLowcase() ? "g" : "G");
        }
    }

    /**
     * Prints a short description of the utility.
     * @param args ignored
     */
    public static void main(String[] args) {
        final String message = "The utility class to convert ukrainian words to the latin characters.\n\n"
            + "For docs, license, tests, and downloads, see: https://github.com/javadev/ukrainiantolatin";
        System.out.println(message);
    }
}