package org.apache.lucene.analysis.tr.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.CharArrayMap;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Compiles a decision list into a hash where keys are patterns and
* values give the rank and the classification of the decision list. The
* sign of a value gives the classification (positive implies t, negative
* implies nil), and the absolute value gives the rank (smaller rank
* means higher priority).
*/
/**
 * Static character lookup tables for Turkish text plus access to the
 * per-letter pattern tables.
 *
 * <p>As described by the original documentation, a pattern table is a compiled
 * decision list: keys are patterns and each value encodes both the rank and
 * the classification of the rule. The sign of a value gives the classification
 * (positive implies t, negative implies nil), and the absolute value gives the
 * rank (smaller rank means higher priority).
 *
 * <p>This class only holds static members; its constructor is private and is
 * never invoked.
 */
public final class PatternTableFactory {

  /**
   * Maps each Turkish accented letter to its ASCII equivalent, preserving case
   * (for example {@code 'ç' -> 'c'} and {@code 'Ç' -> 'C'}). The map is
   * unmodifiable and contains only the twelve accented letters.
   */
  public static final Map<Character, Character> turkish_asciify_table =
      Collections.unmodifiableMap(asciifyPairs());

  /**
   * Maps Turkish accented letters and ASCII letters of either case to a
   * lowercase ASCII letter (for example {@code 'Ç' -> 'c'}, {@code 'İ' -> 'i'},
   * {@code 'A' -> 'a'}, {@code 'a' -> 'a'}). The map is unmodifiable.
   */
  public static final Map<Character, Character> turkish_downcase_asciify_table =
      buildDowncaseAsciifyTable();

  /**
   * Lowercases ASCII letters, while Turkish accented letters are converted to
   * the <em>uppercase</em> ASCII equivalent. Useful for pattern matching.
   *
   * <p>The confusing case of i, as implemented by this table, is:
   * {@code i => i}, dotted {@code İ => i}, dotless {@code ı => I} and
   * {@code I => i}. The map is unmodifiable.
   */
  public static final Map<Character, Character> turkish_upcase_accents_table =
      buildUpcaseAccentsTable();

  /**
   * Converts Turkish accented letters into their ASCII equivalent and the
   * corresponding ASCII letters into the Turkish accented version, so that
   * looking a character up toggles its accent. Case is preserved in both
   * directions. The map is unmodifiable.
   */
  public static final Map<Character, Character> turkish_toggle_accent_table =
      buildToggleAccentTable();

  /**
   * Case-sensitivity flag. It is not read inside this class.
   * NOTE(review): presumably consumed by the per-letter map holders when they
   * create their {@link CharArrayMap} instances; confirm against those classes.
   */
  static final boolean ignoreCase = false;

  private PatternTableFactory() {
  }

  /**
   * Returns the pattern table associated with the given letter.
   *
   * @param c the letter to look up; only the lowercase letters {@code c},
   *          {@code g}, {@code i}, {@code o}, {@code s} and {@code u} have a table
   * @return the table for {@code c}, or {@code null} for any other character
   */
  public static CharArrayMap<Integer> getMap(char c) {
    switch (c) {
      case 'c':
        return MapC.map;
      case 'g':
        return MapG.map;
      case 'i':
        return MapI.map;
      case 'o':
        return MapO.map;
      case 's':
        return MapS.map;
      case 'u':
        return MapU.map;
      default:
        // Callers receive null for letters without a pattern table.
        return null;
    }
  }

  /**
   * Always fails: cloning is not supported.
   *
   * @throws CloneNotSupportedException unconditionally
   */
  @Override
  public Object clone() throws CloneNotSupportedException {
    throw new CloneNotSupportedException();
  }

  /**
   * Creates a fresh mutable map of the twelve case-preserving
   * "accented letter to ASCII letter" pairs.
   */
  private static Map<Character, Character> asciifyPairs() {
    final Map<Character, Character> pairs = new HashMap<Character, Character>();
    pairs.put('ç', 'c');
    pairs.put('Ç', 'C');
    pairs.put('ğ', 'g');
    pairs.put('Ğ', 'G');
    pairs.put('ö', 'o');
    pairs.put('Ö', 'O');
    pairs.put('ü', 'u');
    pairs.put('Ü', 'U');
    pairs.put('ı', 'i');
    pairs.put('İ', 'I');
    pairs.put('ş', 's');
    pairs.put('Ş', 'S');
    return pairs;
  }

  /**
   * Adds {@code X -> x} and {@code x -> x} to {@code table} for every ASCII
   * letter {@code A}..{@code Z}.
   */
  private static void putAsciiLowercasing(Map<Character, Character> table) {
    for (char upper = 'A'; upper <= 'Z'; upper++) {
      // Character.toLowerCase(char) is locale-independent, so 'I' maps to 'i'.
      final char lower = Character.toLowerCase(upper);
      table.put(upper, lower);
      table.put(lower, lower);
    }
  }

  /** Builds the contents of {@link #turkish_downcase_asciify_table}. */
  private static Map<Character, Character> buildDowncaseAsciifyTable() {
    final Map<Character, Character> table = new HashMap<Character, Character>();
    putAsciiLowercasing(table);
    table.put('ç', 'c');
    table.put('Ç', 'c');
    table.put('ğ', 'g');
    table.put('Ğ', 'g');
    table.put('ö', 'o');
    table.put('Ö', 'o');
    table.put('ı', 'i');
    table.put('İ', 'i');
    table.put('ş', 's');
    table.put('Ş', 's');
    table.put('ü', 'u');
    table.put('Ü', 'u');
    return Collections.unmodifiableMap(table);
  }

  /** Builds the contents of {@link #turkish_upcase_accents_table}. */
  private static Map<Character, Character> buildUpcaseAccentsTable() {
    final Map<Character, Character> table = new HashMap<Character, Character>();
    putAsciiLowercasing(table);
    table.put('ç', 'C');
    table.put('Ç', 'C');
    table.put('ğ', 'G');
    table.put('Ğ', 'G');
    table.put('ö', 'O');
    table.put('Ö', 'O');
    table.put('ı', 'I');
    // Dotted capital I is the one accented letter that maps to lowercase.
    table.put('İ', 'i');
    table.put('ş', 'S');
    table.put('Ş', 'S');
    table.put('ü', 'U');
    table.put('Ü', 'U');
    return Collections.unmodifiableMap(table);
  }

  /**
   * Builds the contents of {@link #turkish_toggle_accent_table}: every
   * asciify pair plus its inverse.
   */
  private static Map<Character, Character> buildToggleAccentTable() {
    final Map<Character, Character> table = new HashMap<Character, Character>();
    for (Map.Entry<Character, Character> pair : asciifyPairs().entrySet()) {
      table.put(pair.getKey(), pair.getValue()); // accented -> ASCII
      table.put(pair.getValue(), pair.getKey()); // ASCII -> accented
    }
    return Collections.unmodifiableMap(table);
  }
}