/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2015-2016 Hiroshi Miura, Aaron Madlon-Kay Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.core.dictionaries; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map.Entry; import org.omegat.util.Language; import org.omegat.util.StringUtil; import org.trie4j.MapTrie; import org.trie4j.doublearray.MapDoubleArray; import org.trie4j.patricia.MapPatriciaTrie; /** * A class that encapsulates the storage and retrieval of string-keyed data. * Usage: * <ol> * <li>Instantiate and insert data with {@link #add(String, Object)} * <li>Call {@link #done()} when done adding data (required!) * <li>Retrieve data with {@link #lookUp(String, boolean)} * </ol> * * @author Aaron Madlon-Kay * * @param <T> * The type of data stored */ public class DictionaryData<T> { private final Language language; private MapDoubleArray<Object> data; private MapTrie<Object> temp; /** * @param language * The dictionary's index language */ public DictionaryData(Language language) { this.language = language; this.temp = new MapPatriciaTrie<>(); } /** * Insert a key=value pair into the data store. Unicode normalization is * performed on the key. The value is stored both for the key and its * lowercase version, if the latter differs. * * @param key * The key * @param value * The value */ public void add(String key, T value) { key = StringUtil.normalizeUnicode(key); doAdd(key, value); String lowerKey = key.toLowerCase(language.getLocale()); if (!key.equals(lowerKey)) { doAdd(lowerKey, value); } } /** * Do the actual storing of the value. Most values are going to be singular, * but dictionaries may store multiple definitions for the same key, so in * that case we store the values in an array. * * @param key * @param value */ private void doAdd(String key, T value) { Object stored = temp.get(key); if (stored == null) { temp.insert(key, value); } else { if (stored instanceof Object[]) { stored = extendArray((Object[]) stored, value); } else { stored = new Object[] { stored, value }; } temp.put(key, stored); } } /** * Return the given array with the given value appended to it. * * @param array * @param value * @return */ Object[] extendArray(Object[] array, Object value) { Object[] newArray = new Object[array.length + 1]; System.arraycopy(array, 0, newArray, 0, array.length); newArray[newArray.length - 1] = value; return newArray; } /** * Finalize the data store. This is <strong>required</strong> to be called * before any lookups can be performed. */ public void done() { data = new MapDoubleArray<>(temp); temp = null; } /** * Look up the given word. * * @param word * The word to look up * @return A list of stored objects matching the given word * @throws IllegalStateException * If {@link #done()} has not yet been called */ public List<Entry<String, T>> lookUp(String word) throws IllegalStateException { return doLookUpWithLowerCase(word, false); } /** * Look up the given word using predictive completion; e.g. "term" will * match "terminology" (and "terminal", etc.). * * @param word * The word to look up * @return A list of stored objects matching the given word * @throws IllegalStateException * If {@link #done()} has not yet been called */ public List<Entry<String, T>> lookUpPredictive(String word) throws IllegalStateException { return doLookUpWithLowerCase(word, true); } private List<Entry<String, T>> doLookUpWithLowerCase(String word, boolean predictive) { List<Entry<String, T>> result = doLookUp(word, predictive); if (result.isEmpty()) { String lowerWord = word.toLowerCase(language.getLocale()); result = doLookUp(lowerWord, predictive); } return result; } private List<Entry<String, T>> doLookUp(String word, boolean predictive) throws IllegalStateException { if (data == null) { throw new IllegalStateException( "Object has not been finalized! You must call done() before doing any lookups."); } List<Entry<String, T>> result = new ArrayList<>(); if (predictive) { data.predictiveSearch(word).forEach(w -> get(w, data.get(w), result)); } else { get(word, data.get(word), result); } return result; } /** * Unpack the given stored object (singular, or array) into the given * collection. * * @param value * @param into */ @SuppressWarnings("unchecked") private <U> void get(U key, Object value, Collection<Entry<U, T>> into) { if (value == null) { return; } if (value instanceof Object[]) { for (Object o : (Object[]) value) { into.add(new AbstractMap.SimpleImmutableEntry<>(key, (T) o)); } } else { into.add(new AbstractMap.SimpleImmutableEntry<>(key, (T) value)); } } /** * Get the number of stored keys. Returns <code>-1</code> if {@link #done()} * has not yet been called. * * @return The number of stored keys */ public int size() { return data == null ? -1 : data.size(); } }