FindGlossaryThread.java example

Explorer
OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2008 Alex Buloichik
               2009 Wildrich Fourie, Didier Briel, Alex Buloichik
               2013 Aaron Madlon-Kay, Alex Buloichik
               2015 Didier Briel, Aaron Madlon-Kay
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.gui.glossary;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.omegat.core.Core;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.gui.common.EntryInfoSearchThread;
import org.omegat.tokenizer.DefaultTokenizer;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.ITokenizer.StemmingMode;
import org.omegat.util.Preferences;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;

/**
 * Class for finding glossary entries for the current entry in the editor.
 *
 * This process looks up the source string entries and finds the matching
 * glossary entries.
 * <p>
 * Test cases for whether a glossary entry matches a string entry text:
 * <ul>
 * <li>"Edit" vs "Editing" - doesn't match
 * <li>"Old Line" vs "Hold Line" - doesn't match
 * <li>"Some Text" vs "There was some text there" - OK!
 * <li>"Edit" vs "Editing the edit" - matches OK!
 * <li>"Edit" vs "Edit" - matches OK!
 * </ul>
 *
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 * @author Alex Buloichik (alex73mail@gmail.com)
 * @author Wildrich Fourie
 * @author Didier Briel
 * @author Aaron Madlon-Kay
 */
public class FindGlossaryThread extends EntryInfoSearchThread<List<GlossaryEntry>> {

    /** Source text of the entry being searched, captured at construction time. */
    private final String src;

    /** Supplies the candidate glossary entries for the source text. */
    private final GlossaryManager manager;

    /**
     * Creates a search for the glossary entries that apply to {@code newEntry}.
     *
     * @param pane
     *            glossary pane, passed on to the superclass
     * @param newEntry
     *            entry whose source text will be searched
     * @param manager
     *            manager queried for candidate glossary entries
     */
    public FindGlossaryThread(final GlossaryTextArea pane, final SourceTextEntry newEntry,
            final GlossaryManager manager) {
        super(pane, newEntry);
        src = newEntry.getSrcText();
        this.manager = manager;
    }

    /**
     * Collects the glossary entries whose source term occurs in the source
     * text, then sorts them and merges entries sharing the same source term.
     *
     * @return the sorted and merged matches; an empty list when the project has
     *         no source tokenizer or the manager returns no entries
     */
    @Override
    protected List<GlossaryEntry> search() {

        ITokenizer tok = Core.getProject().getSourceTokenizer();
        if (tok == null) {
            return Collections.emptyList();
        }

        List<GlossaryEntry> entries = manager.getGlossaryEntries(src);
        if (entries == null) {
            return Collections.emptyList();
        }

        List<GlossaryEntry> result = new ArrayList<GlossaryEntry>();

        // Make comparison case-insensitive
        Locale loc = Core.getProject().getProjectProperties().getSourceLanguage().getLocale();
        String srcLower = src.toLowerCase(loc);

        // Compute source entry tokens
        Token[] strTokens = tokenize(tok, srcLower);

        // Neither value depends on the glossary entry, so read each one once
        // instead of once per candidate.
        boolean notExact = Preferences.isPreferenceDefault(Preferences.GLOSSARY_NOT_EXACT_MATCH,
                Preferences.GLOSSARY_NOT_EXACT_MATCH_DEFAULT);
        boolean spaceDelimited = Core.getProject().getProjectProperties().getSourceLanguage()
                .isSpaceDelimited();

        for (GlossaryEntry glosEntry : entries) {
            // NOTE(review): inherited from EntryInfoSearchThread; presumably
            // aborts the search once the active entry has changed - confirm.
            checkEntryChanged();

            // Compute glossary entry tokens
            String glosSrc = glosEntry.getSrcText();
            Token[] glosTokens = tokenize(tok, glosSrc.toLowerCase(loc));
            if (glosTokens.length == 0) {
                continue;
            }

            if (DefaultTokenizer.isContainsAll(strTokens, glosTokens, notExact)) {
                result.add(glosEntry);
            } else if (!spaceDelimited && StringUtil.isCJK(glosSrc) && src.contains(glosSrc)) {
                // This is a CJK word and our source language is not
                // space-delimited, so include if word appears anywhere in
                // source string.
                result.add(glosEntry);
            }
        }

        // After the matched entries have been tokenized and listed.
        // We reorder entries: 1) by priority, 2) by length, 3) by alphabet
        // Then remove the duplicates and combine the synonyms.
        sortGlossaryEntries(result);
        return filterGlossary(result);
    }

    /**
     * Tokenizes {@code str} according to the glossary stemming preference:
     * word tokenization in {@link StemmingMode#GLOSSARY} mode when
     * {@link Preferences#GLOSSARY_STEMMING} is on, verbatim tokenization
     * otherwise.
     *
     * @param tok
     *            tokenizer to use
     * @param str
     *            text to tokenize
     * @return the tokens produced by the tokenizer
     */
    private Token[] tokenize(ITokenizer tok, String str) {
        if (Preferences.isPreferenceDefault(Preferences.GLOSSARY_STEMMING,
                Preferences.GLOSSARY_STEMMING_DEFAULT)) {
            return tok.tokenizeWords(str, StemmingMode.GLOSSARY);
        } else {
            return tok.tokenizeVerbatim(str);
        }
    }

    /**
     * Sorts {@code entries} in place. Ordering keys, in decreasing
     * significance: priority entries first, longer source text first, source
     * text ignoring case, source text case-sensitively, target text ignoring
     * case.
     *
     * @param entries
     *            list to sort; it is modified
     */
    static void sortGlossaryEntries(List<GlossaryEntry> entries) {
        Collections.sort(entries, new Comparator<GlossaryEntry>() {
            public int compare(GlossaryEntry o1, GlossaryEntry o2) {
                String src1 = o1.getSrcText();
                String src2 = o2.getSrcText();

                // Priority entries rank 1, all others rank 2.
                int c = (o1.getPriority() ? 1 : 2) - (o2.getPriority() ? 1 : 2);
                if (c == 0) {
                    // Longer source terms come first. Lengths are
                    // non-negative, so the subtraction cannot overflow.
                    c = src2.length() - src1.length();
                }
                if (c == 0) {
                    c = src1.compareToIgnoreCase(src2);
                }
                if (c == 0) {
                    c = src1.compareTo(src2);
                }
                if (c == 0) {
                    c = o1.getLocText().compareToIgnoreCase(o2.getLocText());
                }
                return c;
            }
        });
    }

    /**
     * Removes exact duplicates from {@code result} and merges all entries
     * sharing the same source text into a single combined entry.
     * <p>
     * Two entries are exact duplicates when their source text, target text and
     * comment text are all equal; only the first one is kept. Combined entries
     * appear in the order in which their source text first occurs in
     * {@code result}. Within one combined entry, entries with equal target
     * text are placed next to each other, ordered by the first occurrence of
     * that target text. Entries with an empty source text are left out.
     * <p>
     * The list passed in is not modified, so unmodifiable lists are accepted.
     * An entry with an empty target text is kept whether or not the list also
     * contains duplicates.
     *
     * @param result
     *            matched entries, normally already sorted by
     *            {@link #sortGlossaryEntries(List)}
     * @return {@code result} itself when it is empty, otherwise a new list of
     *         combined entries
     */
    static List<GlossaryEntry> filterGlossary(List<GlossaryEntry> result) {
        // First check that entries exist in the list.
        if (result.isEmpty()) {
            return result;
        }

        // Bucket the entries by source text. LinkedHashMap keeps the buckets
        // in first-seen order, which is the order of the combined output.
        Map<String, List<GlossaryEntry>> bySource = new LinkedHashMap<String, List<GlossaryEntry>>();
        for (GlossaryEntry entry : result) {
            String srcText = entry.getSrcText();
            if (srcText.equals("")) {
                continue;
            }

            List<GlossaryEntry> group = bySource.get(srcText);
            if (group == null) {
                group = new ArrayList<GlossaryEntry>();
                bySource.put(srcText, group);
            }

            // Exact duplicates always share the source text, so it is enough
            // to look for them inside the bucket.
            if (!hasSameTranslation(group, entry)) {
                group.add(entry);
            }
        }

        List<GlossaryEntry> returnList = new ArrayList<GlossaryEntry>(bySource.size());
        for (List<GlossaryEntry> group : bySource.values()) {
            returnList.add(combine(clusterByLocText(group)));
        }
        return returnList;
    }

    /**
     * Tells whether {@code kept} already holds an entry with the same target
     * text and the same comment text as {@code candidate}.
     */
    private static boolean hasSameTranslation(List<GlossaryEntry> kept, GlossaryEntry candidate) {
        for (GlossaryEntry other : kept) {
            if (other.getLocText().equals(candidate.getLocText())
                    && other.getCommentText().equals(candidate.getCommentText())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the entries of {@code group} reordered so that entries with
     * equal target text are adjacent, keeping first-occurrence order.
     */
    private static List<GlossaryEntry> clusterByLocText(List<GlossaryEntry> group) {
        Map<String, List<GlossaryEntry>> byLocText = new LinkedHashMap<String, List<GlossaryEntry>>();
        for (GlossaryEntry entry : group) {
            String locText = entry.getLocText();
            List<GlossaryEntry> cluster = byLocText.get(locText);
            if (cluster == null) {
                cluster = new ArrayList<GlossaryEntry>();
                byLocText.put(locText, cluster);
            }
            cluster.add(entry);
        }

        List<GlossaryEntry> ordered = new ArrayList<GlossaryEntry>(group.size());
        for (List<GlossaryEntry> cluster : byLocText.values()) {
            ordered.addAll(cluster);
        }
        return ordered;
    }

    /**
     * Builds one entry carrying the source text of the first entry of
     * {@code group} and the concatenated target terms, comments and
     * priorities of every entry in it. {@code group} must not be empty.
     */
    private static GlossaryEntry combine(List<GlossaryEntry> group) {
        List<String> locTxts = new ArrayList<String>();
        List<String> comTxts = new ArrayList<String>();
        List<Boolean> prios = new ArrayList<Boolean>();

        for (GlossaryEntry e : group) {
            for (String s : e.getLocTerms(false)) {
                locTxts.add(s);
            }
            for (String s : e.getComments()) {
                comTxts.add(s);
            }
            for (boolean s : e.getPriorities()) {
                prios.add(s);
            }
        }

        // The constructor takes a primitive array, so unbox by hand.
        boolean[] priorities = new boolean[prios.size()];
        for (int i = 0; i < priorities.length; i++) {
            priorities[i] = prios.get(i);
        }

        return new GlossaryEntry(group.get(0).getSrcText(), locTxts.toArray(new String[locTxts.size()]),
                comTxts.toArray(new String[comTxts.size()]), priorities);
    }
}