/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2008 Alex Buloichik
2009 Wildrich Fourie, Didier Briel, Alex Buloichik
2013 Aaron Madlon-Kay, Alex Buloichik
2015 Didier Briel, Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.gui.glossary;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import org.omegat.core.Core;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.gui.common.EntryInfoSearchThread;
import org.omegat.tokenizer.DefaultTokenizer;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.ITokenizer.StemmingMode;
import org.omegat.util.Preferences;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;
/**
* Class for finding glossary entries for the current entry in the editor.
*
* This process looks up the source string entries and finds matching glossary
* entries.
* <p>
* Test cases for whether a glossary entry matches a string entry text:
* <ul>
* <li>"Edit" vs "Editing" - doesn't match
* <li>"Old Line" vs "Hold Line" - doesn't match
* <li>"Some Text" vs "There was some text there" - OK!
* <li>"Edit" vs "Editing the edit" - matches OK!
* <li>"Edit" vs "Edit" - matches OK!
* </ul>
*
* @author Keith Godfrey
* @author Maxym Mykhalchuk
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Wildrich Fourie
* @author Didier Briel
* @author Aaron Madlon-Kay
*/
public class FindGlossaryThread extends EntryInfoSearchThread<List<GlossaryEntry>> {

    /** Source text of the entry being searched, captured when the thread is created. */
    private final String src;

    /** Supplies the candidate glossary entries for {@link #src}. */
    private final GlossaryManager manager;

    /**
     * Creates a search for the glossary entries matching the given source entry.
     *
     * @param pane
     *            glossary pane, handed to the superclass together with the entry
     * @param newEntry
     *            entry whose source text is searched for glossary terms
     * @param manager
     *            provider of the candidate glossary entries
     */
    public FindGlossaryThread(final GlossaryTextArea pane, final SourceTextEntry newEntry,
            final GlossaryManager manager) {
        super(pane, newEntry);
        src = newEntry.getSrcText();
        this.manager = manager;
    }

    /**
     * Finds the glossary entries whose source term occurs in the source text,
     * then sorts them and merges duplicates and synonyms.
     *
     * @return the merged matching entries; an empty list when the project has
     *         no source tokenizer or the manager returns no entry list
     */
    @Override
    protected List<GlossaryEntry> search() {
        ITokenizer tok = Core.getProject().getSourceTokenizer();
        if (tok == null) {
            return Collections.emptyList();
        }

        List<GlossaryEntry> entries = manager.getGlossaryEntries(src);
        if (entries == null) {
            return Collections.emptyList();
        }

        // These settings do not depend on the glossary entry being examined,
        // so read them once instead of once per entry.
        final boolean stemming = Preferences.isPreferenceDefault(Preferences.GLOSSARY_STEMMING,
                Preferences.GLOSSARY_STEMMING_DEFAULT);
        final boolean notExact = Preferences.isPreferenceDefault(Preferences.GLOSSARY_NOT_EXACT_MATCH,
                Preferences.GLOSSARY_NOT_EXACT_MATCH_DEFAULT);
        final boolean spaceDelimited = Core.getProject().getProjectProperties().getSourceLanguage()
                .isSpaceDelimited();

        // Make comparison case-insensitive
        final Locale loc = Core.getProject().getProjectProperties().getSourceLanguage().getLocale();
        String srcLower = src.toLowerCase(loc);

        // Compute source entry tokens
        Token[] strTokens = tokenize(tok, srcLower, stemming);

        List<GlossaryEntry> result = new ArrayList<GlossaryEntry>();
        for (GlossaryEntry glosEntry : entries) {
            // Inherited check; presumably aborts the search once another entry
            // has become active - see EntryInfoSearchThread.
            checkEntryChanged();

            // Compute glossary entry tokens
            String glosStr = glosEntry.getSrcText().toLowerCase(loc);
            Token[] glosTokens = tokenize(tok, glosStr, stemming);
            if (glosTokens.length == 0) {
                continue;
            }

            if (DefaultTokenizer.isContainsAll(strTokens, glosTokens, notExact)) {
                result.add(glosEntry);
                continue;
            }

            if (!spaceDelimited && StringUtil.isCJK(glosEntry.getSrcText())
                    && src.contains(glosEntry.getSrcText())) {
                // This is a CJK word and our source language is not
                // space-delimited, so include if word appears anywhere in
                // source string.
                result.add(glosEntry);
            }
        }

        // After the matched entries have been tokenized and listed.
        // We reorder entries: 1) by priority, 2) by length, 3) by alphabet
        // Then remove the duplicates and combine the synonyms.
        sortGlossaryEntries(result);
        return filterGlossary(result);
    }

    /**
     * Tokenizes a string either as words in glossary stemming mode or verbatim.
     *
     * @param tok
     *            tokenizer to use
     * @param str
     *            text to tokenize
     * @param stemming
     *            value of the {@code GLOSSARY_STEMMING} preference; when
     *            {@code true} the text is tokenized with
     *            {@link StemmingMode#GLOSSARY}, otherwise verbatim
     * @return the tokens produced by the tokenizer
     */
    private Token[] tokenize(ITokenizer tok, String str, boolean stemming) {
        if (stemming) {
            return tok.tokenizeWords(str, StemmingMode.GLOSSARY);
        }
        return tok.tokenizeVerbatim(str);
    }

    /**
     * Sorts the entries in place: priority entries first, then longer source
     * terms first, then source text alphabetically (case-insensitively, with a
     * case-sensitive tie-break), and finally by target text ignoring case.
     *
     * @param entries
     *            list to sort; modified in place
     */
    static void sortGlossaryEntries(List<GlossaryEntry> entries) {
        Collections.sort(entries, new Comparator<GlossaryEntry>() {
            @Override
            public int compare(GlossaryEntry o1, GlossaryEntry o2) {
                int p1 = o1.getPriority() ? 1 : 2;
                int p2 = o2.getPriority() ? 1 : 2;
                int c = p1 - p2;
                if (c == 0) {
                    // String lengths are never negative, so this subtraction
                    // cannot overflow.
                    c = o2.getSrcText().length() - o1.getSrcText().length();
                }
                if (c == 0) {
                    c = o1.getSrcText().compareToIgnoreCase(o2.getSrcText());
                }
                if (c == 0) {
                    c = o1.getSrcText().compareTo(o2.getSrcText());
                }
                if (c == 0) {
                    c = o1.getLocText().compareToIgnoreCase(o2.getLocText());
                }
                return c;
            }
        });
    }

    /**
     * Removes exact duplicates and merges all entries sharing the same source
     * text into one combined entry each.
     * <p>
     * The given list is not modified; the work is done on a private copy.
     *
     * @param result
     *            matched entries, expected to be already sorted
     * @return the given list itself when it is empty, otherwise a new list
     *         with one combined entry per distinct source text, in order of
     *         first appearance
     */
    static List<GlossaryEntry> filterGlossary(List<GlossaryEntry> result) {
        // First check that entries exist in the list.
        if (result.isEmpty()) {
            return result;
        }

        // Placeholder with empty source text; marks slots already consumed.
        final GlossaryEntry replaceEntry = new GlossaryEntry("", "", "", false);

        // Random-access working copy: the loops below use get(i)/set(i, ...),
        // which would be linear-time per call on a linked list.
        List<GlossaryEntry> work = new ArrayList<GlossaryEntry>(result);

        if (blankOutDuplicates(work, replaceEntry)) {
            work = dropBlankEntries(work);
        }

        // Group items with the same source text
        List<GlossaryEntry> returnList = new ArrayList<GlossaryEntry>();
        for (int i = 0; i < work.size(); i++) {
            GlossaryEntry nowEntry = work.get(i);
            if (nowEntry.getSrcText().equals("")) {
                continue;
            }

            List<GlossaryEntry> srcList = new ArrayList<GlossaryEntry>();
            srcList.add(nowEntry);
            for (int j = i + 1; j < work.size(); j++) {
                GlossaryEntry thenEntry = work.get(j);
                if (thenEntry.getSrcText().equals("")) {
                    continue;
                }
                if (nowEntry.getSrcText().equals(thenEntry.getSrcText())) {
                    srcList.add(thenEntry);
                    work.set(j, replaceEntry);
                }
            }

            returnList.add(mergeEntries(groupByTranslation(srcList, replaceEntry)));
        }
        return returnList;
    }

    /**
     * Replaces every later entry that equals an earlier one in source text,
     * target text and comment text with the placeholder.
     *
     * @return {@code true} when at least one entry was replaced
     */
    private static boolean blankOutDuplicates(List<GlossaryEntry> entries, GlossaryEntry placeholder) {
        boolean removedDuplicate = false;
        for (int i = 0; i < entries.size(); i++) {
            GlossaryEntry nowEntry = entries.get(i);
            if (nowEntry.getSrcText().equals("")) {
                continue;
            }
            for (int j = i + 1; j < entries.size(); j++) {
                GlossaryEntry thenEntry = entries.get(j);
                if (thenEntry.getSrcText().equals("")) {
                    continue;
                }
                // If the entries are exactly the same, insert a blank entry.
                if (nowEntry.getSrcText().equals(thenEntry.getSrcText())
                        && nowEntry.getLocText().equals(thenEntry.getLocText())
                        && nowEntry.getCommentText().equals(thenEntry.getCommentText())) {
                    entries.set(j, placeholder);
                    removedDuplicate = true;
                }
            }
        }
        return removedDuplicate;
    }

    /**
     * Returns a new list holding only the entries whose source text and target
     * text are both non-empty.
     * <p>
     * NOTE(review): besides the placeholders this also drops genuine entries
     * with an empty target text, and it only runs when a duplicate was found.
     * That behaviour is kept from the previous implementation - confirm
     * whether it is intended.
     */
    private static List<GlossaryEntry> dropBlankEntries(List<GlossaryEntry> entries) {
        List<GlossaryEntry> kept = new ArrayList<GlossaryEntry>(entries.size());
        for (GlossaryEntry entry : entries) {
            if (!entry.getSrcText().equals("") && !entry.getLocText().equals("")) {
                kept.add(entry);
            }
        }
        return kept;
    }

    /**
     * Reorders entries sharing one source text so that entries with equal
     * target text become adjacent, keeping the order of first appearance.
     *
     * @param srcList
     *            non-empty group of entries; consumed slots are overwritten
     *            with the placeholder
     * @return the reordered entries; {@code srcList} itself when it holds a
     *         single entry
     */
    private static List<GlossaryEntry> groupByTranslation(List<GlossaryEntry> srcList,
            GlossaryEntry placeholder) {
        if (srcList.size() <= 1) {
            return srcList;
        }
        List<GlossaryEntry> sortList = new ArrayList<GlossaryEntry>(srcList.size());
        for (int k = 0; k < srcList.size(); k++) {
            GlossaryEntry srcNow = srcList.get(k);
            if (srcNow.getSrcText().equals("")) {
                continue;
            }
            sortList.add(srcNow);
            for (int l = k + 1; l < srcList.size(); l++) {
                GlossaryEntry srcThen = srcList.get(l);
                if (srcThen.getSrcText().equals("")) {
                    continue;
                }
                if (srcNow.getLocText().equals(srcThen.getLocText())) {
                    sortList.add(srcThen);
                    srcList.set(l, placeholder);
                }
            }
        }
        return sortList;
    }

    /**
     * Combines a non-empty group of entries into a single entry that carries
     * the first entry's source text and the concatenated target terms,
     * comments and priorities of all entries, in list order.
     */
    private static GlossaryEntry mergeEntries(List<GlossaryEntry> sortList) {
        String srcTxt = sortList.get(0).getSrcText();
        List<String> locTxts = new ArrayList<String>();
        List<String> comTxts = new ArrayList<String>();
        List<Boolean> prios = new ArrayList<Boolean>();
        for (GlossaryEntry e : sortList) {
            for (String s : e.getLocTerms(false)) {
                locTxts.add(s);
            }
            for (String s : e.getComments()) {
                comTxts.add(s);
            }
            for (boolean s : e.getPriorities()) {
                prios.add(s);
            }
        }

        boolean[] priorities = new boolean[prios.size()];
        for (int j = 0; j < prios.size(); j++) {
            priorities[j] = prios.get(j);
        }

        return new GlossaryEntry(srcTxt, locTxts.toArray(new String[locTxts.size()]),
                comTxts.toArray(new String[comTxts.size()]), priorities);
    }
}