/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2009 Alex Buloichik
2012 Thomas Cordonnier
2013-2014 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.matching;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.omegat.core.data.EntryKey;
import org.omegat.util.StringUtil;
import org.omegat.util.TMXProp;
/**
 * Class to hold a single fuzzy match.
 * <p>
 * An instance carries the content of one entry (key, source, translation,
 * authorship data, properties) together with two arrays, {@link #projs} and
 * {@link #scores}. The constructor creates both arrays with exactly one
 * element; {@link #merge} creates instances where both arrays hold one
 * additional element, added at the same position in each.
 *
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 * @author Thomas Cordonnier
 * @author Aaron Madlon-Kay
 */
public class NearString {

    /** Origin of a match, stored in {@link NearString#comesFrom}. */
    public enum MATCH_SOURCE {
        MEMORY, TM, FILES
    }

    /** Selects which field of {@link Scores} a {@link ScoresComparator} compares first. */
    public enum SORT_KEY {
        SCORE, SCORE_NO_STEM, ADJUSTED_SCORE
    }

    /**
     * Creates a match with a single project name and a single set of scores.
     *
     * @param key             stored in {@link #key}
     * @param source          stored in {@link #source}
     * @param translation     stored in {@link #translation}
     * @param comesFrom       stored in {@link #comesFrom}
     * @param fuzzyMark       stored in {@link #fuzzyMark}
     * @param nearScore       becomes {@link Scores#score} of the only element of {@link #scores}
     * @param nearScoreNoStem becomes {@link Scores#scoreNoStem} of that element
     * @param adjustedScore   becomes {@link Scores#adjustedScore} of that element
     * @param nearData        stored in {@link #attr} (the array is not copied)
     * @param projName        becomes the only element of {@link #projs}; {@code null} is stored as
     *                        an empty string
     * @param creator         stored in {@link #creator}
     * @param creationDate    stored in {@link #creationDate}
     * @param changer         stored in {@link #changer}
     * @param changedDate     stored in {@link #changedDate}
     * @param props           stored in {@link #props} (the list is not copied)
     */
    public NearString(final EntryKey key, final String source, final String translation, MATCH_SOURCE comesFrom,
            final boolean fuzzyMark, final int nearScore, final int nearScoreNoStem, final int adjustedScore,
            final byte[] nearData, final String projName, final String creator, final long creationDate,
            final String changer, final long changedDate, final List<TMXProp> props) {
        this.key = key;
        this.source = source;
        this.translation = translation;
        this.comesFrom = comesFrom;
        this.fuzzyMark = fuzzyMark;
        this.scores = new Scores[] { new Scores(nearScore, nearScoreNoStem, adjustedScore) };
        this.attr = nearData;
        this.projs = new String[] { normalizeProjectName(projName) };
        this.props = props;
        this.creator = creator;
        this.creationDate = creationDate;
        this.changer = changer;
        this.changedDate = changedDate;
    }

    /**
     * Combines an existing match with a new candidate into a new instance; {@code ns} itself is
     * not modified.
     * <p>
     * The result's {@link #projs} and {@link #scores} contain everything {@code ns} had plus the
     * candidate's project name and scores:
     * <ul>
     * <li>If {@code nearScore} is strictly greater than {@code ns.scores[0].score}, the result
     * takes its content (key, source, translation, origin, fuzzy mark, attributes, authorship
     * data, properties) from the candidate arguments, and the candidate's project name and scores
     * are inserted at index 0.</li>
     * <li>Otherwise the result keeps the content of {@code ns}, and the candidate's project name
     * and scores are appended at the end.</li>
     * </ul>
     * A {@code null} {@code projName} is stored as an empty string, the same way the constructor
     * stores it.
     *
     * @param ns the existing match; must be non-null with a non-empty {@link #scores} array
     * @return the merged match
     */
    public static NearString merge(NearString ns, final EntryKey key, final String source, final String translation,
            MATCH_SOURCE comesFrom, final boolean fuzzyMark, final int nearScore, final int nearScoreNoStem,
            final int adjustedScore, final byte[] nearData, final String projName, final String creator,
            final long creationDate, final String changer, final long changedDate, final List<TMXProp> props) {
        final List<String> mergedProjs = new ArrayList<>(Arrays.asList(ns.projs));
        final List<Scores> mergedScores = new ArrayList<>(Arrays.asList(ns.scores));
        final String candidateProj = normalizeProjectName(projName);

        final boolean candidateWins = nearScore > ns.scores[0].score;
        final NearString merged;
        if (candidateWins) {
            merged = new NearString(key, source, translation, comesFrom, fuzzyMark, nearScore,
                    nearScoreNoStem, adjustedScore, nearData, null, creator, creationDate, changer,
                    changedDate, props);
        } else {
            merged = new NearString(ns.key, ns.source, ns.translation, ns.comesFrom, ns.fuzzyMark, nearScore,
                    nearScoreNoStem, adjustedScore, ns.attr, null, ns.creator, ns.creationDate, ns.changer,
                    ns.changedDate, ns.props);
        }

        // In both branches the constructor was given the candidate's three score values, so
        // merged.scores[0] is the candidate's Scores object.
        final Scores candidateScores = merged.scores[0];
        if (candidateWins) {
            mergedProjs.add(0, candidateProj);
            mergedScores.add(0, candidateScores);
        } else {
            mergedProjs.add(candidateProj);
            mergedScores.add(candidateScores);
        }

        // Replace the single-element arrays the constructor created with the combined ones.
        merged.projs = mergedProjs.toArray(new String[0]);
        merged.scores = mergedScores.toArray(new Scores[0]);
        return merged;
    }

    /** Maps a {@code null} project name to an empty string so {@link #projs} never holds null. */
    private static String normalizeProjectName(String projName) {
        return projName == null ? "" : projName;
    }

    /**
     * Returns a short description: the source passed through {@code StringUtil.truncate} with a
     * limit of 20, the first element of {@link #scores}, and {@code "x"} followed by the number
     * of score sets, separated by spaces.
     */
    @Override
    public String toString() {
        return String.join(" ", StringUtil.truncate(source, 20), scores[0].toString(), "x" + scores.length);
    }

    public EntryKey key;
    public String source;
    public String translation;
    public MATCH_SOURCE comesFrom;

    public boolean fuzzyMark;

    /** One set of scores per element of {@link #projs}, in the same order. */
    public Scores[] scores;

    /** matching attributes of near strEntry */
    public byte[] attr;

    /** Project names; the constructor and {@link #merge} never store null here. */
    public String[] projs;
    public List<TMXProp> props;
    public String creator;
    public long creationDate;
    public String changer;
    public long changedDate;

    /** Immutable holder for the three score values of a match. */
    public static class Scores {
        public final int score;

        /** similarity score for match without non-word tokens */
        public final int scoreNoStem;

        /** adjusted similarity score for match including all tokens */
        public final int adjustedScore;

        public Scores(int score, int scoreNoStem, int adjustedScore) {
            this.score = score;
            this.scoreNoStem = scoreNoStem;
            this.adjustedScore = adjustedScore;
        }

        /** Formats the values as {@code (score/scoreNoStem/adjustedScore%)}. */
        @Override
        public String toString() {
            return "(" + score + "/" + scoreNoStem + "/" + adjustedScore + "%)";
        }
    }

    /**
     * Orders {@link Scores} ascending by three fields in turn. The {@link SORT_KEY} decides the
     * field order:
     * <ul>
     * <li>{@code SCORE}: score, scoreNoStem, adjustedScore</li>
     * <li>{@code SCORE_NO_STEM}: scoreNoStem, score, adjustedScore</li>
     * <li>{@code ADJUSTED_SCORE}: adjustedScore, score, scoreNoStem</li>
     * </ul>
     */
    public static class ScoresComparator implements Comparator<Scores> {

        private final SORT_KEY key;

        public ScoresComparator(SORT_KEY key) {
            this.key = key;
        }

        @Override
        public int compare(Scores o1, Scores o2) {
            int result = Integer.compare(primaryScore(o1), primaryScore(o2));
            if (result != 0) {
                return result;
            }
            result = Integer.compare(secondaryScore(o1), secondaryScore(o2));
            if (result != 0) {
                return result;
            }
            return Integer.compare(ternaryScore(o1), ternaryScore(o2));
        }

        /** The field compared first for the configured key. */
        private int primaryScore(Scores s) {
            switch (key) {
            case SCORE:
                return s.score;
            case SCORE_NO_STEM:
                return s.scoreNoStem;
            case ADJUSTED_SCORE:
            default:
                return s.adjustedScore;
            }
        }

        /** The field compared when the primary fields are equal. */
        private int secondaryScore(Scores s) {
            switch (key) {
            case SCORE:
                return s.scoreNoStem;
            case SCORE_NO_STEM:
            case ADJUSTED_SCORE:
            default:
                return s.score;
            }
        }

        /** The field compared when both primary and secondary fields are equal. */
        private int ternaryScore(Scores s) {
            switch (key) {
            case SCORE:
            case SCORE_NO_STEM:
                return s.adjustedScore;
            case ADJUSTED_SCORE:
            default:
                return s.scoreNoStem;
            }
        }
    }
}