/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2016 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.gui.align;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import org.omegat.util.Language;
import net.loomchild.maligna.coretypes.Alignment;
/**
* A container class used to store data for alignment. "Bead" is used in NLP literature to refer to a grouping
* of source and target sentences that are held to correlate (are aligned together).
*
* @author Aaron Madlon-Kay
*/
class MutableBead {
/**
* Status flags applied by the user; for user-directed data management only.
*/
enum Status {
DEFAULT, ACCEPTED, NEEDS_REVIEW
}
public final float score;
public final List<String> sourceLines;
public final List<String> targetLines;
public boolean enabled;
public MutableBead.Status status;
private MutableBead(float score, List<String> sourceLines, List<String> targetLines) {
this.score = score;
this.sourceLines = new ArrayList<String>(sourceLines);
this.targetLines = new ArrayList<String>(targetLines);
boolean srcEqualsTrg = sourceLines.equals(targetLines);
this.enabled = !srcEqualsTrg;
this.status = srcEqualsTrg ? MutableBead.Status.ACCEPTED : MutableBead.Status.DEFAULT;
}
public MutableBead(Alignment alignment) {
this(alignment.getScore(), alignment.getSourceSegmentList(), alignment.getTargetSegmentList());
}
public MutableBead(List<String> sourceLines, List<String> targetLines) {
this(Float.MAX_VALUE, sourceLines, targetLines);
}
public MutableBead(String source, String target) {
this(Arrays.asList(source), Arrays.asList(target));
}
/**
* Create an empty new bead. {@link #enabled} is <code>true</code> by
* default because it is assumed that the caller will populate the bead with
* interesting data.
*/
public MutableBead() {
this(Collections.emptyList(), Collections.emptyList());
this.enabled = true;
this.status = Status.DEFAULT;
}
/**
* Get whether or not the bead contains the same number of source and target lines.
*
* @return
*/
public boolean isBalanced() {
return sourceLines.size() == targetLines.size();
}
/**
* Get whether or not the bead is entirely empty (has 0 source lines and 0 target lines).
*
* @return
*/
public boolean isEmpty() {
return sourceLines.isEmpty() && targetLines.isEmpty();
}
/**
* Convert a list of beads to a list of flattened (see {@link #join(Language, List)}) pairs where
* <ol>
* <li>key = source text
* <li>value = target text
* </ol>
*
* @param beads
* List of beads to convert
* @return List of squashed pairs
*/
static List<Entry<String, String>> beadsToEntries(Language srcLang, Language trgLang,
List<MutableBead> beads) {
return beads.stream().filter(bead -> bead.enabled).map(bead -> {
String srcOut = bead.sourceLines.isEmpty() ? null : Util.join(srcLang, bead.sourceLines);
String trgOut = bead.targetLines.isEmpty() ? null : Util.join(trgLang, bead.targetLines);
return new AbstractMap.SimpleImmutableEntry<String, String>(srcOut, trgOut);
}).collect(Collectors.toList());
}
/**
* Get the average score of the list of beads. In mALIGNa the "score" is <code>-ln(probability)</code> of
* the alignment, so lower scores are better. We use {@link Double#MAX_VALUE} as a sentinel for failure to
* calculate (empty list, etc.).
*
* @param beads
* @return Average score, or {@link Double#MAX_VALUE} if incalculable
*/
static double calculateAvgDist(List<MutableBead> beads) {
return beads.stream().mapToDouble(bead -> bead.score).average().orElse(Double.MAX_VALUE);
}
}