/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.corpus.lexprob; import joshua.corpus.alignment.AlignmentGrid; import joshua.corpus.suffix_array.Pattern; import joshua.corpus.vocab.SymbolTable; import joshua.util.Lists; import joshua.util.Lists.IndexedInt; /** * Implements lexical probability methods * which will be directly reusable by most implementations. * * @author Lane Schwartz */ public abstract class AbstractLexProbs implements LexicalProbabilities { /* See Javadoc for LexicalProbabilities#getTargetGivenSourceAlignments(Pattern,Pattern). */ public AlignmentGrid getTargetGivenSourceAlignments(Pattern targetPattern, Pattern sourcePattern) { SymbolTable sourceVocab = getSourceVocab(); SymbolTable targetVocab = getTargetVocab(); StringBuilder alignmentPoints = new StringBuilder(); for (IndexedInt indexedTarget : Lists.eachWithIndex(targetPattern.getWordIDs())) { final int targetWord = indexedTarget.getValue(); final int targetIndex = indexedTarget.getIndex(); if (targetVocab.isNonterminal(targetWord)) { //TODO Do something special here } else { float max = targetGivenSource(targetWord, null); Integer bestSourceIndex = null; for (IndexedInt indexedSource : Lists.eachWithIndex(sourcePattern.getWordIDs())) { int sourceWord = indexedSource.getValue(); int sourceIndex = indexedSource.getIndex(); if (! sourceVocab.isNonterminal(sourceWord)) { float score = this.targetGivenSource(targetWord, sourceWord); if (score > max) { max = score; bestSourceIndex = sourceIndex; } } } if (bestSourceIndex != null) { alignmentPoints.append(bestSourceIndex); alignmentPoints.append('-'); alignmentPoints.append(targetIndex); alignmentPoints.append(' '); } } } return new AlignmentGrid(alignmentPoints.toString()); } /* See Javadoc for LexicalProbabilities#getSourceGivenTargetAlignments(Pattern,Pattern). */ public AlignmentGrid getSourceGivenTargetAlignments(Pattern sourcePattern, Pattern targetPattern) { SymbolTable sourceVocab = getSourceVocab(); SymbolTable targetVocab = getTargetVocab(); StringBuilder alignmentPoints = new StringBuilder(); for (IndexedInt indexedSource : Lists.eachWithIndex(sourcePattern.getWordIDs())) { int sourceWord = indexedSource.getValue(); int sourceIndex = indexedSource.getIndex(); if (sourceVocab.isNonterminal(sourceWord)) { //TODO Do something special here } else { float max = sourceGivenTarget(sourceWord, null); Integer bestTargetIndex = null; for (IndexedInt indexedTarget : Lists.eachWithIndex(targetPattern.getWordIDs())) { int targetWord = indexedTarget.getValue(); int targetIndex = indexedTarget.getIndex(); if (! targetVocab.isNonterminal(targetWord)) { float score = this.sourceGivenTarget(sourceWord, targetWord); if (score > max) { max = score; bestTargetIndex = targetIndex; } } } if (bestTargetIndex != null) { alignmentPoints.append(sourceIndex); alignmentPoints.append('-'); alignmentPoints.append(bestTargetIndex); alignmentPoints.append(' '); } } sourceIndex += 1; } return new AlignmentGrid(alignmentPoints.toString()); } /* See Javadoc for LexicalProbabilities#lexProbSourceGivenTarget(Pattern,Pattern). */ public float lexProbSourceGivenTarget(Pattern sourcePattern, Pattern targetPattern) { float sourceGivenTarget = 1.0f; for (Integer sourceWord : sourcePattern.getTerminals()) { float max = sourceGivenTarget(sourceWord, null); for (Integer targetWord : targetPattern.getTerminals()) { float score = this.sourceGivenTarget(sourceWord, targetWord); if (score > max) { max = score; } } sourceGivenTarget *= max; } if (sourceGivenTarget <= 0) { sourceGivenTarget = getFloorProbability(); } return sourceGivenTarget; } /* See Javadoc for LexicalProbabilities#lexProbTargetGivenSource(Pattern,Pattern). */ public float lexProbTargetGivenSource(Pattern targetPattern, Pattern sourcePattern) { float targetGivenSource = 1.0f; for (Integer targetWord : targetPattern.getTerminals()) { float max = targetGivenSource(targetWord, null); for (Integer sourceWord : sourcePattern.getTerminals()) { float score = this.targetGivenSource(targetWord, sourceWord); if (score > max) { max = score; } } targetGivenSource *= max; } if (targetGivenSource <= 0) { targetGivenSource = getFloorProbability(); } return targetGivenSource; } }