AbstractAlignmentGrids.java example

Explorer
relax-decode-master
- third-party
/* This file is part of the Joshua Machine Translation System.
 * 
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.corpus.alignment;

import joshua.corpus.Corpus;
import joshua.corpus.Span;

/**
 * Abstract implementation of <code>Alignments</code> interface
 * that includes code likely to be common to implementations which
 * conceptually view alignment points as a grid.
 * <p>
 * This class implements all methods defined by the 
 * <code>Alignments</code> interface except for {@link #size()}. 
 * 
 * Any concrete child class need only implement that method and
 * the two abstract protected methods defined here.
 * 
 * @author Lane Schwartz
 */
public abstract class AbstractAlignmentGrids extends AbstractAlignments {

	/** Source language corpus. */
	protected final Corpus sourceCorpus;
	
	/** Target language corpus. */
	protected final Corpus targetCorpus;
	
	/**
	 * Constructs an abstract alignments grid.
	 * 
	 * @param sourceCorpus Source language corpus
	 * @param targetCorpus Target language corpus
	 * @param requireTightSpans Indicates whether tight spans 
	 *                          are required during phrase extraction
	 */
	public AbstractAlignmentGrids(Corpus sourceCorpus, Corpus targetCorpus, boolean requireTightSpans) {
		super(requireTightSpans);
		this.sourceCorpus = sourceCorpus;
		this.targetCorpus = targetCorpus;
	}
	
	/**
	 * Gets the indices of all source words aligned to the
	 * specified span in the specified sentence.
	 * <p>
	 * All indices in this method are zero-based.
	 * <p>
	 * The span parameters of this method are relative to the
	 * sentence. So, for example, calling this method to get the
	 * source indices for a target span covering the first three
	 * words of the eighth sentence in the parallel corpus, the
	 * following parameter values would be used:
	 * 
	 * <code>getSourcePoints(7, 0, 3)</code>
	 * <p>
	 * Callers in this class read the first and last elements of
	 * the result as the smallest and largest aligned index, and
	 * treat both <code>null</code> and an empty array as
	 * "no alignment".
	 * 
	 * @param sentenceID Index of a sentence in the aligned parallel corpus
	 * @param targetSpanStart Inclusive start index in the target sentence
	 * @param targetSpanEnd Exclusive end index in the target sentence
	 * @return the indices of all source words aligned to the
	 *         specified span in the specified sentence
	 */
	protected abstract int[] getSourcePoints(int sentenceID, int targetSpanStart, int targetSpanEnd);
	
	/**
	 * Gets the indices of all target words aligned to the
	 * specified span in the specified sentence.
	 * <p>
	 * All indices in this method are zero-based.
	 * <p>
	 * The span parameters of this method are relative to the
	 * sentence. So, for example, calling this method to get
	 * the target indices for a source span covering the first
	 * three words of the eighth sentence in the parallel corpus,
	 * the following parameter values would be used:
	 * 
	 * <code>getTargetPoints(7, 0, 3)</code>
	 * <p>
	 * Callers in this class read the first and last elements of
	 * the result as the smallest and largest aligned index, and
	 * treat both <code>null</code> and an empty array as
	 * "no alignment".
	 * 
	 * @param sentenceID Index of a sentence in the aligned parallel corpus
	 * @param sourceSpanStart Inclusive start index in the source sentence
	 * @param sourceSpanEnd Exclusive end index in the source sentence
	 * @return the indices of all target words aligned to the
	 *         specified span in the specified sentence
	 */
	protected abstract int[] getTargetPoints(int sentenceID, int sourceSpanStart, int sourceSpanEnd);
	
	/**
	 * Gets the corpus-level indices of all source words aligned
	 * to the target word at the given corpus-level index.
	 * <p>
	 * See Javadoc for Alignments interface.
	 * 
	 * @param targetIndex Corpus-level index of a target word
	 * @return corpus-level source indices aligned to the target
	 *         word, or <code>null</code> if there are none
	 */
	public int[] getAlignedSourceIndices(int targetIndex) {
		
		int sentenceID = targetCorpus.getSentenceIndex(targetIndex);
		int sourceOffset = sourceCorpus.getSentencePosition(sentenceID);
		int targetOffset = targetCorpus.getSentencePosition(sentenceID);
		
		// The grid is addressed relative to the start of the sentence.
		int normalizedTargetIndex = targetIndex - targetOffset;
		
		int[] sourcePoints = getSourcePoints(sentenceID, normalizedTargetIndex, normalizedTargetIndex+1);
		
		return toCorpusIndices(sourcePoints, sourceOffset);
	}

	/**
	 * Gets the corpus-level source span covered by the words
	 * aligned to the given corpus-level target span.
	 * <p>
	 * The sentence is looked up from <code>startTargetIndex</code>
	 * only, and both span bounds are normalized against that
	 * sentence.
	 * <p>
	 * See Javadoc for Alignments interface.
	 * 
	 * @param startTargetIndex Inclusive corpus-level start of the target span
	 * @param endTargetIndex Exclusive corpus-level end of the target span
	 * @return the aligned source span (exclusive end), or a span of
	 *         <code>(UNALIGNED, UNALIGNED)</code> if nothing is aligned
	 */
	public Span getAlignedSourceSpan(int startTargetIndex, int endTargetIndex) {
		
		int sentenceID = targetCorpus.getSentenceIndex(startTargetIndex);
		int sourceOffset = sourceCorpus.getSentencePosition(sentenceID);
		int targetOffset = targetCorpus.getSentencePosition(sentenceID);
		
		int[] sourcePoints = getSourcePoints(
				sentenceID, 
				startTargetIndex - targetOffset, 
				endTargetIndex - targetOffset);
		
		// NOTE(review): unlike getAlignedTargetSpan, this direction does
		// not consult requireTightSpans; behavior kept as-is, confirm
		// that the asymmetry is intentional.
		return toCorpusSpan(sourcePoints, sourceOffset);
	}
	
	/**
	 * Gets the corpus-level indices of all target words aligned
	 * to the source word at the given corpus-level index.
	 * <p>
	 * See Javadoc for Alignments interface.
	 * 
	 * @param sourceIndex Corpus-level index of a source word
	 * @return corpus-level target indices aligned to the source
	 *         word, or <code>null</code> if there are none
	 */
	public int[] getAlignedTargetIndices(int sourceIndex) {
		
		int sentenceID = sourceCorpus.getSentenceIndex(sourceIndex);
		int targetOffset = targetCorpus.getSentencePosition(sentenceID);
		int sourceOffset = sourceCorpus.getSentencePosition(sentenceID);
		
		// The grid is addressed relative to the start of the sentence.
		int normalizedSourceIndex = sourceIndex - sourceOffset;
		
		int[] targetPoints = getTargetPoints(sentenceID, normalizedSourceIndex, normalizedSourceIndex+1);
		
		return toCorpusIndices(targetPoints, targetOffset);
	}
	
	/**
	 * Gets the corpus-level target span covered by the words
	 * aligned to the given corpus-level source span.
	 * <p>
	 * The sentence is looked up from <code>startSourceIndex</code>
	 * only, and both span bounds are normalized against that
	 * sentence.
	 * <p>
	 * When tight spans are required, the result is unaligned
	 * unless both the first and the last word of the source span
	 * have at least one aligned target word.
	 * <p>
	 * See Javadoc for Alignments interface.
	 * 
	 * @param startSourceIndex Inclusive corpus-level start of the source span
	 * @param endSourceIndex Exclusive corpus-level end of the source span
	 * @return the aligned target span (exclusive end), or a span of
	 *         <code>(UNALIGNED, UNALIGNED)</code> if nothing is aligned
	 *         or the tight-span requirement is not met
	 */
	public Span getAlignedTargetSpan(int startSourceIndex, int endSourceIndex) {
		
		int sentenceID = sourceCorpus.getSentenceIndex(startSourceIndex);
		int targetOffset = targetCorpus.getSentencePosition(sentenceID);
		int sourceOffset = sourceCorpus.getSentencePosition(sentenceID);
		int normalizedSourceStartIndex = startSourceIndex - sourceOffset;
		int normalizedSourceEndIndex = endSourceIndex - sourceOffset;
		
		int[] targetPoints = getTargetPoints(sentenceID, normalizedSourceStartIndex, normalizedSourceEndIndex);
		
		if (isNullOrEmpty(targetPoints)) {
			return new Span(UNALIGNED, UNALIGNED);
		}
		
		// The boundary lookups only matter for tight spans, so they are
		// skipped entirely when tight spans are not required or when the
		// span as a whole is already known to be unaligned.
		if (requireTightSpans) {
			
			int[] startPoints = getTargetPoints(sentenceID, normalizedSourceStartIndex, normalizedSourceStartIndex+1);
			if (isNullOrEmpty(startPoints)) {
				return new Span(UNALIGNED, UNALIGNED);
			}
			
			int[] endPoints = getTargetPoints(sentenceID, normalizedSourceEndIndex-1, normalizedSourceEndIndex);
			if (isNullOrEmpty(endPoints)) {
				return new Span(UNALIGNED, UNALIGNED);
			}
		}
		
		return toCorpusSpan(targetPoints, targetOffset);
	}
	
	/**
	 * Tests whether an array of alignment points carries no points.
	 * 
	 * @param points Alignment points, possibly <code>null</code>
	 * @return <code>true</code> if the array is <code>null</code> or empty
	 */
	private static boolean isNullOrEmpty(int[] points) {
		return points==null || points.length==0;
	}
	
	/**
	 * Converts sentence-relative indices to corpus-level indices.
	 * <p>
	 * The result is written to a fresh array so that the array
	 * handed back by a subclass is never modified.
	 * 
	 * @param sentenceIndices Sentence-relative indices, possibly <code>null</code>
	 * @param offset Corpus-level position of the start of the sentence
	 * @return the shifted indices, or <code>null</code> if there are none
	 */
	private int[] toCorpusIndices(int[] sentenceIndices, int offset) {
		
		if (isNullOrEmpty(sentenceIndices)) {
			return null;
		}
		
		int[] corpusIndices = new int[sentenceIndices.length];
		for (int i=0; i<sentenceIndices.length; i++) {
			corpusIndices[i] = sentenceIndices[i] + offset;
		}
		
		return corpusIndices;
	}
	
	/**
	 * Builds the corpus-level span running from the first to the
	 * last of the given sentence-relative indices.
	 * 
	 * @param sentenceIndices Sentence-relative indices, possibly <code>null</code>
	 * @param offset Corpus-level position of the start of the sentence
	 * @return the span with exclusive end, or a span of
	 *         <code>(UNALIGNED, UNALIGNED)</code> if there are no indices
	 */
	private Span toCorpusSpan(int[] sentenceIndices, int offset) {
		
		if (isNullOrEmpty(sentenceIndices)) {
			return new Span(UNALIGNED, UNALIGNED);
		}
		
		int start = offset + sentenceIndices[0];
		// +1 because the end of the span is exclusive.
		int end = offset + sentenceIndices[sentenceIndices.length-1] + 1;
		
		return new Span(start, end);
	}

}