/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.corpus; import joshua.corpus.suffix_array.Pattern; import joshua.corpus.suffix_array.PatternFormat; /** * Represents a list of matched hierarchical phrases. * * @author Lane Schwartz * @since Apr 4 2009 * @version $LastChangedDate: 2009-06-26 18:39:32 -0500 (Fri, 26 Jun 2009) $ */ public interface MatchedHierarchicalPhrases extends PatternFormat { /** * Gets the pattern associated with this list of phrases. * * @return the pattern associated with this list of phrases */ Pattern getPattern(); /** * Gets the number of contiguous sequences of terminals in * the pattern represented by this object. * * @return The number of contiguous sequences of terminals * in the pattern represented by this object. */ int getNumberOfTerminalSequences(); /** * Gets the position in the corpus for the specified phrase * of the first terminal in the specified sequence of terminals * within that phrase. * * @param phraseIndex Index specifying a phrase in this object * @param positionNumber Specifies a sequence of terminals * within the specified phrase * @return The position in the corpus for the specified phrase * of the first terminal in the specified sequence of terminals * within that phrase */ int getStartPosition(int phraseIndex, int positionNumber); /** * Gets the position in the corpus for the specified phrase * just past the last terminal in the specified sequence of terminals * within that phrase. * * @param phraseIndex Index specifying a phrase in this object * @param positionNumber Specifies a sequence of terminals * within the specified phrase * @return The position in the corpus for the specified phrase * just past the last terminal in the specified sequence of terminals * within that phrase */ int getEndPosition(int phraseIndex, int positionNumber); /** * Gets the number of locations in the corpus that match * the pattern. * * @return The number of locations in the corpus that match * the pattern. */ int size(); /** * Tests if this list has no matches in the corpus. * * @return <code>true</code> if this list has no matches * in the corpus, <code>false</code> otherwise. */ boolean isEmpty(); /** * Gets the index of the sentence from which the specified * phrase was extracted. * * @param phraseIndex Index specifying a phrase in this object * @return The index of the sentence from which the specified * phrase was extracted. */ int getSentenceNumber(int phraseIndex); /** * Gets the number of terminal tokens in the specified * terminal sequence. * * @param i Index of a terminal sequence in this object's * pattern. * @return The number of terminal tokens in the specified * terminal sequence */ int getTerminalSequenceLength(int i); /** * Constructs a new object exactly the same as this object * (specifically, it contains the exact same list of corpus * matches), but prepends the nonterminal X to the pattern * of the returned object. * * @return list of matched phrases identical with updated * pattern */ MatchedHierarchicalPhrases copyWithInitialX(); /** * Constructs a new object exactly the same as this object * (specifically, it contains the exact same list of corpus * matches), but appends the nonterminal X to the pattern. * * @return list of matched phrases identical with updated * pattern */ MatchedHierarchicalPhrases copyWithFinalX(); /////////////////////////////////////////////////////////////////////////// // The methods below ideally would go into a HierarchicalPhrase class. // // However, for efficiency, they are instead incorporated into this class. /////////////////////////////////////////////////////////////////////////// /** * Gets the span in the backing corpus of the phrase at the * specified index. * <p> * <em>Note</em>: The span returned by this method is the * span from the index of the first matched terminal in * this phrase to the one past the index of the last matched * terminal in this phrase. * * @param phraseIndex Index of a matched phrase * @return the span in the backing corpus of the phrase at * the specified index */ Span getSpan(int phraseIndex); boolean containsTerminalAt(int phraseIndex, int alignmentPointIndex); /** * Gets the index in the corpus of the first terminal token * of the <em>n</em>'th matched hierarchical phrase known to * this object, where <em>n</em> is provided as the * <code>phraseIndex</code> parameter. * * @param phraseIndex Index of a matched phrase * @return Index in the corpus of the first terminal token * of the <em>n</em>'th matched hierarchical phrase * known to this object */ int getFirstTerminalIndex(int phraseIndex); /** * Gets the exclusive ending index of the last terminal * sequence of the specified phrase. * * @param phraseIndex Index specifying a phrase in this object * @return the exclusive ending index * of the last terminal sequence of the specified phrase */ int getLastTerminalIndex(int phraseIndex); int getTerminalSequenceStartIndex(int phraseIndex, int sequenceIndex); int getTerminalSequenceEndIndex(int phraseIndex, int sequenceIndex); }