/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus.suffix_array;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;
import joshua.corpus.MatchedHierarchicalPhrases;
import joshua.corpus.vocab.SymbolTable;
/**
* HierarchicalPhrases represents a list of matched hierarchical
* phrases.
* <p>
*
* TODO Add unit tests for this class.
*
* @author Lane Schwartz
* @since Jan 9 2009
* @version $LastChangedDate: 2010-02-01 14:37:27 -0600 (Mon, 01 Feb 2010) $
*/
public class HierarchicalPhrases extends AbstractHierarchicalPhrases {
/**
* Represents all locations in the corpus that match the
* <code>pattern</code>.
* <p>
* Specifically, for each location in the corpus that matches
* the pattern, the corpus index of the of the first word
* in each terminal sequence is stored.
* <p>
* The length of this array should be
* <code>size * terminalSequenceLengths.length</code>.
*/
final int[] terminalSequenceStartIndices;
/**
* Represents the sentence numbers of each location in the
* corpus that matches the pattern.
* <p>
* To save memory, this variable could be deleted if the
* actual calculation of this data were moved from the
* constructor to the <code>getSentenceNumber</code> method.
*/
final int[] sentenceNumber;
/** Logger for this class. */
@SuppressWarnings("unused")
private static final Logger logger =
Logger.getLogger(HierarchicalPhrases.class.getName());
/**
* Constructs a list of hierarchical phrases.
*
* @param pattern Pattern common to the list of phrases
* @param startPositions Represents all locations in the
* corpus that match the pattern. Specifically,
* for each location in the corpus that matches
* the pattern, the corpus index of the of the
* first word in each terminal sequence is stored.
* @param sentenceNumbers Represents the sentence number
* of each matched phrase location
*/
public HierarchicalPhrases(Pattern pattern, int[] startPositions, int[] sentenceNumbers) {
// super(pattern, startPositions.length);
super(pattern,
(pattern.getTerminalSequenceLengths().length>0)
? startPositions.length / pattern.getTerminalSequenceLengths().length
: 0);
// this.size = sentenceNumbers.length;//sentenceNumbers.length;
this.terminalSequenceStartIndices = startPositions;
// this.sentenceNumber = new int[size];
this.sentenceNumber = sentenceNumbers;
publicCounter += 1;
}
public static int publicCounter = 0;
public static int protectedCounter = 0;
public static int privateCounter = 0;
public static int emptyListCounter = 0;
public String toString() {
StringBuilder s = new StringBuilder();
s.append(this.pattern.toString());
s.append('\t');
s.append(this.size());
s.append(" locations");
return s.toString();
}
/**
* Constructs a list of hierarchical phrases.
*
* @param pattern Pattern common to the list of phrases
* @param terminalSequenceStartIndices Represents all locations in the corpus
* that match the pattern.
* Specifically, for each location in the corpus
* that matches the pattern, the corpus index of the
* of the first word in each terminal sequence is stored.
* @param sentenceNumbers Represents the sentence number
* of each matched phrase location
*/
protected HierarchicalPhrases(Pattern pattern,
List<Integer> terminalSequenceStartIndices,
List<Integer> sentenceNumbers) {
// super(pattern);
super(pattern,
(pattern.getTerminalSequenceLengths().length>0)
? terminalSequenceStartIndices.size() / pattern.getTerminalSequenceLengths().length
: 0);
// int numberOfPhrases = ;
int dataSize = terminalSequenceStartIndices.size();
// this.size = (terminalSequenceLengths.length>0) ? dataSize / terminalSequenceLengths.length : 0;
this.terminalSequenceStartIndices = new int[dataSize];
for (int i=0; i<dataSize; i++) {
this.terminalSequenceStartIndices[i] = terminalSequenceStartIndices.get(i);
}
this.sentenceNumber = new int[size];
for (int i=0; i<size; i++) {
this.sentenceNumber[i] = sentenceNumbers.get(i);
}
protectedCounter += 1;
// this.size = size;
}
// /* See Javadoc for MatchedHierarchicalPhrases interface. */
// public int size() {
// return size;
// }
/**
* Constructs a list of hierarchical phrases
* identical to the provided list of phrases,
* except that it uses the provided pattern.
*
* @param pattern
* @param phrases
*/
private HierarchicalPhrases(Pattern pattern, HierarchicalPhrases phrases) {
super(pattern, phrases.size);
// super(pattern);
// this.size = phrases.size;
this.terminalSequenceStartIndices = phrases.terminalSequenceStartIndices;
this.sentenceNumber = phrases.sentenceNumber;
privateCounter += 1;
}
/**
* Gets an empty list of hierarchical phrases.
*
* @param vocab Symbol table to associate with the list
* @return an empty list of hierarchical phrases
*/
public static HierarchicalPhrases emptyList(SymbolTable vocab, int... words) {
return emptyList(new Pattern(vocab, words));
}
/**
* Gets an empty list of hierarchical phrases.
*
* @param vocab Symbol table to associate with the list
* @return an empty list of hierarchical phrases
*/
public static HierarchicalPhrases emptyList(Pattern pattern) {
emptyListCounter += 1;
return new HierarchicalPhrases(
pattern,
Collections.<Integer>emptyList(),
Collections.<Integer>emptyList()
);
}
/* See Javadoc for MatchedHierarchicalPhrases interface. */
public MatchedHierarchicalPhrases copyWithInitialX() {
return new HierarchicalPhrases(getPatternWithInitialX(), this);
}
/* See Javadoc for MatchedHierarchicalPhrases interface. */
public MatchedHierarchicalPhrases copyWithFinalX() {
return new HierarchicalPhrases(getPatternWithFinalX(), this);
}
/* See Javadoc for MatchedHierarchicalPhrases interface. */
public int getStartPosition(int phraseIndex, int positionNumber) {
return terminalSequenceStartIndices[phraseIndex*(terminalSequenceLengths.length)+positionNumber];
}
/* See Javadoc for MatchedHierarchicalPhrases interface. */
public boolean isEmpty() {
return ! (size > 0);
}
/* See Javadoc for MatchedHierarchicalPhrases interface. */
public int getSentenceNumber(int phraseIndex) {
return this.sentenceNumber[phraseIndex];
}
}