/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus.suffix_array;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import joshua.corpus.CorpusArray;
import joshua.corpus.LabeledSpan;
import joshua.corpus.Span;
import joshua.corpus.suffix_array.HierarchicalPhrases;
import joshua.corpus.suffix_array.Pattern;
import joshua.corpus.suffix_array.SuffixArrayFactory;
import joshua.corpus.vocab.SymbolTable;
import org.testng.Assert;
import org.testng.annotations.Test;
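/**
* Unit tests for {@link HierarchicalPhrase} and {@link HierarchicalPhrases},
* run against a small Spanish sample corpus.
*
* @author Lane Schwartz
* @version $LastChangedDate: 2009-05-20 14:04:19 -0500 (Wed, 20 May 2009) $
*/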
public class HierarchicalPhraseTest {

    /** Corpus loaded by {@link #setup()}; read by the tests that depend on it. */
    CorpusArray sourceCorpusArray;

    /**
     * Writes a three-sentence Spanish sample corpus to a temporary UTF-8 file
     * and loads it through {@code SuffixArrayFactory.createCorpusArray}.
     * <p>
     * The other tests in this class declare a dependency on this method, so
     * it runs before them and populates {@link #sourceCorpusArray}.
     *
     * @throws IOException if the temporary file cannot be created or written,
     *                     or if loading the corpus fails
     */
    @Test
    public void setup() throws IOException {

        // Word alignments for the three sentences below; currently unused,
        // kept for reference.
        // String alignmentString =
        // "0-0 0-1 1-1 2-1 3-1 0-2 0-3 5-4 4-5 6-5 8-6 8-7 7-8 10-9 12-10 11-11 12-11 13-12 14-13 15-13 16-13 16-14 17-15 18-16 19-17 19-18 19-19 19-20 19-21 20-22 21-24 22-24 25-29 24-31 26-32 27-33 28-34 30-35 31-36 29-37 30-37 31-37 31-38 32-39" + "\n" +
        // "0-0 0-1 0-2 1-3 2-5 3-6 4-6 5-7 6-8 7-9 8-10 10-11 12-11 9-12 11-12 12-12 13-13 14-14 18-16 21-17 22-19 22-20 23-20 24-21 25-22 25-23 26-24 27-25 28-25 29-26 30-26 31-26 31-28 32-29 34-30 33-31 35-33 36-34 36-35 37-36" + "\n" +
        // "0-0 1-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-11 10-12 11-13 12-14 10-15 11-15 12-15 13-16 14-17 15-17 16-17 19-17 18-18 21-19 22-20" + "\n";

        String sourceCorpusString =
            "declaro reanudado el período de sesiones del parlamento europeo , interrumpido el viernes 17 de diciembre pasado , y reitero a sus señorías mi deseo de que hayan tenido unas buenas vacaciones ." + "\n" +
            "como todos han podido comprobar , el gran `` efecto del año 2000 '' no se ha producido . en cambio , los ciudadanos de varios de nuestros países han sido víctimas de catástrofes naturales verdaderamente terribles ." + "\n" +
            "sus señorías han solicitado un debate sobre el tema para los próximos días , en el curso de este período de sesiones ." + "\n";

        // createTempFile already guarantees a unique name. A fixed suffix is
        // used instead of a date string, whose spaces and colons are not
        // valid in file names on every platform.
        File sourceFile = File.createTempFile("source", ".txt");

        // Do not leave the sample corpus behind in the temp directory.
        sourceFile.deleteOnExit();

        PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
        try {
            // println appends one more line terminator after the final "\n";
            // kept so that the bytes written to the corpus file are unchanged.
            sourcePrintStream.println(sourceCorpusString);

            // PrintStream never throws on write failure; it only sets an
            // internal flag, so surface any failure explicitly.
            if (sourcePrintStream.checkError()) {
                throw new IOException(
                        "Failed to write sample corpus to " + sourceFile.getAbsolutePath());
            }
        } finally {
            sourcePrintStream.close();
        }

        sourceCorpusArray =
            SuffixArrayFactory.createCorpusArray(sourceFile.getAbsolutePath());
    }

    /**
     * Checks that a {@code HierarchicalPhrases} collection built from a
     * five-word pattern, with one start index and one sentence number,
     * reports an arity of 0 and a single terminal sequence.
     */
    @Test(dependsOnMethods={"setup"})
    public void arity() {

        SymbolTable vocab = sourceCorpusArray.getVocabulary();

        Pattern pattern = new Pattern(vocab, vocab.getIDs("de sesiones del parlamento europeo"));
        int[] terminalSequenceStartIndices = {4};
        int[] sentenceNumbers = {0};

        HierarchicalPhrases phrases =
            new HierarchicalPhrases(pattern, terminalSequenceStartIndices, sentenceNumbers);

        Assert.assertEquals(phrases.arity(), 0);
        Assert.assertEquals(phrases.getNumberOfTerminalSequences(), 1);
    }

    /**
     * Checks {@code HierarchicalPhrase.containsTerminalAt} for two phrases
     * that have no labeled spans: a five-word phrase built with
     * {@code Span(4,9)} and a one-word phrase built with {@code Span(9,10)}.
     * <p>
     * In both cases the test expects terminals at exactly the positions from
     * the span's first argument up to, but not including, its second.
     */
    @Test(dependsOnMethods={"setup"})
    public void testHasAlignedTerminal() {

        SymbolTable vocab = sourceCorpusArray.getVocabulary();

        // Words 4..8 of the first sample sentence.
        HierarchicalPhrase multiWordPhrase =
            new HierarchicalPhrase(
                    vocab.getIDs("de sesiones del parlamento europeo"),
                    new Span(4,9),
                    Collections.<LabeledSpan>emptyList(),
                    sourceCorpusArray);
        assertTerminalsOnlyWithin(multiWordPhrase, 4, 9);

        // Word 9 of the first sample sentence.
        HierarchicalPhrase singleWordPhrase =
            new HierarchicalPhrase(
                    vocab.getIDs(","),
                    new Span(9,10),
                    Collections.<LabeledSpan>emptyList(),
                    sourceCorpusArray);
        assertTerminalsOnlyWithin(singleWordPhrase, 9, 10);
    }

    /**
     * Asserts that {@code phrase.containsTerminalAt(i)} is true exactly for
     * {@code start <= i < end}, probing positions 0 through 11 and then the
     * out-of-range values {@code Integer.MAX_VALUE} and {@code -1}.
     *
     * @param phrase phrase under test
     * @param start  first position expected to hold a terminal (inclusive)
     * @param end    first position past the expected terminals (exclusive)
     */
    private static void assertTerminalsOnlyWithin(HierarchicalPhrase phrase, int start, int end) {

        for (int position = 0; position <= 11; position++) {
            boolean expected = (start <= position && position < end);
            Assert.assertEquals(
                    phrase.containsTerminalAt(position),
                    expected,
                    "containsTerminalAt(" + position + ")");
        }

        Assert.assertFalse(phrase.containsTerminalAt(Integer.MAX_VALUE));
        Assert.assertFalse(phrase.containsTerminalAt(-1));
    }
}