/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.ml.model;
import java.io.IOException;
import java.util.Collections;
import org.junit.Assert;
import org.junit.Test;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
/**
 * Exercises {@link TwoPassDataIndexer} against a small, hand-written event stream.
 */
public class TwoPassDataIndexerTest {

  /**
   * Indexes the seven fixture events with an empty {@link TrainingParameters} map (and
   * {@code null} as the second {@code init} argument) and verifies the indexed contexts,
   * outcomes and predicate statistics.
   *
   * <p>The expected values encode that {@code ppo=other} is the only predicate retained:
   * it occurs in exactly the first five fixture events, whose outcomes are
   * {@code other} (three times), {@code org-start} and {@code org-cont}. Every other
   * predicate occurs at most four times.
   * NOTE(review): presumably this is the effect of the indexer's default cutoff - confirm
   * against the indexer's defaults.
   *
   * @throws IOException propagated from the event stream / indexer calls
   */
  @Test
  public void testIndex() throws IOException {
    DataIndexer dataIndexer = new TwoPassDataIndexer();
    dataIndexer.init(new TrainingParameters(Collections.emptyMap()), null);
    dataIndexer.index(sampleEvents());

    // Three distinct indexed events, each consisting solely of predicate index 0.
    int[][] contexts = dataIndexer.getContexts();
    Assert.assertEquals(3, contexts.length);
    for (int[] context : contexts) {
      Assert.assertArrayEquals(new int[]{0}, context);
    }

    Assert.assertNull(dataIndexer.getValues());
    Assert.assertEquals(5, dataIndexer.getNumEvents());

    // Outcome indices and how often each distinct event was observed.
    Assert.assertArrayEquals(new int[]{0, 1, 2}, dataIndexer.getOutcomeList());
    Assert.assertArrayEquals(new int[]{3, 1, 1}, dataIndexer.getNumTimesEventsSeen());

    // Label tables and the per-predicate occurrence count.
    Assert.assertArrayEquals(new String[]{"ppo=other"}, dataIndexer.getPredLabels());
    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"},
        dataIndexer.getOutcomeLabels());
    Assert.assertArrayEquals(new int[]{5}, dataIndexer.getPredCounts());
  }

  /**
   * Builds the fixture: one event per token of the sentence
   * {@code He belongs to <START:org> Apache Software Foundation <END> .},
   * each written as {@code outcome/predicate predicate ...}.
   *
   * @return the stream of the seven fixture events
   * @throws IOException propagated from the builder calls, if any of them raise it
   */
  private static ObjectStream<Event> sampleEvents() throws IOException {
    SimpleEventStreamBuilder builder = new SimpleEventStreamBuilder();
    builder
        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
            " powf=other,lc ppo=other")
        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
            " powf=other,ic ppo=other")
        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
            " pow=org-start,Software powf=org-start,ic ppo=other")
        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
            " powf=org-cont,ic ppo=org-start")
        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
            " ppo=org-cont");
    return builder.build();
  }
}