/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.ml.model;

import java.io.IOException;
import java.util.Collections;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;

/**
 * Tests for {@link OnePassRealValueDataIndexer}.
 *
 * <p>Both tests index the same seven-event sample (one event per token of
 * "He belongs to Apache Software Foundation ."), once with plain predicates
 * and once with predicates that carry an explicit {@code ;value} suffix.
 * The structural expectations are identical in both cases and are checked by
 * {@link #assertIndexedStructure()}; the tests differ only in what they
 * expect from {@link DataIndexer#getValues()}.
 */
public class OnePassRealValueDataIndexerTest {

  /** Tolerance used when comparing expected and indexed float values. */
  private static final float DELTA = 0.001F;

  /** Indexer under test; re-created before every test by {@link #setUp()}. */
  DataIndexer indexer;

  /**
   * Creates a fresh indexer initialised with an empty parameter map.
   *
   * @throws Exception if the indexer cannot be created or initialised
   */
  @Before
  public void setUp() throws Exception {
    indexer = new OnePassRealValueDataIndexer();
    // NOTE(review): the second argument is passed as null; presumably it is
    // an optional report map - confirm against DataIndexer#init.
    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
  }

  /**
   * Indexes events whose predicates carry no explicit values and verifies
   * that no value arrays are produced.
   *
   * @throws IOException if reading the event stream fails
   */
  @Test
  public void testIndex() throws IOException {
    // He belongs to <START:org> Apache Software Foundation <END> .
    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to"
            + " powf=other,lc ppo=other")
        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache"
            + " powf=other,ic ppo=other")
        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start"
            + " pow=org-start,Software powf=org-start,ic ppo=other")
        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation"
            + " powf=org-cont,ic ppo=org-start")
        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other"
            + " ppo=org-cont")
        .build();

    indexer.index(eventStream);

    assertIndexedStructure();

    // Without explicit ";value" suffixes every value slot is expected to be null.
    Assert.assertEquals(3, indexer.getValues().length);
    Assert.assertNull(indexer.getValues()[0]);
    Assert.assertNull(indexer.getValues()[1]);
    Assert.assertNull(indexer.getValues()[2]);
  }

  /**
   * Indexes events whose predicates carry explicit {@code ;value} suffixes
   * and verifies the float values reported for each indexed event.
   *
   * @throws IOException if reading the event stream fails
   */
  @Test
  public void testIndexValues() throws IOException {
    // He belongs to <START:org> Apache Software Foundation <END> .
    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
        .add("other/w=he;0.1 n1w=belongs;0.2 n2w=to;0.1 po=other;0.1"
            + " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
        .add("other/w=belongs;0.1 p1w=he;0.2 n1w=to;0.1 n2w=apache;0.1"
            + " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
        .add("other/w=to;0.1 p1w=belongs;0.2 p2w=he;0.1 n1w=apache;0.1"
            + " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
        .add("org-start/w=apache;0.1 p1w=to;0.2 p2w=belongs;0.1 n1w=software;0.1 n2w=foundation;0.1"
            + " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
        .add("org-cont/w=software;0.1 p1w=apache;0.2 p2w=to;0.1 n1w=foundation;0.1"
            + " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
        .add("org-cont/w=foundation;0.1 p1w=software;0.2 p2w=apache;0.1 n1w=.;0.1 po=org-cont;0.1"
            + " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
        .add("other/w=.;0.1 p1w=foundation;0.1 p2w=software;0.1 po=org-cont;0.1 pow=org-cont,.;0.1"
            + " powf=org-cont,other;0.1 ppo=org-cont;0.1")
        .build();

    indexer.index(eventStream);

    assertIndexedStructure();

    // The expected arrays have 7, 9 and 9 entries, i.e. one value per
    // predicate written in the corresponding event string, not one per
    // retained predicate.
    Assert.assertEquals(3, indexer.getValues().length);
    Assert.assertArrayEquals(
        new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
        indexer.getValues()[0], DELTA);
    Assert.assertArrayEquals(
        new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
        indexer.getValues()[1], DELTA);
    Assert.assertArrayEquals(
        new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
        indexer.getValues()[2], DELTA);
  }

  /**
   * Asserts the expectations shared by both tests: three indexed events that
   * each reference only predicate index 0, five counted events, three
   * outcomes, and {@code ppo=other} as the single retained predicate.
   *
   * <p>{@code ppo=other} is written in five of the seven sample events (three
   * {@code other}, one {@code org-start}, one {@code org-cont}), which matches
   * the expected counts {@code {3, 1, 1}} and the predicate count of 5.
   * NOTE(review): presumably every other predicate is dropped by the
   * indexer's default cutoff - confirm against its default parameters.
   */
  private void assertIndexedStructure() {
    Assert.assertEquals(3, indexer.getContexts().length);
    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);

    Assert.assertEquals(5, indexer.getNumEvents());
    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"},
        indexer.getOutcomeLabels());
    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
  }
}