/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.chunker; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import org.junit.Assert; import org.junit.Test; import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelType; /** * Tests for the {@link ChunkerFactory} class. 
*/
public class ChunkerFactoryTest {

  /** Classpath location of the chunk samples every test in this class trains on. */
  private static final String TRAINING_DATA = "/opennlp/tools/chunker/test.txt";

  /**
   * Opens the bundled sample file as a stream of {@link ChunkSample}s, decoding it as UTF-8.
   * The caller owns the returned stream and is responsible for closing it.
   *
   * @return a fresh sample stream over {@link #TRAINING_DATA}
   * @throws IOException if the stream cannot be created
   */
  private static ObjectStream<ChunkSample> createSampleStream() throws IOException {
    ResourceAsStreamFactory in =
        new ResourceAsStreamFactory(ChunkerFactoryTest.class, TRAINING_DATA);
    return new ChunkSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
  }

  /**
   * Trains an English chunker model on the bundled samples with default training parameters.
   *
   * @param type not used by this method; kept so existing call sites stay unchanged
   * @param factory the factory handed to {@link ChunkerME#train}
   * @return the trained model
   * @throws IOException if reading the samples or training fails
   */
  private static ChunkerModel trainModel(ModelType type, ChunkerFactory factory)
      throws IOException {
    // Close the sample stream once training is done instead of leaking it.
    try (ObjectStream<ChunkSample> samples = createSampleStream()) {
      return ChunkerME.train("en", samples, TrainingParameters.defaultParams(), factory);
    }
  }

  /**
   * Serializes the model into memory and reads it back.
   *
   * @param model the model to write out
   * @return a new model instance built from the serialized bytes
   * @throws IOException if serialization or deserialization fails
   */
  private static ChunkerModel roundTrip(ChunkerModel model) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    model.serialize(out);
    return new ChunkerModel(new ByteArrayInputStream(out.toByteArray()));
  }

  /**
   * A model trained with the stock {@link ChunkerFactory} must expose the default context
   * generator and sequence validator, both right after training and after being reloaded
   * from its serialized form.
   */
  @Test
  public void testDefaultFactory() throws IOException {
    ChunkerModel model = trainModel(ModelType.MAXENT, new ChunkerFactory());

    ChunkerFactory factory = model.getFactory();
    Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);

    factory = roundTrip(model).getFactory();
    Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
  }

  /**
   * A model trained with {@link DummyChunkerFactory} must expose the dummy context generator
   * and sequence validator, both right after training and after being reloaded from its
   * serialized form, and must be usable for chunking.
   */
  @Test
  public void testDummyFactory() throws IOException {
    ChunkerModel model = trainModel(ModelType.MAXENT, new DummyChunkerFactory());

    DummyChunkerFactory factory = (DummyChunkerFactory) model.getFactory();
    Assert.assertTrue(
        factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
    Assert.assertTrue(
        factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);

    // The cast itself verifies that the custom factory class survives the round trip.
    factory = (DummyChunkerFactory) roundTrip(model).getFactory();

    // The reloaded custom factory is expected to hand out the same dummy implementations
    // as the freshly trained one.
    Assert.assertTrue(
        factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
    Assert.assertTrue(
        factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);

    // Checks carried over unchanged from the earlier version of this test.
    Assert.assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);

    // Note: the chunker is built from the originally trained model, not the reloaded one.
    ChunkerME chunker = new ChunkerME(model);

    String[] toks1 = {"Rockwell", "said", "the", "agreement", "calls", "for", "it", "to",
        "supply", "200", "additional", "so-called", "shipsets", "for", "the", "planes", "."};

    String[] tags1 = {"NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB", "CD", "JJ",
        "JJ", "NNS", "IN", "DT", "NNS", "."};

    // Chunking must complete and yield exactly one chunk tag per input token.
    String[] chunks = chunker.chunk(toks1, tags1);
    Assert.assertEquals(toks1.length, chunks.length);
  }
}