/******************************************************************************* * Copyright (c) 2012 György Orosz, Attila Novák. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/ * * This file is part of PurePos. * * PurePos is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PurePos is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * Contributors: * György Orosz - initial API and implementation ******************************************************************************/ package hu.ppke.itk.nlpg.purepos.common.serializer; import hu.ppke.itk.nlpg.corpusreader.CorpusReader; import hu.ppke.itk.nlpg.docmodel.IDocument; import hu.ppke.itk.nlpg.purepos.model.internal.RawModel; import java.io.File; import junit.framework.Assert; import org.junit.Test; @SuppressWarnings("static-access") public class SerializerTest { @Test public void readWriteTest() throws Exception { CorpusReader r = new CorpusReader(); IDocument d = r .read("Michael#Michael#[FN][NOM] Karaman#??Karaman#[FN][NOM]" + " ,#,#[PUNCT] az#az#[DET] Ann#Ann#[FN][NOM] 1#1#[SZN][NOM]"); RawModel model = new RawModel(3, 3, 10, 10); model.train(d); String pathname = "./_test.model"; File f = new File(pathname); SSerializer.writeModel(model, f); RawModel readModel = SSerializer.readModel(f); // TODO: write equality test case, now it is enough that it doesn't fail String modelTagVocab = model.getTagVocabulary().toString(); String readTagVocab = readModel.getTagVocabulary().toString(); // System.out.println(modelTagVocab); // System.out.println(readTagVocab); Assert.assertEquals(modelTagVocab.length(), readTagVocab.length()); Assert.assertEquals(model.getEmissionOrder(), readModel.getEmissionOrder()); Assert.assertEquals(model.getRareFreqency(), readModel.getRareFreqency()); Assert.assertEquals(model.getSuffixLength(), readModel.getSuffixLength()); Assert.assertEquals(model.getTaggingOrder(), readModel.getTaggingOrder()); Assert.assertEquals(model.getBOSIndex(), readModel.getBOSIndex()); Assert.assertEquals(model.getEOSIndex(), readModel.getEOSIndex()); Assert.assertEquals(model.getEOSTag(), readModel.getEOSTag()); Assert.assertEquals(model.getEOSToken(), readModel.getEOSToken()); Assert.assertEquals(model.getBOSTag(), readModel.getBOSTag()); Assert.assertEquals(model.getBOSToken(), readModel.getBOSToken()); Assert.assertEquals(model.getLastStat(), readModel.getLastStat()); Assert.assertEquals(model.getSpecTokensLexicon().size(), readModel .getSpecTokensLexicon().size()); Assert.assertEquals(model.getStandardTokensLexicon().size(), readModel .getStandardTokensLexicon().size()); SSerializer.deleteModel(f); } @Test public void incrementalTest() throws Exception { CorpusReader r = new CorpusReader(); IDocument d1 = r .read("Michael#Michael#[FN][NOM] Karaman#??Karaman#[FN][NOM]" + " ,#,#[PUNCT] az#az#[DET] Ann#Ann#[FN][NOM] 1#1#[SZN][NOM]"); IDocument d2 = r .read("Ez#ez#[FN|NM][NOM] volt#van#[IGE][Me3] a#a#[DET]" + " legszebb#szép#[FF][MN][_FOK][NOM]" + " estém#este#[FN][PSe1][NOM] .#.#[PUNCT] "); RawModel model = new RawModel(3, 3, 10, 10); model.train(d1); Assert.assertEquals(6, model.getStandardTokensLexicon().size()); String pathname = "./_test.model"; File f = new File(pathname); SSerializer.writeModel(model, f); RawModel readModel = SSerializer.readModel(f); readModel.train(d2); Assert.assertEquals(12, readModel.getStandardTokensLexicon().size()); SSerializer.deleteModel(f); } }