/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package com.cognitionis.nlp_segmentation; import java.io.File; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; //import org.apache.commons.io.FileUtils; /** * * @author hector */ public class TokenizerTest { public TokenizerTest() { } @BeforeClass public static void setUpClass() { } @AfterClass public static void tearDownClass() { } @Before public void setUp() { } @After public void tearDown() { } /** * Test of tokenize method, of class Tokenizer_PTB_Rulebased. */ @Test public void testTokenize_File() throws Exception { System.out.println("tokenize"); //java.net.URL url = this.class.getResource("test/resources/tokenizer/test-input.txt"); //File f=FileUtils.toFile(this.getClass().getResource("/tokenizer/test-input.txt.tokenized")); File f_in = new File(this.getClass().getResource("/tokenizer/test-input.txt").toURI()); File f_out = new File(this.getClass().getResource("/tokenizer/test-input.txt.tokenized").toURI()); String expResultString = new String(java.nio.file.Files.readAllBytes(f_out.toPath()),"UTF-8"); String inputString = new String(java.nio.file.Files.readAllBytes(f_in.toPath()),"UTF-8"); Tokenizer_PTB_Rulebased instance = new Tokenizer_PTB_Rulebased(false); // tokenize without sentence splitting String result = instance.tokenize(inputString); assertEquals(expResultString, result); //assertArrayEquals } }