package jhazm.test.tokenizer;
import jhazm.tokenizer.WordTokenizer;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
*
* @author Mojtaba Khallash
*/
public class WordTokenizerTest {
@Test
public void tokenizeTest() throws IOException {
WordTokenizer wordtokenizer = new WordTokenizer(false);
String input;
String[] expected;
List<String> actual;
input = "این جمله (خیلی) پیچیده نیست!!!";
expected = new String[] { "این", "جمله", "(", "خیلی", ")", "پیچیده", "نیست", "!!!"};
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
}
@Test
public void joinVerbPartsTest() throws IOException {
WordTokenizer wordtokenizer = new WordTokenizer(true);
String input;
String[] expected;
List<String> actual;
input = "خواهد رفت";
expected = new String[] { "خواهد رفت" };
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
input = "رفته است";
expected = new String[] { "رفته است" };
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
input = "گفته شده است";
expected = new String[] { "گفته شده است" };
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
input = "گفته خواهد شد";
expected = new String[] { "گفته خواهد شد" };
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
input = "خسته شدید";
expected = new String[] { "خسته", "شدید" };
actual = wordtokenizer.tokenize(input);
check(input, expected, actual);
}
private void check(String input, String[] expected, List<String> actual) {
assertEquals("Failed to tokenize words of '" + input + "' sentence", expected.length, actual.size());
for (int i = 0; i < expected.length; i++) {
assertEquals("Failed to tokenize words of '" + input + "' sentence", expected[i], actual.get(i));
}
}
}