/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.ResourceBundle;
import java.util.Set;
import junit.framework.Assert;
import morfologik.stemming.Dictionary;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.WordData;
import org.languagetool.tagging.BaseTagger;
import org.languagetool.tagging.Tagger;
import org.languagetool.tagging.disambiguation.Disambiguator;
import org.languagetool.tokenizers.SentenceTokenizer;
import org.languagetool.tokenizers.Tokenizer;
import org.languagetool.tools.StringTools;
/**
 * Utility methods shared by the test cases.
 *
 * @author Daniel Naber
 */
public final class TestTools {
private TestTools() {
}
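/**
 * Returns the English version of the user-visible message bundle,
 * a shortcut for {@code getMessages("en")}.
 */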
public static ResourceBundle getEnglishMessages() {
return getMessages("en");
}
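/**
 * Returns all languages from {@link Language#LANGUAGES} except those whose
 * short names (e.g. {@code "en"}) are listed in {@code langCodes}.
 * @param langCodes language codes to exclude, may be {@code null} to exclude nothing
 */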
public static Set<Language> getLanguagesExcept(String[] langCodes) {
final Set<Language> languages = new HashSet<Language>(Arrays.asList(Language.LANGUAGES));
if (langCodes != null) {
for (String langCode : langCodes) {
final Language lang = Language.getLanguageForShortName(langCode);
languages.remove(lang);
}
}
return languages;
}
/**
* Gets the resource bundle for the specified language.
* @param language lowercase two-letter ISO 639-1 language code.
* @return the resource bundle for the specified language.
*/
public static ResourceBundle getMessages(String language) {
return ResourceBundle.getBundle(JLanguageTool.MESSAGE_BUNDLE, new Locale(language));
}
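/**
 * Asserts that {@code sTokenizer} splits the concatenation of {@code sentences}
 * back into exactly those sentences, in order. A sketch of a typical call
 * (the tokenizer variable and the sentences are only examples):
 * <pre>{@code
 * // "tokenizer" stands for the SentenceTokenizer of the language under test
 * TestTools.testSplit(new String[] { "This is a sentence. ", "And this is another one." }, tokenizer);
 * }</pre>
 */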
public static void testSplit(final String[] sentences,
final SentenceTokenizer sTokenizer) {
final StringBuilder inputString = new StringBuilder();
for (final String sentence : sentences) {
inputString.append(sentence);
}
Assert.assertEquals(Arrays.asList(sentences), sTokenizer.tokenize(inputString.toString()));
}
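/**
 * Tags {@code input} with the given tokenizer and tagger and asserts that the
 * result equals {@code expected}. Whitespace tokens are skipped. Each reading is
 * rendered as {@code token/[lemma]POSTag}; the readings of a token are sorted and
 * joined with {@code "|"}, and tokens are separated by {@code " -- "}. For example,
 * a hypothetical tagger that tags "dogs" as a plural noun with lemma "dog" would
 * render that token as {@code "dogs/[dog]NNS"}.
 */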
public static void myAssert(final String input, final String expected,
final Tokenizer tokenizer, final Tagger tagger) throws IOException {
final List<String> tokens = tokenizer.tokenize(input);
final List<String> noWhitespaceTokens = new ArrayList<String>();
// whitespace confuses the tagger, so pass it only the non-whitespace tokens:
for (final String token : tokens) {
if (isWord(token)) {
noWhitespaceTokens.add(token);
}
}
final List<AnalyzedTokenReadings> output = tagger.tag(noWhitespaceTokens);
final StringBuilder outputStr = new StringBuilder();
for (final Iterator<AnalyzedTokenReadings> iter = output.iterator(); iter.hasNext();) {
final AnalyzedTokenReadings token = iter.next();
final int readingsNumber = token.getReadingsLength();
final List<String> readings = new ArrayList<String>();
for (int j = 0; j < readingsNumber; j++) {
final StringBuilder readingStr = new StringBuilder();
readingStr.append(token.getAnalyzedToken(j).getToken());
readingStr.append("/[");
readingStr.append(token.getAnalyzedToken(j).getLemma());
readingStr.append(']');
readingStr.append(token.getAnalyzedToken(j).getPOSTag());
readings.add(readingStr.toString());
}
// force some order on the result just for the test case - order may vary
// from one version of the lexicon to the next:
Collections.sort(readings);
outputStr.append(StringTools.listToString(readings, "|"));
if (iter.hasNext()) {
outputStr.append(" -- ");
}
}
Assert.assertEquals(expected, outputStr.toString());
}
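/**
 * Like {@link #myAssert(String, String, Tokenizer, Tagger)}, but additionally splits
 * the input into sentences, prepends an artificial {@code SENT_START} reading to each
 * sentence and runs the given {@link Disambiguator} before comparing. The readings of
 * a token are sorted and joined with {@code "|"}; tokens are separated by single spaces.
 */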
public static void myAssert(final String input, final String expected,
final Tokenizer tokenizer, final SentenceTokenizer sentenceTokenizer,
final Tagger tagger, final Disambiguator disambiguator)
throws IOException {
final StringBuilder outputStr = new StringBuilder();
final List<String> sentences = sentenceTokenizer.tokenize(input);
for (final String sentence : sentences) {
final List<String> tokens = tokenizer.tokenize(sentence);
final List<String> noWhitespaceTokens = new ArrayList<String>();
// whitespace confuses the tagger, so pass it only the non-whitespace tokens:
for (final String token : tokens) {
if (isWord(token)) {
noWhitespaceTokens.add(token);
}
}
final List<AnalyzedTokenReadings> aTokens = tagger.tag(noWhitespaceTokens);
final AnalyzedTokenReadings[] tokenArray = new AnalyzedTokenReadings[tokens.size() + 1];
final AnalyzedToken[] startTokenArray = new AnalyzedToken[1];
int toArrayCount = 0;
final AnalyzedToken sentenceStartToken = new AnalyzedToken("", "SENT_START", null);
startTokenArray[0] = sentenceStartToken;
tokenArray[toArrayCount++] = new AnalyzedTokenReadings(startTokenArray, 0);
int startPos = 0;
int noWhitespaceCount = 0;
for (final String tokenStr : tokens) {
final AnalyzedTokenReadings readings;
if (isWord(tokenStr)) {
readings = aTokens.get(noWhitespaceCount);
readings.setStartPos(startPos);
noWhitespaceCount++;
} else {
readings = tagger.createNullToken(tokenStr, startPos);
}
tokenArray[toArrayCount++] = readings;
startPos += tokenStr.length();
}
AnalyzedSentence finalSentence = new AnalyzedSentence(tokenArray);
// disambiguate assigned tags
finalSentence = disambiguator.disambiguate(finalSentence);
final AnalyzedTokenReadings[] output = finalSentence.getTokens();
for (int i = 0; i < output.length; i++) {
final AnalyzedTokenReadings token = output[i];
final int readingsNumber = token.getReadingsLength();
final List<String> readings = new ArrayList<String>();
for (int j = 0; j < readingsNumber; j++) {
final StringBuilder readingStr = new StringBuilder();
readingStr.append(token.getAnalyzedToken(j).getToken());
readingStr.append("/[");
readingStr.append(token.getAnalyzedToken(j).getLemma());
readingStr.append(']');
readingStr.append(token.getAnalyzedToken(j).getPOSTag());
readings.add(readingStr.toString());
}
// force some order on the result just for the test case - order may vary
// from one version of the lexicon to the next:
Collections.sort(readings);
outputStr.append(StringTools.listToString(readings, "|"));
if (i < output.length - 1) {
outputStr.append(' ');
}
}
}
Assert.assertEquals(expected, outputStr.toString());
}
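/**
 * Returns {@code true} if the token contains at least one letter or digit,
 * i.e. if it is not whitespace or pure punctuation.
 */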
public static boolean isWord(final String token) {
for (int i = 0; i < token.length(); i++) {
final char c = token.charAt(i);
if (Character.isLetter(c) || Character.isDigit(c)) {
return true;
}
}
return false;
}
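/**
 * Calls a (possibly non-public) static method that returns a {@link String} via
 * reflection. A sketch of a call (class and method name are only examples):
 * <pre>{@code
 * // SomeClass and somePrivateMethod are placeholders for the class and method under test
 * String result = TestTools.callStringStaticMethod(SomeClass.class, "somePrivateMethod",
 *     new Class<?>[] { String.class }, new Object[] { "input" });
 * }</pre>
 */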
public static String callStringStaticMethod(final Class<?> targetClass,
final String methodName, final Class<?>[] argClasses,
final Object[] argObjects) throws InvocationTargetException,
IllegalArgumentException, IllegalAccessException, SecurityException,
NoSuchMethodException {
final Method method = targetClass.getDeclaredMethod(methodName, argClasses);
method.setAccessible(true);
return (String) method.invoke(null, argObjects);
}
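/**
 * Iterates over all entries of the tagger's Morfologik dictionary and prints a
 * warning to {@code System.err} for every word that has no POS tag. This method
 * only reports problems, it does not make any assertions.
 */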
public static void testDictionary(BaseTagger tagger, Language language) throws IOException {
final Dictionary dictionary = Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(tagger.getFileName()));
final DictionaryLookup lookup = new DictionaryLookup(dictionary);
for (WordData wordData : lookup) {
if (wordData.getTag() == null || wordData.getTag().length() == 0) {
System.err.println("**** Warning: " + language + ": the word " + wordData.getWord() + "/" + wordData.getStem() + " lacks a POS tag in the dictionary.");
}
}
}
}