package hextostring.evaluate.string;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import hextostring.evaluate.EvaluationResult;
/**
 * Standard evaluator for Japanese strings.
 *
 * <p>The mark of a string is the sum of the following components:
 * <ul>
 * <li>a malus of {@code INVALID_CHARS_MALUS} for every character (line
 * breaks excluded) that is not covered by
 * {@link #JAPANESE_CHARS_REGEX};</li>
 * <li>a bonus of {@link #PUNCTUATION_BONUS} for every character listed in
 * {@link #JAPANESE_PUNCTUATION};</li>
 * <li>a one-time bonus of {@link #FINAL_PUNCTUATION_BONUS} if the last
 * character is listed in {@link #JAPANESE_PUNCTUATION};</li>
 * <li>a bonus of {@link #KANA_BONUS} for every kana, or a one-time malus
 * of {@link #NO_KANA_MALUS} if the string contains no kana at all.</li>
 * </ul>
 *
 * @author Maxime PIA
 */
public class JapaneseStringEvaluator extends StringEvaluator {

    // valid Japanese characters
    public static final String JAPANESE_CHARS_REGEX =
        "([a-zA-Z0-9#&]|[a-zA-Z0-9]|[\u3000-\u303F]|\u2048|\u2049|" +
        "[\u3040-\u309F]|[\u30A0-\u30FF]|[\uFF00-\uFFEF]|“|”|…|—|" +
        "[\u4E00-\u9FAF]|[\u2605-\u2606]|[\u2190-\u2195]|\u203B)+";

    // kana
    public static final String KANA_REGEX = "[\u3040-\u309F]|[\u30A0-\u30FF]";

    // valid Japanese punctuation; entries are literal symbols, NOT regular
    // expressions
    public static final String[] JAPANESE_PUNCTUATION =
        {"”", "—", "。", "!", "?", "…", "、", ")", "】", "」", "』", "〜",
        "\u2048", // one-character ?!
        "\u2049" // one-character !?
        };

    public static final int FINAL_PUNCTUATION_BONUS = 24;
    public static final int PUNCTUATION_BONUS = 8;
    public static final int NO_KANA_MALUS = 24;
    public static final int KANA_BONUS = 4;

    // Compiled once: both expressions are constants, so recompiling them on
    // every evaluation is wasted work.
    private static final Pattern JAPANESE_CHARS_PATTERN =
        Pattern.compile(JAPANESE_CHARS_REGEX);
    private static final Pattern KANA_PATTERN = Pattern.compile(KANA_REGEX);

    /**
     * Sums the lengths of all successive matches of a pattern in a string.
     *
     * @param pattern
     *            The pattern to look for.
     * @param s
     *            The string to scan.
     * @return The total number of chars covered by the matches.
     */
    private static int getMatchedLength(Pattern pattern, String s) {
        Matcher m = pattern.matcher(s);
        int total = 0;
        while (m.find()) {
            total += m.end() - m.start();
        }
        return total;
    }

    /**
     * Tells whether a symbol is one of the entries of
     * {@link #JAPANESE_PUNCTUATION}.
     *
     * The comparison is literal on purpose: some entries ("?", ")") are not
     * valid regular expressions, so matching them as patterns would throw a
     * PatternSyntaxException.
     *
     * @param symbol
     *            The one-character string to test.
     * @return True if the symbol is a listed punctuation symbol.
     */
    private static boolean isPunctuation(String symbol) {
        for (String punctuation : JAPANESE_PUNCTUATION) {
            if (punctuation.equals(symbol)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the characters that are not covered by
     * {@link #JAPANESE_CHARS_REGEX}. Line breaks are ignored.
     *
     * @param jp
     *            The string to inspect.
     * @return The number of chars outside of every match.
     */
    private int getInvalidCharactersCount(String jp) {
        String jpNoLineBreaks = jp.replace("\n", "");
        return jpNoLineBreaks.length()
            - getMatchedLength(JAPANESE_CHARS_PATTERN, jpNoLineBreaks);
    }

    /**
     * Counts the kana found in a string. Line breaks are ignored.
     *
     * @param jp
     *            The string to inspect.
     * @return The number of chars matching {@link #KANA_REGEX}.
     */
    private int getNbKana(String jp) {
        String jpNoLineBreaks = jp.replace("\n", "");
        return getMatchedLength(KANA_PATTERN, jpNoLineBreaks);
    }

    /**
     * Tells whether the last character of a string is a punctuation symbol.
     *
     * @param s
     *            The string to inspect.
     * @return True if the string is not empty and its last char is listed in
     *         {@link #JAPANESE_PUNCTUATION}.
     */
    private boolean hasFinalPunctuation(String s) {
        if (s.isEmpty()) {
            return false;
        }
        return isPunctuation(s.substring(s.length() - 1));
    }

    /**
     * Counts the punctuation symbols found in a string.
     *
     * @param s
     *            The string to inspect.
     * @return The number of chars listed in {@link #JAPANESE_PUNCTUATION}.
     */
    private int getNbPunctuation(String s) {
        int total = 0;
        for (int i = 0; i < s.length(); ++i) {
            if (isPunctuation(String.valueOf(s.charAt(i)))) {
                ++total;
            }
        }
        return total;
    }

    /**
     * Evaluates how likely a string is to be meaningful Japanese text.
     *
     * @param s
     *            The string to evaluate.
     * @return A result holding the total mark and a line-by-line explanation
     *         of every bonus and malus that was applied.
     */
    @Override
    public EvaluationResult evaluate(String s) {
        StringBuilder details = new StringBuilder();
        // every applied bonus/malus, in order, for the final "Total" line
        List<Integer> points = new LinkedList<>();

        int nbInvalidCharacters = getInvalidCharactersCount(s);
        int nbPunctuation = getNbPunctuation(s);
        boolean finalPunctuation = hasFinalPunctuation(s);
        int nbKana = getNbKana(s);

        int invalidCharsPoints = -nbInvalidCharacters * INVALID_CHARS_MALUS;
        points.add(invalidCharsPoints);
        details.append(nbInvalidCharacters);
        details.append(" invalid characters; applying malus of ");
        details.append(INVALID_CHARS_MALUS);
        details.append(" for every invalid character: ");
        details.append(invalidCharsPoints);
        details.append("\n");

        int punctuationPoints = nbPunctuation * PUNCTUATION_BONUS;
        points.add(punctuationPoints);
        details.append(nbPunctuation);
        details.append(" punctuation symbols; applying bonus of ");
        details.append(PUNCTUATION_BONUS);
        details.append(" for every punctuation symbol: +");
        details.append(punctuationPoints);
        details.append("\n");

        if (finalPunctuation) {
            points.add(FINAL_PUNCTUATION_BONUS);
            details.append("Final punctuation detected");
            details.append(" - applying bonus of ");
            details.append(FINAL_PUNCTUATION_BONUS);
            details.append(" once: +");
            details.append(FINAL_PUNCTUATION_BONUS);
        } else {
            // nothing is added to the points: the total line skips this item
            details.append("No final punctuation; no bonus applied");
        }
        details.append("\n");

        int kanaPoints;
        details.append(nbKana);
        details.append(" kana(s); applying ");
        if (nbKana == 0) {
            kanaPoints = -NO_KANA_MALUS;
            details.append("malus of ");
            details.append(NO_KANA_MALUS);
            details.append(" once: ");
        } else {
            kanaPoints = nbKana * KANA_BONUS;
            details.append("bonus of ");
            details.append(KANA_BONUS);
            details.append(" for every kana: +");
        }
        points.add(kanaPoints);
        details.append(kanaPoints);

        int mark = 0;
        details.append("\nTotal: ");
        for (int point : points) {
            // negative values already carry their own sign
            if (point >= 0) {
                details.append("+");
            }
            details.append(point);
            mark += point;
        }
        details.append("=");
        details.append(mark);

        return new EvaluationResult(mark, details.toString());
    }

}