package models; import com.atilika.kuromoji.ipadic.Token; import com.atilika.kuromoji.ipadic.Tokenizer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import models.word.SymbolConst; import org.apache.commons.lang3.tuple.Pair; import java.util.List; /** * kuromojiを使って解析したり * 文言チェックしたり */ public class WordAnalyzer { private static Tokenizer token = null; private static synchronized Tokenizer getTokenizer() { if (token == null) { final Tokenizer.Builder builder = new Tokenizer.Builder(); builder.mode(Tokenizer.Mode.SEARCH); token = builder.build(); } return token; } // 入力した文字列を使って解析し、文言ごとにスペースで区切る public static List<String> analyze(final String sentence) { Tokenizer search = getTokenizer(); final List<Token> tokens = search.tokenize(sentence); final List<String> analiezed = Lists.newArrayList(); for (final Token token : tokens) { final String word = token.getSurface(); analiezed.add(word); } return analiezed; } public static boolean isAllHalfNumericAndSymbols(final String word) { return word != null && !word.isEmpty() && word.length() == word.getBytes().length; } public static boolean isContainsPeriodWord(final String word) { for (final String period : SymbolConst.PERIOD) { if (word.contains(period)) return true; } return false; } public static String[] splitBySpecialSymbol(String sentence) { final String separator = System.getProperty("line.separator"); // "「愛」と「勇気」だけが友達さ"が、"とだけが友達さ"と変な内容になってしまうので // かっこ削除は一旦コメントアウトする //sentence = lineBreakByParenthesis(sentence); sentence = lineBreakByPeriod(sentence, separator); sentence = convertFullWidthToHalfWidth(sentence); return sentence.split(separator); } @VisibleForTesting public static String convertFullWidthToHalfWidth(String sentence) { return sentence.replace(" ", " "); } @VisibleForTesting public static String lineBreakByParenthesis(String sentence) { for (final Pair<String, String> parenthesis: SymbolConst.PARENTHESIS) { final String regex = parenthesis.getLeft() + ".*" + parenthesis.getRight(); sentence = sentence.replaceAll(regex, ""); } return sentence; } @VisibleForTesting public static String lineBreakByPeriod(String sentence, String separator) { for (final String period : SymbolConst.PERIOD) { sentence = sentence.replace(period, period + separator); } return sentence; } }