/**
 *
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

package org.apdplat.superword.rule;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import org.apdplat.superword.model.CharMap;
import org.apdplat.superword.model.Word;
import org.apdplat.superword.tools.WordLinker;
import org.apdplat.superword.tools.WordSources;

/**
 * Character transformation rules.
 *
 * <p>Vocabulary develops historically and progressively: from nothing to something,
 * from few words to many. The transformations between letters follow certain patterns:
 * <ul>
 *   <li>vowel letters (a e i o u) turning into one another;</li>
 *   <li>consonants that sound alike (for example the voiceless and voiced
 *       bilabial and labiodental consonants) turning into one another;</li>
 *   <li>letters and letter combinations with the same pronunciation (such as ph and f);</li>
 *   <li>letters that look alike (such as V and U, M and N). Handwriting made them easy to
 *       confuse, and over a long period a miswritten word could, through the influence of
 *       its author or other factors, evolve into a new word with a related meaning.</li>
 * </ul>
 *
 * <p>This is a static utility class and cannot be instantiated.
 *
 * @author 杨尚川
 */
public class CharTransformRule {
    private CharTransformRule(){}

    /** The built-in rules, in registration order; filled once by the static initializer. */
    private static final List<CharMap> CHAR_MAP_REGULAR = new ArrayList<>();

    static {
        rule("b", "p");
        rule("b", "m");
        rule("b", "f");
        rule("b", "v");
        rule("p", "m");
        rule("p", "f");
        rule("p", "v");
        rule("m", "f");
        rule("m", "v");
        rule("f", "v");

        rule("d", "t");
        rule("d", "s");
        rule("d", "c");
        rule("d", "z");
        rule("d", "th");
        rule("t", "s");
        rule("t", "c");
        rule("t", "z");
        rule("t", "th");
        rule("s", "c");
        rule("s", "z");
        rule("s", "th");
        rule("c", "z");
        rule("c", "th");
        rule("ch", "k");
        rule("z", "th");

        rule("g", "k");
        rule("g", "c");
        rule("g", "h");
        rule("k", "c");
        rule("k", "h");
        rule("c", "h");

        rule("r", "l");
        rule("r", "n");
        rule("l", "n");
        rule("m", "n");

        // vowel letters turning into one another
        rule("a", "e");
        rule("a", "i");
        rule("a", "o");
        rule("a", "u");
        rule("e", "i");
        rule("e", "o");
        rule("e", "u");
        rule("i", "o");
        rule("i", "u");
        rule("o", "u");

        // letters and letter combinations with the same pronunciation
        rule("ph", "f");

        // letters that look alike and are easily miswritten
        rule("v", "u");
        rule("v", "w");
        rule("u", "w");
        rule("i", "l");
        rule("i", "j");
        rule("f", "t");
        rule("m", "w");
    }

    /** Registers one built-in rule that rewrites {@code from} as {@code to}. */
    private static void rule(String from, String to) {
        CHAR_MAP_REGULAR.add(new CharMap(from, to));
    }

    /**
     * Renders the per-target result of {@link #transforms(Set, Set)} as an HTML fragment.
     *
     * <p>Target words are numbered only when {@code data} holds more than one target. For
     * every rule of a target a numbered "from - to" line is written, followed by an ordered
     * list in which each item links the source word and the word obtained by replacing
     * every occurrence of {@code from} with {@code to}.
     *
     * @param data target word mapped to its rules and, per rule, the matching source words
     * @return the HTML fragment; empty when {@code data} is empty
     */
    public static String toHtmlFragmentForWord(Map<Word, Map<CharMap, List<Word>>> data){
        StringBuilder result = new StringBuilder();
        boolean numberTargets = data.size() > 1;
        int targetIndex = 0;
        for (Map.Entry<Word, Map<CharMap, List<Word>>> targetEntry : data.entrySet()) {
            if (numberTargets) {
                result.append(++targetIndex)
                        .append(". ")
                        .append(targetEntry.getKey().getWord())
                        .append("</br>\n");
            }
            int ruleIndex = 0;
            for (Map.Entry<CharMap, List<Word>> ruleEntry : targetEntry.getValue().entrySet()) {
                String from = ruleEntry.getKey().getFrom();
                String to = ruleEntry.getKey().getTo();
                result.append("\t")
                        .append(++ruleIndex)
                        .append(". ")
                        .append(from)
                        .append(" - ")
                        .append(to)
                        .append("\n");
                result.append("<ol>\n");
                for (Word word : ruleEntry.getValue()) {
                    String original = word.getWord();
                    result.append("\t\t")
                            .append("<li>")
                            .append(WordLinker.toLink(original, from))
                            .append(" -> ")
                            // literal replacement: rules are plain text, not regular expressions
                            .append(WordLinker.toLink(original.replace(from, to), to))
                            .append("</li>\n");
                }
                result.append("</ol>\n");
            }
        }
        return result.toString();
    }

    /**
     * Convenience overload of {@link #transforms(Set, Set)} for a single target word.
     *
     * @param words  the vocabulary to search
     * @param target the word to explain through the transformation rules
     * @return a map with at most one entry, keyed by {@code target}
     */
    public static Map<Word, Map<CharMap, List<Word>>> transforms(Set<Word> words, Word target){
        return transforms(words, new HashSet<Word>(Arrays.asList(target)));
    }

    /**
     * For every target word, finds the rules and source words that are related to it.
     *
     * <p>The rule table is first computed with {@link #transforms(Set)}. A source word of a
     * rule is related to a target when it is spelled like the target itself, or when
     * replacing every {@code from} with {@code to} in it yields the target's spelling.
     * Targets without any related word are left out of the result.
     *
     * @param words   the vocabulary to search
     * @param targets the words to explain
     * @return target word mapped to rule mapped to related source words
     */
    public static Map<Word, Map<CharMap, List<Word>>> transforms(Set<Word> words, Set<Word> targets){
        Map<CharMap, List<Word>> data = transforms(words);
        Map<Word, Map<CharMap, List<Word>>> result = new ConcurrentHashMap<>();
        // Targets are independent of each other, so they may be processed in parallel;
        // the only shared structure written here is the concurrent result map.
        targets.parallelStream().forEach(target -> {
            Map<CharMap, List<Word>> related = relatedWords(data, target.getWord());
            if (!related.isEmpty()) {
                result.put(target, related);
            }
        });
        return result;
    }

    /**
     * Collects, for one target spelling, the rules whose source words are related to it.
     *
     * <p>Runs sequentially on purpose: the returned {@code HashMap} and the lists inside it
     * are not thread-safe, so they must only be filled by the calling thread.
     */
    private static Map<CharMap, List<Word>> relatedWords(Map<CharMap, List<Word>> data, String targetWord) {
        Map<CharMap, List<Word>> related = new HashMap<>();
        for (Map.Entry<CharMap, List<Word>> entry : data.entrySet()) {
            String from = entry.getKey().getFrom();
            String to = entry.getKey().getTo();
            List<Word> hits = entry.getValue().stream()
                    .filter(word -> {
                        String old = word.getWord();
                        return targetWord.equals(old) || targetWord.equals(old.replace(from, to));
                    })
                    .collect(Collectors.toCollection(ArrayList::new));
            if (!hits.isEmpty()) {
                related.put(entry.getKey(), hits);
            }
        }
        return related;
    }

    /**
     * Applies every built-in rule to the vocabulary.
     *
     * @param words the vocabulary to search
     * @return rule mapped to the sorted words it applies to; rules that match nothing are absent
     */
    public static Map<CharMap, List<Word>> transforms(Set<Word> words) {
        Map<CharMap, List<Word>> result = new ConcurrentHashMap<>();
        // each rule is evaluated independently; the concurrent map makes the merge safe
        CHAR_MAP_REGULAR.parallelStream().forEach(charMap -> result.putAll(transform(words, charMap)));
        return result;
    }

    /**
     * Turns one part of the letters of a word into another part, for every word of the
     * vocabulary, and keeps the words whose transformed spelling is itself in the vocabulary.
     *
     * <p>Both the containment test and the replacement treat {@code from} and {@code to} as
     * literal text.
     *
     * @param words   the set of English words
     * @param charMap describes the letter or letter-combination transformation rule
     * @return a map holding one entry (the rule and its sorted matching words), or an empty
     *         map when no word matches
     */
    public static Map<CharMap, List<Word>> transform(Set<Word> words, CharMap charMap) {
        String from = charMap.getFrom();
        String to = charMap.getTo();
        List<Word> list = words.parallelStream()
                // NOTE(review): the lookup relies on Word's equals/hashCode - presumably
                // based on the spelling only, since the probe is built with a null second
                // argument; confirm against the Word class.
                .filter(word -> word.getWord().contains(from)
                        && words.contains(new Word(word.getWord().replace(from, to), null)))
                .sorted()
                .collect(Collectors.toList());
        Map<CharMap, List<Word>> result = new HashMap<>();
        if(!list.isEmpty()) {
            result.put(new CharMap(from, to), list);
        }
        return result;
    }

    /**
     * Renders a rule table, as produced by {@link #transforms(Set)}, as an HTML fragment.
     *
     * <p>Rules are written in their natural sort order. Each rule gets a numbered heading
     * with the number of its words, followed by one numbered line per word that links the
     * word and its transformed spelling.
     *
     * @param data rule mapped to the words it applies to
     * @return the HTML fragment; empty when {@code data} is empty
     */
    public static String toHtmlFragmentForRule(Map<CharMap, List<Word>> data){
        StringBuilder html = new StringBuilder();
        List<CharMap> sortedList = new ArrayList<>(data.keySet());
        Collections.sort(sortedList);
        int ruleIndex = 0;
        for (CharMap charMap : sortedList) {
            String from = charMap.getFrom();
            String to = charMap.getTo();
            List<Word> list = data.get(charMap);
            html.append("<h2>")
                    .append(++ruleIndex)
                    .append("、")
                    .append(from)
                    .append(" - ")
                    .append(to)
                    .append(" rule total number: ")
                    .append(list.size())
                    .append("</h2></br>\n");
            int wordIndex = 0;
            for (Word word : list) {
                String original = word.getWord();
                html.append("\t")
                        .append(++wordIndex)
                        .append("、")
                        .append(WordLinker.toLink(original))
                        .append(" -> ")
                        .append(WordLinker.toLink(original.replace(from, to)))
                        .append("</br>\n");
            }
        }
        return html.toString();
    }

    /**
     * Demo entry point: explains the word "back" using the syllabus vocabulary, prints the
     * HTML fragment and writes it to {@code target/char_transform_rule.txt}.
     *
     * @param args ignored
     * @throws Exception if the vocabulary cannot be loaded or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        WordLinker.serverRedirect = null;
        WordLinker.jsDefinition = false;
        Set<Word> words = WordSources.getSyllabusVocabulary();
        //Map<CharMap, List<Word>> data = CharTransformRule.transforms(words);
        //String html = CharTransformRule.toHtmlFragmentForRule(data);
        Map<Word, Map<CharMap, List<Word>>> data2 = CharTransformRule.transforms(words, new Word("back", ""));
        String html2 = CharTransformRule.toHtmlFragmentForWord(data2);
        //System.out.println(html);
        System.out.println(html2);
        Path output = Paths.get("target/char_transform_rule.txt");
        // Files.write does not create missing parent directories
        Files.createDirectories(output.getParent());
        //Files.write(output, Arrays.asList(html, html2));
        Files.write(output, Arrays.asList(html2));
    }
}