/** * * APDPlat - Application Product Development Platform Copyright (c) 2013, 杨尚川, * yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.superword.rule; import java.util.Collections; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apdplat.superword.model.Word; import org.apdplat.superword.rule.WordVector.Score; import org.apdplat.superword.tools.WordSources; /** * 如何判断两个英文单词是否相似? * 1、含义 * 2、读音 * 3、拼写 * 利用词向量技术,可以从拼写方面找出相似的词 * 我们一般都很难分辨双胞胎,因为他们长得太像了 * 不过呢,双胞胎的父母却能一眼识别,为什么? * 因为有长期的对比识别啊 * 记忆英语单词也一样,把相似的词找出来对比记忆 * 往往事半功倍 * @author 杨尚川 */ public class SimilarityRule { public void similarity(Set<Word> words, String target) { WordVector targetWordVecotr = WordVector.of(target); List<Score> scores = words.parallelStream() .map(word -> targetWordVecotr.score(WordVector.of(word.getWord()), true)) .filter(item -> item.getScore() > 5) .sorted() .collect(Collectors.toList()); Collections.reverse(scores); System.out.println("word "+target+" similarity rank: "); AtomicInteger i = new AtomicInteger(); scores.forEach(score -> System.out.println("\t"+i.incrementAndGet() + "、" + score.getWord() + " " + score.getScore() + " " + score.getExplain())); } public static void main(String[] args) throws Exception { Set<Word> words = WordSources.getAll(); SimilarityRule similarityRule = new SimilarityRule(); similarityRule.similarity(words, "book"); } }