/**
 *
 * APDPlat - Application Product Development Platform Copyright (c) 2013, 杨尚川,
 * yang-shangchuan@qq.com
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package org.apdplat.superword.rule;

import org.apdplat.superword.model.Prefix;
import org.apdplat.superword.model.Word;
import org.apdplat.superword.tools.WordLinker;
import org.apdplat.superword.tools.WordLinker.Dictionary;
import org.apdplat.superword.tools.WordSources;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Dynamic prefix rule. For example, the rule {@code m-imm} means that the word
 * collection contains two words that start with {@code m} and {@code imm}
 * respectively and that are identical apart from that prefix.
 *
 * @author 杨尚川
 */
public class DynamicPrefixRule {
    /** Full word set from {@link WordSources#getAll()}, used to look up the bare common part when rendering. */
    private static final Set<Word> WORDS = WordSources.getAll();

    private DynamicPrefixRule() {
    }

    /**
     * Finds the words that start with the first prefix and for which, after
     * swapping that prefix for each of the remaining prefixes, the resulting
     * spelling is also contained in {@code words}.
     *
     * <p>Prefixes are normalised (lower-cased, {@code '-'} and whitespace
     * removed) before matching; the match against the first prefix is
     * case-insensitive.
     *
     * @param words    candidate words; also the collection membership is checked against
     * @param prefixes the prefixes of the rule; fewer than two (or {@code null}) yields an empty list
     * @return the matching words in their natural sort order; empty if {@code words} is
     *         {@code null} or fewer than two prefixes are given
     */
    public static List<Word> findByPrefix(Collection<Word> words, List<Prefix> prefixes) {
        if (words == null || prefixes == null || prefixes.size() < 2) {
            return Arrays.asList();
        }
        // Normalise once per call instead of once per prefix per candidate word.
        List<String> normalized = prefixes.stream()
                .map(DynamicPrefixRule::normalize)
                .collect(Collectors.toList());
        String first = normalized.get(0);
        // The first prefix selects the candidates, so only the rest are substituted.
        List<String> others = normalized.subList(1, normalized.size());
        return words
                .parallelStream()
                .filter(word -> {
                    String w = word.getWord();
                    if (!w.toLowerCase().startsWith(first)) {
                        return false;
                    }
                    String common = w.substring(first.length());
                    for (String other : others) {
                        // NOTE(review): presumably Word equality depends only on the spelling,
                        // so a probe with an empty second argument can be used for lookup - confirm.
                        if (!words.contains(new Word(other + common, ""))) {
                            return false;
                        }
                    }
                    return true;
                })
                .sorted()
                .collect(Collectors.toList());
    }

    /**
     * Renders the words as an HTML fragment, linking to {@link Dictionary#ICIBA}.
     *
     * @param words    the words to render
     * @param prefixes the prefixes of the rule
     * @return the HTML fragment
     */
    public static String toHtmlFragment(List<Word> words, List<Prefix> prefixes) {
        return toHtmlFragment(words, prefixes, Dictionary.ICIBA);
    }

    /**
     * Renders the words as an HTML fragment: an {@code <h4>} heading listing the
     * prefixes and the hit count, followed by a table with one row per word.
     * Each row holds a running number, one linked cell per prefix (prefix +
     * common part) and, when the common part on its own is contained in the
     * full word set, one extra linked cell for it.
     *
     * <p>A word that starts with none of the prefixes produces no row at all.
     *
     * @param words      the words to render
     * @param prefixes   the prefixes of the rule
     * @param dictionary the dictionary passed to {@link WordLinker} when building links
     * @return the HTML fragment
     */
    public static String toHtmlFragment(List<Word> words, List<Prefix> prefixes, Dictionary dictionary) {
        List<String> normalized = prefixes.stream()
                .map(DynamicPrefixRule::normalize)
                .collect(Collectors.toList());

        StringBuilder html = new StringBuilder();
        html.append("<h4>common suffix different prefix: ");
        // The heading shows the prefixes exactly as supplied, not the normalised form.
        prefixes.forEach(prefix -> html.append(prefix.getPrefix()).append("\t"));
        html.append(" (hit ")
            .append(words.size())
            .append(")</h4>\n")
            .append("<table>\n");

        AtomicInteger wordCounter = new AtomicInteger();
        words.forEach(word -> {
            String common = stripMatchingPrefix(word.getWord(), normalized);
            if (common == null) {
                // No prefix matched: skip the word entirely so that no unbalanced </tr> is written.
                return;
            }
            html.append("\t")
                .append("<tr><td>")
                .append(wordCounter.incrementAndGet())
                .append("、</td>");
            normalized.forEach(p ->
                html.append("<td>")
                    .append(WordLinker.toLink(p + common, p, dictionary))
                    .append("</td>"));
            if (WORDS.contains(new Word(common, ""))) {
                html.append("<td>")
                    .append(WordLinker.toLink(common, common, dictionary))
                    .append("</td>");
            }
            html.append("</tr>\n");
        });
        html.append("</table>");
        return html.toString();
    }

    /** Lower-cases the prefix text and removes every {@code '-'} and all whitespace. */
    private static String normalize(Prefix prefix) {
        return prefix.getPrefix()
                .toLowerCase()
                .replaceAll("-", "")
                .replaceAll("\\s+", "");
    }

    /**
     * Returns {@code word} with the first matching prefix removed, or
     * {@code null} when the word starts with none of the prefixes. Matching is
     * case-insensitive, in line with {@link #findByPrefix(Collection, List)}.
     */
    private static String stripMatchingPrefix(String word, List<String> normalizedPrefixes) {
        String lower = word.toLowerCase();
        // A plain for loop fits here because the search stops at the first match.
        for (String prefix : normalizedPrefixes) {
            if (lower.startsWith(prefix)) {
                return word.substring(prefix.length());
            }
        }
        return null;
    }

    /**
     * Demo entry point: applies the rule {@code m-imm} to all words, writes the
     * HTML fragment to {@code target/dynamic_prefix_rule.txt} and prints it.
     */
    public static void main(String[] args) throws Exception {
        Set<Word> words = WordSources.getAll();
        List<Prefix> prefixes = Arrays.asList(new Prefix("m", ""), new Prefix("imm", ""));

        List<Word> data = DynamicPrefixRule.findByPrefix(words, prefixes);
        String htmlFragment = DynamicPrefixRule.toHtmlFragment(data, prefixes, Dictionary.ICIBA);

        Files.write(Paths.get("target/dynamic_prefix_rule.txt"), htmlFragment.getBytes(StandardCharsets.UTF_8));

        System.out.println(htmlFragment);
    }
}