/**
 *
 * APDPlat - Application Product Development Platform Copyright (c) 2013, 杨尚川,
 * yang-shangchuan@qq.com
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package org.apdplat.superword.rule;

import org.apache.commons.lang.StringUtils;
import org.apdplat.superword.model.ComplexPrefix;
import org.apdplat.superword.model.Prefix;
import org.apdplat.superword.model.Word;
import org.apdplat.superword.tools.HtmlFormatter;
import org.apdplat.superword.tools.WordSources;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Finds, within a given collection of English words, the words that match
 * prefix rules.
 *
 * @author 杨尚川
 */
public class PrefixRule {
    private PrefixRule() {}

    private static final Logger LOGGER = LoggerFactory.getLogger(PrefixRule.class);

    /** Classpath location of the root/affix definition file. */
    private static final String AFFIX_RESOURCE = "/root_affix.txt";
    /** Tag that marks a line of the affix file as a prefix definition. */
    private static final String PREFIX_TAG = "前缀:";
    /** Token that separates the prefix text from its meaning on a definition line. */
    private static final String FIELD_SEPARATOR = "杨尚川";

    /**
     * Loads every prefix definition from the {@code /root_affix.txt} classpath resource.
     *
     * <p>Only lines starting with the prefix tag are parsed. The text after the tag is
     * split on the field separator into exactly two parts: prefix and meaning. A prefix
     * containing a comma is expanded through {@link ComplexPrefix#simplify()}; any other
     * prefix is added as a single {@link Prefix}. Lines that do not split into exactly
     * two parts are logged as errors and skipped.
     *
     * <p>Loading is best-effort: when the resource is missing or an exception occurs,
     * the problem is logged and the prefixes collected so far are returned.
     *
     * @return the parsed prefixes; possibly empty, never {@code null}
     */
    public static List<Prefix> getAllPrefixes() {
        List<Prefix> prefixes = new ArrayList<>();
        InputStream resource = PrefixRule.class.getResourceAsStream(AFFIX_RESOURCE);
        if (resource == null) {
            // Without this check the failure would surface as an NPE with a null message.
            LOGGER.error("Resource {} not found on classpath", AFFIX_RESOURCE);
            return prefixes;
        }
        // Decode explicitly as UTF-8: with the platform default charset the Chinese tag
        // would not match on non-UTF-8 systems and every line would be skipped silently.
        // NOTE(review): assumes root_affix.txt is stored as UTF-8 - confirm.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(resource, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (StringUtils.isNotBlank(line)
                        && !line.startsWith("#")
                        && line.startsWith(PREFIX_TAG)) {
                    String[] attr = line.substring(PREFIX_TAG.length()).split(FIELD_SEPARATOR);
                    if (attr.length == 2) {
                        String prefix = attr[0];
                        String meaning = attr[1];
                        if (prefix.contains(",")) {
                            prefixes.addAll(new ComplexPrefix(prefix, meaning).simplify());
                            LOGGER.debug("复杂前缀:{}{}", prefix, meaning);
                        } else {
                            prefixes.add(new Prefix(prefix, meaning));
                            LOGGER.debug("前缀:{}{}", prefix, meaning);
                        }
                    } else {
                        LOGGER.error("解析前缀出错:{}", line);
                    }
                }
            }
        } catch (Exception e) {
            // Deliberately broad: keep whatever was parsed before the failure.
            LOGGER.error("Failed to load prefixes from " + AFFIX_RESOURCE, e);
        }
        return prefixes;
    }

    /**
     * Finds the matching words for each of the given prefixes.
     *
     * @param words    the words to search
     * @param prefixes the prefixes to look up; a prefix that compares equal to an
     *                 earlier one replaces that earlier entry
     * @param strict   see {@link #findByPrefix(Collection, Prefix, boolean)}
     * @return a sorted map from each prefix to its matching words
     */
    public static TreeMap<Prefix, List<Word>> findByPrefix(Collection<Word> words, Collection<Prefix> prefixes, boolean strict) {
        TreeMap<Prefix, List<Word>> map = new TreeMap<>();
        for (Prefix prefix : prefixes) {
            map.put(prefix, findByPrefix(words, prefix, strict));
        }
        return map;
    }

    /**
     * Finds the words that start with the given prefix.
     *
     * <p>The prefix is normalized by removing every {@code -} and lower-casing it.
     * Words whose first character is upper-case never match. In strict mode a word
     * matches only if, in addition to starting with the prefix, the remainder after
     * the prefix is longer than two characters and is itself contained in
     * {@code words}.
     *
     * @param words  the words to search; also used for the strict-mode lookup
     * @param prefix the prefix to match
     * @param strict whether the remainder must itself be a known word
     * @return the matching words in their natural order
     */
    public static List<Word> findByPrefix(Collection<Word> words, Prefix prefix, boolean strict) {
        // Loop-invariant, so computed once rather than per word. Locale.ROOT keeps the
        // result independent of the JVM default locale (e.g. Turkish dotless i).
        String normalized = prefix.getPrefix().replace("-", "").toLowerCase(Locale.ROOT);
        return words
                .parallelStream()
                .filter(word -> matchesPrefix(words, word, normalized, strict))
                .sorted()
                .collect(Collectors.toList());
    }

    /** Decides whether a single word matches the already normalized prefix. */
    private static boolean matchesPrefix(Collection<Word> words, Word word, String normalized, boolean strict) {
        String text = word.getWord();
        // Guard against null/empty text so one bad entry cannot abort the whole stream.
        if (text == null || text.isEmpty() || Character.isUpperCase(text.charAt(0))) {
            return false;
        }
        if (!text.startsWith(normalized)) {
            return false;
        }
        if (!strict) {
            return true;
        }
        return text.length() - normalized.length() > 2
                && words.contains(new Word(text.substring(normalized.length()), ""));
    }

    /**
     * Re-keys a prefix-to-words map by {@link Word}, building each key from the
     * prefix text and its description.
     *
     * @param data the prefix-to-words map to convert
     * @return a new map holding the same word lists under {@link Word} keys
     */
    public static Map<Word, List<Word>> convert(Map<Prefix, List<Word>> data) {
        Map<Word, List<Word>> r = new HashMap<>();
        for (Map.Entry<Prefix, List<Word>> entry : data.entrySet()) {
            Prefix prefix = entry.getKey();
            r.put(new Word(prefix.getPrefix(), prefix.getDes()), entry.getValue());
        }
        return r;
    }

    /**
     * Matches the syllabus vocabulary against all known prefixes in non-strict mode
     * and writes the result as an HTML table fragment to
     * {@code target/prefix_rule.txt}, encoded as UTF-8.
     *
     * @param args unused
     * @throws Exception if loading the vocabulary, formatting or writing the file fails
     */
    public static void main(String[] args) throws Exception {
        Set<Word> words = WordSources.getSyllabusVocabulary();
        // Alternative prefix sources kept for experimentation:
        //List<Prefix> prefixes = PrefixExtractor.extract();
        //List<Prefix> prefixes = Arrays.asList(new Prefix("mono,mon", "单个,一个"));
        //List<Prefix> prefixes = new ComplexPrefix("dis-,in-,im-,il-,ir-,un-,mis-,non-,dis-,de-,anti-,counter-", "否定前缀").simplify();
        List<Prefix> prefixes = PrefixRule.getAllPrefixes();

        TreeMap<Prefix, List<Word>> prefixToWords = PrefixRule.findByPrefix(words, prefixes, false);
        // NOTE(review): the meaning of the trailing 6 is defined by HtmlFormatter - confirm.
        String htmlFragment = HtmlFormatter.toHtmlTableFragmentForRootAffix(convert(prefixToWords), 6);

        Files.write(Paths.get("target/prefix_rule.txt"), htmlFragment.getBytes(StandardCharsets.UTF_8));
    }
}