/**
*
* APDPlat - Application Product Development Platform Copyright (c) 2013, 杨尚川,
* yang-shangchuan@qq.com
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package org.apdplat.superword.tools;
import org.apache.commons.lang.StringUtils;
import org.apdplat.superword.model.SynonymAntonym;
import org.apdplat.superword.model.SynonymDiscrimination;
import org.apdplat.superword.model.Word;
import org.apdplat.superword.rule.PartOfSpeech;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apdplat.superword.tools.WordLinker.Dictionary;
/**
* HTML格式化工具,将生成的HTML片段发布到网络上的博客、日志中
* @author 杨尚川
*/
public class HtmlFormatter {
private HtmlFormatter(){}
private static final String RED_EM_PRE = "<span style=\"color:red\">";
private static final String RED_EM_SUF = "</span>";
private static final String BLUE_EM_PRE = "<span style=\"color:blue\">";
private static final String BLUE_EM_SUF = "</span>";
/**
 * Renders a word-frequency report as an HTML fragment, linking every word
 * through {@link Dictionary#ICIBA}.
 *
 * @param data      word mapped to its occurrence counter
 * @param fileNames paths of the analysed files; only the file name with every
 *                  ".txt" occurrence removed is printed
 * @return the HTML fragment
 */
public static String toHtmlFragmentForText(Map<String, AtomicInteger> data, Set<String> fileNames) {
    return toHtmlFragmentForText(data, fileNames, Dictionary.ICIBA);
}

/**
 * Renders a word-frequency report as an HTML fragment. The fragment contains,
 * in this order:
 * <ol>
 *   <li>the numbered, sorted list of analysed files;</li>
 *   <li>a table built from {@code TextAnalyzer.distribute(data)}, one row per
 *       key in descending key order, listing the sorted words of that group;</li>
 *   <li>a table of all words of at most 14 characters, most frequent first;</li>
 *   <li>a table of all words longer than 14 characters, most frequent first;</li>
 *   <li>a table of all words of exactly 2 characters, most frequent first.</li>
 * </ol>
 * Words with equal frequency keep the iteration order of {@code data}.
 *
 * @param data       word mapped to its occurrence counter
 * @param fileNames  paths of the analysed files
 * @param dictionary dictionary passed to {@code WordLinker.toLink} for every word
 * @return the HTML fragment
 */
public static String toHtmlFragmentForText(Map<String, AtomicInteger> data, Set<String> fileNames, Dictionary dictionary) {
    StringBuilder html = new StringBuilder();

    // 1. Analysed files.
    html.append("统计书籍:<br/>\n");
    AtomicInteger fileNumber = new AtomicInteger();
    fileNames.stream()
            .sorted()
            .forEach(fileName -> html.append(fileNumber.incrementAndGet())
                    .append("、")
                    .append(Paths.get(fileName).toFile().getName().replace(".txt", ""))
                    .append("<br/>\n"));

    // 2. Distribution table.
    Map<Integer, TextAnalyzer.Stat> stat = TextAnalyzer.distribute(data);
    html.append("共有")
            .append(data.size())
            .append("个单词,出现次数统计:<br/>\n")
            .append("<table border=\"1\" bordercolor=\"#00CCCC\" width=\"850\">\n\t<tr><td>序号</td><td>出现次数</td><td>单词个数</td><td>单词</td></tr>\n");
    AtomicInteger groupNumber = new AtomicInteger();
    stat.keySet()
            .stream()
            // reverseOrder() instead of "b - a": subtraction can overflow and invert the order.
            .sorted(Comparator.reverseOrder())
            .forEach(occurrences -> {
                TextAnalyzer.Stat group = stat.get(occurrences);
                html.append("\t<tr><td>")
                        .append(groupNumber.incrementAndGet())
                        .append("</td><td>")
                        .append(occurrences)
                        .append("</td><td>")
                        .append(group.count())
                        .append("</td><td>");
                List<String> groupWords = group.getWords();
                // Words are only numbered when the group holds more than one of them.
                boolean numbered = groupWords.size() > 1;
                AtomicInteger wordNumber = new AtomicInteger();
                groupWords.stream()
                        .sorted()
                        .forEach(word -> {
                            if (numbered) {
                                html.append(wordNumber.incrementAndGet())
                                        .append(".")
                                        .append(WordLinker.toLink(word, dictionary))
                                        .append(" ");
                            } else {
                                html.append(WordLinker.toLink(word, dictionary));
                            }
                        });
                html.append("</td></tr>\n");
            });

    // 3. Words of at most 14 characters (the heading still reports data.size()).
    html.append("</table>")
            .append("\n共有(")
            .append(data.size())
            .append(")个单词:<br/>\n")
            .append("<table>\n\t<tr><td>序号</td><td>单词</td><td>词频</td></tr>\n");
    appendWordFrequencyRows(html, data, word -> word.length() <= 14, dictionary);

    // 4. Words longer than 14 characters.
    html.append("</table>\n")
            .append("长度大于14的词:")
            .append("\n<table>\n\t<tr><td>序号</td><td>单词</td><td>词频</td></tr>\n");
    appendWordFrequencyRows(html, data, word -> word.length() > 14, dictionary);

    // 5. Words of exactly 2 characters.
    html.append("</table>\n")
            .append("长度为2的词:")
            .append("\n<table>\n\t<tr><td>序号</td><td>单词</td><td>词频</td></tr>\n");
    appendWordFrequencyRows(html, data, word -> word.length() == 2, dictionary);

    html.append("</table>");
    return html.toString();
}

/**
 * Appends one "index / linked word / frequency" table row for every word
 * accepted by {@code wordFilter}, ordered by descending frequency.
 */
private static void appendWordFrequencyRows(StringBuilder html,
                                            Map<String, AtomicInteger> data,
                                            java.util.function.Predicate<String> wordFilter,
                                            Dictionary dictionary) {
    AtomicInteger rowNumber = new AtomicInteger();
    data.entrySet()
            .stream()
            .filter(entry -> wordFilter.test(entry.getKey()))
            // Integer.compare avoids the overflow of a subtraction-based comparator.
            .sorted((a, b) -> Integer.compare(b.getValue().get(), a.getValue().get()))
            .forEach(entry -> html.append("\t<tr><td>")
                    .append(rowNumber.incrementAndGet())
                    .append("</td><td>")
                    .append(WordLinker.toLink(entry.getKey(), dictionary))
                    .append("</td><td>")
                    .append(entry.getValue().get())
                    .append("</td></tr>\n"));
}
/**
 * Renders sentences and their word frequencies as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param data          sentence mapped to the text printed after it
 * @param wordFrequence word mapped to its occurrence counter
 * @return the HTML fragment
 */
public static String toHtmlForSentence(Map<String, String> data, Map<Word, AtomicInteger> wordFrequence){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForSentence(data, wordFrequence, defaultDictionary);
}

/**
 * Renders sentences and their word frequencies as HTML: a summary line, the
 * sentences numbered from shortest to longest (each followed by its mapped
 * value), and a six-column word table.
 *
 * @param data          sentence mapped to the text printed after it
 * @param wordFrequence word mapped to its occurrence counter
 * @param dictionary    dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForSentence(Map<String, String> data, Map<Word, AtomicInteger> wordFrequence, Dictionary dictionary){
    // Shortest sentences first; equally long sentences keep the map's iteration order.
    List<String> sentences = new ArrayList<>(data.keySet());
    sentences.sort(Comparator.comparingInt(String::length));

    StringBuilder page = new StringBuilder();
    page.append("共有 ");
    page.append(data.size());
    page.append(" 句子,");
    page.append(wordFrequence.size());
    page.append(" 个单词。<br/>\n");
    page.append("<h4>一、句子(").append(data.size()).append("):</h4>\n");

    int number = 0;
    for (String sentence : sentences) {
        number++;
        page.append(number);
        page.append("、");
        page.append(processSentence(sentence, wordFrequence, dictionary));
        page.append(" ");
        page.append(data.get(sentence));
        page.append("<br/>\n");
    }

    page.append("<br/>\n<h4>二、单词(").append(wordFrequence.size()).append("):</h4>\n");
    page.append(HtmlFormatter.toHtmlTableFragment(wordFrequence, 6, dictionary));
    return page.toString();
}
/**
 * Links the infrequent words of a sentence through {@link Dictionary#ICIBA}.
 *
 * @param sentence      sentence to process
 * @param wordFrequence word mapped to its occurrence counter
 * @return the sentence with infrequent words replaced by links
 */
private static String processSentence(String sentence, Map<Word, AtomicInteger> wordFrequence){
    return processSentence(sentence, wordFrequence, Dictionary.ICIBA);
}

/**
 * Rebuilds a sentence token by token, replacing every word that is present in
 * {@code wordFrequence} with a count below 10 by a dictionary link. A space is
 * inserted after each of {@code ; , . ? !} before splitting on whitespace, and
 * every emitted token is followed by a single space.
 *
 * @param sentence      sentence to process
 * @param wordFrequence word mapped to its occurrence counter
 * @param dictionary    dictionary used when linking words
 * @return the processed sentence
 */
private static String processSentence(String sentence, Map<Word, AtomicInteger> wordFrequence, Dictionary dictionary){
    String spaced = sentence.replace(";", "; ")
            .replace(",", ", ")
            .replace(".", ". ")
            .replace("?", "? ")
            .replace("!", "! ");
    StringBuilder result = new StringBuilder();
    for (String token : spaced.split("\\s+")) {
        boolean punctuated = !token.isEmpty()
                && ";,.?!".indexOf(token.charAt(token.length() - 1)) >= 0;
        String bare = punctuated ? token.substring(0, token.length() - 1) : token;
        // Locale.ROOT keeps the lookup key independent of the JVM default locale
        // (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
        Word word = new Word(bare.toLowerCase(Locale.ROOT), "");
        AtomicInteger frequency = wordFrequence.get(word);
        if (frequency != null && frequency.get() < 10) {
            if (punctuated) {
                // The lower-cased word is linked and the punctuation mark is re-attached.
                result.append(WordLinker.toLink(word.getWord(), dictionary))
                        .append(token.substring(token.length() - 1));
            } else {
                // Unpunctuated tokens are linked with their original casing.
                result.append(WordLinker.toLink(token, dictionary));
            }
        } else {
            result.append(token);
        }
        result.append(" ");
    }
    return result.toString();
}
/**
 * Renders compound words and their components as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param data      compound word mapped to its grouped component words
 * @param rowLength number of cells per row in the trailing component table
 * @return the HTML fragment
 */
public static String toHtmlForCompoundWord(Map<Word, Map<Integer, List<Word>>> data, int rowLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForCompoundWord(data, rowLength, defaultDictionary);
}

/**
 * Renders compound words and their components as HTML. Entries whose inner map
 * is empty are skipped; the remaining ones are listed by descending size of
 * that inner map, one table row each, followed by a cell per component word.
 * When at least one component was seen and {@code data} holds more than one
 * entry, a second table lists the distinct components in sorted order.
 *
 * @param data       compound word mapped to its grouped component words
 * @param rowLength  number of cells per row in the trailing component table
 * @param dictionary dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForCompoundWord(Map<Word, Map<Integer, List<Word>>> data, int rowLength, Dictionary dictionary){
    List<Map.Entry<Word, Map<Integer, List<Word>>>> rows = data.entrySet()
            .stream()
            .filter(row -> !row.getValue().isEmpty())
            .sorted((left, right) -> Integer.compare(right.getValue().size(), left.getValue().size()))
            .collect(Collectors.toList());

    StringBuilder html = new StringBuilder();
    html.append("<br/>复合词数(");
    html.append(data.size());
    html.append("): <br/><br/>\n");
    html.append("<table border=\"1\">\n");

    Set<Word> components = new HashSet<>();
    int rowNumber = 0;
    for (Map.Entry<Word, Map<Integer, List<Word>>> row : rows) {
        rowNumber++;
        html.append("\t<tr><td>")
                .append(rowNumber)
                .append("</td><td>")
                .append(WordLinker.toLink(row.getKey().getWord(), dictionary))
                .append("</td>");
        for (List<Word> parts : row.getValue().values()) {
            for (Word part : parts) {
                html.append("<td>")
                        .append(WordLinker.toLink(part.getWord(), dictionary))
                        .append("</td>");
                components.add(part);
            }
        }
        html.append("</tr>\n");
    }
    html.append("</table>\n");

    // The distinct-component summary is only added for non-trivial input.
    if (!components.isEmpty() && data.size() > 1) {
        html.append("\n<br/>不重复的被组合词数(")
                .append(components.size())
                .append("): <br/><br/>\n");
        List<String> links = components
                .stream()
                .sorted()
                .map(component -> WordLinker.toLink(component.getWord(), dictionary))
                .collect(Collectors.toList());
        html.append(toHtmlTableFragment(links, rowLength));
    }
    return html.toString();
}
/**
 * Renders part-of-speech statistics as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param data part-of-speech symbol mapped to the words carrying it
 * @return the HTML fragment
 */
public static String toHtmlForPartOfSpeech(Map<String, Set<String>> data){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForPartOfSpeech(data, defaultDictionary);
}

/**
 * Renders part-of-speech statistics as HTML in two sections: a ranking of the
 * symbols by descending word count, then one heading per symbol (sorted by
 * symbol) followed by a five-column table of its sorted, linked words.
 *
 * @param data       part-of-speech symbol mapped to the words carrying it
 * @param dictionary dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForPartOfSpeech(Map<String, Set<String>> data, Dictionary dictionary){
    StringBuilder html = new StringBuilder();

    // Section 1: ranking by number of words.
    html.append("<h4>各大词性广泛度排名:</h4><br/>\n");
    List<Map.Entry<String, Set<String>>> ranking = new ArrayList<>(data.entrySet());
    ranking.sort((left, right) -> Integer.compare(right.getValue().size(), left.getValue().size()));
    int rank = 0;
    for (Map.Entry<String, Set<String>> ranked : ranking) {
        String symbol = ranked.getKey();
        html.append(++rank).append("、");
        appendPartOfSpeechLabel(html, symbol, data.get(symbol).size());
        html.append("<br/>\n");
    }

    // Section 2: every symbol with its words.
    html.append("<h4>各大词性及其包括的词:</h4><br/>\n");
    List<String> symbols = new ArrayList<>(data.keySet());
    Collections.sort(symbols);
    int position = 0;
    for (String symbol : symbols) {
        html.append("<h4>").append(++position).append("、");
        appendPartOfSpeechLabel(html, symbol, data.get(symbol).size());
        html.append("</h4>\n");
        List<String> links = data.get(symbol)
                .stream()
                .sorted()
                .map(word -> WordLinker.toLink(word, dictionary))
                .collect(Collectors.toList());
        html.append(toHtmlTableFragment(links, 5));
    }
    return html.toString();
}

/** Appends the coloured "symbol(meaning) (word count)" label shared by both sections. */
private static void appendPartOfSpeechLabel(StringBuilder html, String symbol, int wordCount) {
    html.append(RED_EM_PRE)
            .append(symbol)
            .append(RED_EM_SUF)
            .append("(")
            .append(BLUE_EM_PRE)
            .append(PartOfSpeech.getMeaning(symbol))
            .append(BLUE_EM_SUF)
            .append(") (词数:")
            .append(wordCount)
            .append(")");
}
/**
 * Renders a singular/plural table as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param data plural form mapped to its plural suffix
 * @return the HTML table
 */
public static String toHtmlForPluralFormat(Map<String, String> data){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForPluralFormat(data, defaultDictionary);
}

/**
 * Renders a two-column HTML table with one row per entry. The base form is
 * obtained by cutting as many trailing characters off the key as the mapped
 * value is long; the second column links the key itself with the base form
 * highlighted in blue.
 *
 * @param data       plural form mapped to its plural suffix
 * @param dictionary dictionary used when linking words
 * @return the HTML table
 */
public static String toHtmlForPluralFormat(Map<String, String> data, Dictionary dictionary){
    StringBuilder table = new StringBuilder("<table border=\"1\">\n");
    table.append("\t<tr><td>单词原型</td><td>单词复数</td></tr>\n");
    for (Map.Entry<String, String> plural : data.entrySet()) {
        String pluralForm = plural.getKey();
        int baseLength = pluralForm.length() - plural.getValue().length();
        String origin = pluralForm.substring(0, baseLength);
        table.append("\t<tr><td>");
        table.append(WordLinker.toLink(origin, dictionary));
        table.append("</td><td>");
        table.append(WordLinker.toLink(pluralForm, origin, BLUE_EM_PRE, BLUE_EM_SUF + "-", dictionary));
        table.append("</td></tr>\n");
    }
    table.append("</table>\n");
    return table.toString();
}
/**
 * Renders words with their number of definitions as HTML, linking words
 * through {@link Dictionary#ICIBA}.
 *
 * @param words     words to render
 * @param rowLength number of cells per row in the word table
 * @return the HTML fragment
 */
public static String toHtmlForWordDefinition(Set<Word> words, int rowLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForWordDefinition(words, rowLength, defaultDictionary);
}

/**
 * Renders two tables: the words as "link-definitionCount" cells ordered by
 * descending definition count, and a summary of how many words share each
 * definition count, highest count first.
 *
 * @param words      words to render
 * @param rowLength  number of cells per row in the word table
 * @param dictionary dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForWordDefinition(Set<Word> words, int rowLength, Dictionary dictionary) {
    // Definition count -> number of words, iterated from the largest count down.
    Map<Integer, Integer> wordsPerCount = new TreeMap<>(Comparator.<Integer>reverseOrder());
    for (Word word : words) {
        wordsPerCount.merge(word.getDefinitions().size(), 1, Integer::sum);
    }

    List<String> cells = words
            .stream()
            .sorted((left, right) -> Integer.compare(right.getDefinitions().size(), left.getDefinitions().size()))
            .map(word -> WordLinker.toLink(word.getWord(), dictionary) + "-" + word.getDefinitions().size())
            .collect(Collectors.toList());

    StringBuilder html = new StringBuilder(toHtmlTableFragment(cells, rowLength));
    html.append("<table border=\"1\">\n");
    html.append("\t<tr><td>定义条数</td><td>单词个数</td></tr>\n");
    for (Map.Entry<Integer, Integer> summary : wordsPerCount.entrySet()) {
        html.append("\t<tr><td>")
                .append(summary.getKey())
                .append("</td><td>")
                .append(summary.getValue())
                .append("</td></tr>\n");
    }
    html.append("</table>\n");
    return html.toString();
}
/**
 * Renders words with their antonyms as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param synonymAntonyms entries to render
 * @param rowLength       number of cells per row in each antonym table
 * @return the HTML fragment
 */
public static String toHtmlForAntonym(Set<SynonymAntonym> synonymAntonyms, int rowLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForAntonym(synonymAntonyms, rowLength, defaultDictionary);
}

/**
 * Renders every entry that has at least one antonym: a numbered heading with
 * the linked word, then a table of its sorted, linked antonyms. Entries are
 * ordered by descending number of antonyms.
 *
 * @param synonymAntonyms entries to render
 * @param rowLength       number of cells per row in each antonym table
 * @param dictionary      dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForAntonym(Set<SynonymAntonym> synonymAntonyms, int rowLength, Dictionary dictionary){
    // Dropping antonym-free entries before sorting leaves the relative order of the rest unchanged.
    List<SynonymAntonym> withAntonyms = synonymAntonyms
            .stream()
            .filter(entry -> !entry.getAntonym().isEmpty())
            .sorted((left, right) -> Integer.compare(right.getAntonym().size(), left.getAntonym().size()))
            .collect(Collectors.toList());

    StringBuilder html = new StringBuilder();
    int number = 0;
    for (SynonymAntonym entry : withAntonyms) {
        number++;
        html.append("<h4>")
                .append(number)
                .append("、")
                .append(WordLinker.toLink(entry.getWord().getWord(), dictionary))
                .append("</h4>\n");
        html.append("<b>反义词(")
                .append(entry.getAntonym().size())
                .append("):</b><br/>\n");
        List<String> antonymLinks = entry.getAntonym()
                .stream()
                .sorted()
                .map(antonym -> WordLinker.toLink(antonym.getWord(), dictionary))
                .collect(Collectors.toList());
        html.append(toHtmlTableFragment(antonymLinks, rowLength));
        html.append("<br/>\n");
    }
    return html.toString();
}
/**
 * Renders words with their synonyms and antonyms as HTML, linking words
 * through {@link Dictionary#ICIBA}.
 *
 * @param synonymAntonyms entries to render
 * @param rowLength       number of cells per row in each table
 * @return the HTML fragment
 */
public static String toHtmlForSynonymAntonym(Set<SynonymAntonym> synonymAntonyms, int rowLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForSynonymAntonym(synonymAntonyms, rowLength, defaultDictionary);
}

/**
 * Renders every entry, ordered by descending {@code size()}: a numbered
 * heading with the linked word, a synonym table when synonyms exist, and an
 * antonym table when antonyms exist.
 *
 * @param synonymAntonyms entries to render
 * @param rowLength       number of cells per row in each table
 * @param dictionary      dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForSynonymAntonym(Set<SynonymAntonym> synonymAntonyms, int rowLength, Dictionary dictionary){
    List<SynonymAntonym> ordered = synonymAntonyms
            .stream()
            .sorted((left, right) -> Integer.compare(right.size(), left.size()))
            .collect(Collectors.toList());

    StringBuilder html = new StringBuilder();
    int number = 0;
    for (SynonymAntonym entry : ordered) {
        number++;
        html.append("<h4>");
        html.append(number);
        html.append("、");
        html.append(WordLinker.toLink(entry.getWord().getWord(), dictionary));
        html.append("</h4>\n");

        boolean hasSynonyms = !entry.getSynonym().isEmpty();
        if (hasSynonyms) {
            html.append("<b>同义词(").append(entry.getSynonym().size()).append("):</b><br/>\n");
            List<String> synonymLinks = entry.getSynonym()
                    .stream()
                    .sorted()
                    .map(synonym -> WordLinker.toLink(synonym.getWord(), dictionary))
                    .collect(Collectors.toList());
            html.append(toHtmlTableFragment(synonymLinks, rowLength));
        }

        boolean hasAntonyms = !entry.getAntonym().isEmpty();
        if (hasAntonyms) {
            html.append("<b>反义词(").append(entry.getAntonym().size()).append("):</b><br/>\n");
            List<String> antonymLinks = entry.getAntonym()
                    .stream()
                    .sorted()
                    .map(antonym -> WordLinker.toLink(antonym.getWord(), dictionary))
                    .collect(Collectors.toList());
            html.append(toHtmlTableFragment(antonymLinks, rowLength));
        }
        html.append("<br/>\n");
    }
    return html.toString();
}
/**
 * Renders synonym-discrimination entries as HTML, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param synonymDiscrimination entries to render
 * @return the HTML fragment
 */
public static String toHtmlForSynonymDiscrimination(Set<SynonymDiscrimination> synonymDiscrimination){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlForSynonymDiscrimination(synonymDiscrimination, defaultDictionary);
}

/**
 * Renders the entries in their natural sort order: a numbered heading with the
 * title, the description in bold with every curly-quoted span coloured blue,
 * and, when the entry has words, an ordered list of "linked word:meaning" items.
 *
 * @param synonymDiscrimination entries to render
 * @param dictionary            dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlForSynonymDiscrimination(Set<SynonymDiscrimination> synonymDiscrimination, Dictionary dictionary){
    List<SynonymDiscrimination> ordered = synonymDiscrimination
            .stream()
            .sorted()
            .collect(Collectors.toList());

    StringBuilder html = new StringBuilder();
    int number = 0;
    for (SynonymDiscrimination entry : ordered) {
        number++;
        // Wrap the text between curly quotes in the blue emphasis span.
        String description = entry.getDes()
                .replace("“", "“" + BLUE_EM_PRE)
                .replace("”", BLUE_EM_SUF +"”");
        html.append("<h4>")
                .append(number)
                .append("、")
                .append(entry.getTitle())
                .append("</h4>\n<b>")
                .append(description)
                .append("</b><br/>\n");

        if (entry.getWords().isEmpty()) {
            continue;
        }
        html.append("<ol>\n");
        entry.getWords().forEach(item ->
                html.append("\t<li>")
                        .append(WordLinker.toLink(item.getWord(), dictionary))
                        .append(":")
                        .append(item.getMeaning())
                        .append("</li>\n"));
        html.append("</ol>\n");
    }
    return html.toString();
}
/**
 * Renders roots/affixes with the words built from them as HTML, linking words
 * through {@link Dictionary#ICIBA}.
 *
 * @param rootAffixToWords root or affix mapped to the words containing it
 * @param rowLength        number of cells per row in each word table
 * @return the HTML fragment
 */
public static String toHtmlTableFragmentForRootAffix(Map<Word, List<Word>> rootAffixToWords, int rowLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlTableFragmentForRootAffix(rootAffixToWords, rowLength, defaultDictionary);
}

/**
 * Renders one section per root/affix, in the natural sort order of the keys:
 * a numbered heading (with the meaning when it is not blank and the number of
 * words) followed by a table of the sorted words with the root/affix
 * emphasised. The result starts with a summary line stating the number of
 * roots/affixes and the number of distinct words.
 *
 * @param rootAffixToWords root or affix mapped to the words containing it
 * @param rowLength        number of cells per row in each word table
 * @param dictionary       dictionary used when linking words
 * @return the HTML fragment
 */
public static String toHtmlTableFragmentForRootAffix(Map<Word, List<Word>> rootAffixToWords, int rowLength, Dictionary dictionary) {
    List<Word> orderedRootAffixes = rootAffixToWords
            .keySet()
            .stream()
            .sorted()
            .collect(Collectors.toList());

    StringBuilder sections = new StringBuilder();
    Set<Word> distinctWords = new HashSet<>();
    int number = 0;
    for (Word rootAffix : orderedRootAffixes) {
        List<Word> members = rootAffixToWords.get(rootAffix);
        number++;
        sections.append("<h4>").append(number).append("、").append(rootAffix.getWord());
        if (StringUtils.isNotBlank(rootAffix.getMeaning())) {
            sections.append(" (").append(rootAffix.getMeaning()).append(") ");
        }
        sections.append(" (hit ").append(members.size()).append(")</h4>\n");

        List<String> cells = members
                .stream()
                .sorted()
                .map(member -> emphasize(member, rootAffix, dictionary))
                .collect(Collectors.toList());
        distinctWords.addAll(members);
        sections.append(toHtmlTableFragment(cells, rowLength));
    }

    // The summary needs the distinct-word total, so it is assembled after all sections.
    StringBuilder result = new StringBuilder();
    result.append("词根词缀数:")
            .append(rootAffixToWords.size())
            .append(",单词数:")
            .append(distinctWords.size())
            .append("<br/>\n")
            .append(sections);
    return result.toString();
}
/**
 * Links a word through {@link Dictionary#ICIBA} with the given root/affix
 * emphasised in red.
 *
 * @param word      word to link
 * @param rootAffix root or affix to emphasise inside the word
 * @return the link markup produced by {@code WordLinker.toLink}
 */
public static String emphasize(Word word, Word rootAffix){
    return emphasize(word, rootAffix, Dictionary.ICIBA);
}

/**
 * Links a word with the given root/affix emphasised in red. Hyphens are
 * removed from the root/affix and it is lower-cased before matching. A hyphen
 * is placed on each side of the emphasised part that borders the rest of the
 * word; the position is decided by {@code startsWith}/{@code endsWith}, with
 * the prefix check taking precedence when both match.
 *
 * @param word       word to link
 * @param rootAffix  root or affix to emphasise inside the word
 * @param dictionary dictionary used when linking the word
 * @return the link markup produced by {@code WordLinker.toLink}
 */
public static String emphasize(Word word, Word rootAffix, Dictionary dictionary){
    String w = word.getWord();
    // Locale.ROOT: lower-casing must not depend on the JVM default locale
    // (e.g. a Turkish locale maps "I" to a dotless "ı").
    String r = rootAffix.getWord().replace("-", "").toLowerCase(Locale.ROOT);

    // The word is the root itself (or is even shorter): no decoration.
    if (w.length() <= r.length()) {
        return WordLinker.toLink(w, r, dictionary);
    }
    // Root at the front.
    if (w.startsWith(r)) {
        return WordLinker.toLink(w, r, RED_EM_PRE, RED_EM_SUF + "-", dictionary);
    }
    // Root at the end.
    if (w.endsWith(r)) {
        return WordLinker.toLink(w, r, "-" + RED_EM_PRE, RED_EM_SUF, dictionary);
    }
    // Otherwise the root is treated as sitting in the middle.
    return WordLinker.toLink(w, r, "-" + RED_EM_PRE, RED_EM_SUF + "-", dictionary);
}
/**
 * Renders the words of a frequency map as an HTML table, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param words     word mapped to its counter
 * @param rowLength number of cells per table row
 * @return the HTML table
 */
public static String toHtmlTableFragment(Map<Word, AtomicInteger> words, int rowLength){
    return toHtmlTableFragment(words, rowLength, Dictionary.ICIBA);
}

/**
 * Renders the words of a frequency map as an HTML table.
 *
 * @param words      word mapped to its counter
 * @param rowLength  number of cells per table row
 * @param dictionary dictionary used when linking words
 * @return the HTML table
 */
public static String toHtmlTableFragment(Map<Word, AtomicInteger> words, int rowLength, Dictionary dictionary) {
    return toHtmlTableFragment(words.entrySet(), rowLength, dictionary);
}

/**
 * Renders word/counter entries as an HTML table, linking words through
 * {@link Dictionary#ICIBA}.
 *
 * @param words     word/counter entries
 * @param rowLength number of cells per table row
 * @return the HTML table
 */
public static String toHtmlTableFragment(Set<Map.Entry<Word, AtomicInteger>> words, int rowLength){
    return toHtmlTableFragment(words, rowLength, Dictionary.ICIBA);
}

/**
 * Renders word/counter entries as an HTML table, highest counter first. Each
 * cell is the linked word, followed by "-count" when the count is positive.
 * Entries with equal counts keep the iteration order of {@code words}.
 *
 * @param words      word/counter entries
 * @param rowLength  number of cells per table row
 * @param dictionary dictionary used when linking words
 * @return the HTML table
 */
public static String toHtmlTableFragment(Set<Map.Entry<Word, AtomicInteger>> words, int rowLength, Dictionary dictionary) {
    List<String> data =
            words
                    .stream()
                    // Integer.compare instead of subtraction: the difference of two
                    // arbitrary ints can overflow and flip the ordering.
                    .sorted((a, b) -> Integer.compare(b.getValue().get(), a.getValue().get()))
                    .map(entry -> {
                        // Read the counter once so the test and the printed value agree.
                        int count = entry.getValue().get();
                        String link = WordLinker.toLink(entry.getKey().getWord(), dictionary);
                        return count > 0 ? link + "-" + count : link;
                    })
                    .collect(Collectors.toList());
    return toHtmlTableFragment(data, rowLength);
}
/**
 * Renders words with the roots/affixes they are formed from as a list of HTML
 * pages, linking words through {@link Dictionary#ICIBA}.
 *
 * @param data        word mapped to its roots/affixes
 * @param rowLength   number of cells per row in the emphasised-word table
 * @param wordsLength number of words per returned page
 * @return the HTML pages
 */
public static List<String> toHtmlTableFragmentForIndependentWord(Map<Word, List<Word>> data, int rowLength, int wordsLength){
    Dictionary defaultDictionary = Dictionary.ICIBA;
    return toHtmlTableFragmentForIndependentWord(data, rowLength, wordsLength, defaultDictionary);
}

/**
 * Renders one section per word, in the natural sort order of the keys: a
 * numbered heading, a table showing the word once per root/affix with that
 * root/affix emphasised, and a two-column table of root/affix and meaning.
 * Sections are grouped into pages of {@code wordsLength} words; a final
 * partial page is included when it is not empty.
 *
 * @param data        word mapped to its roots/affixes
 * @param rowLength   number of cells per row in the emphasised-word table
 * @param wordsLength number of words per returned page
 * @param dictionary  dictionary used when linking words
 * @return the HTML pages
 */
public static List<String> toHtmlTableFragmentForIndependentWord(Map<Word, List<Word>> data, int rowLength, int wordsLength, Dictionary dictionary) {
    List<Word> orderedWords = data
            .keySet()
            .stream()
            .sorted()
            .collect(Collectors.toList());

    List<String> pages = new ArrayList<>();
    StringBuilder page = new StringBuilder();
    int processed = 0;
    for (Word word : orderedWords) {
        List<Word> rootAffixes = data.get(word);
        processed++;
        page.append("<h4>")
                .append(processed)
                .append("、")
                .append(word.getWord())
                .append(" (form ")
                .append(rootAffixes.size())
                .append(")</h4>\n");

        // First table: the word with each root/affix highlighted in turn.
        List<String> emphasized = new ArrayList<>();
        for (Word rootAffix : rootAffixes) {
            emphasized.add(emphasize(word, rootAffix, dictionary));
        }
        page.append(toHtmlTableFragment(emphasized, rowLength));

        // Second table: root/affix and meaning side by side.
        List<String> glossary = new ArrayList<>();
        for (Word rootAffix : rootAffixes) {
            glossary.add(rootAffix.getWord());
            glossary.add(rootAffix.getMeaning());
        }
        page.append(toHtmlTableFragment(glossary, 2));

        // Close the current page once it holds wordsLength words.
        if (processed % wordsLength == 0) {
            pages.add(page.toString());
            page.setLength(0);
        }
    }
    if (page.length() > 0) {
        pages.add(page.toString());
    }
    return pages;
}
/**
 * Lays the given cell contents out as an HTML table with {@code rowLength}
 * data cells per row. Every row starts with an extra cell holding the 1-based
 * row number; the last row may hold fewer data cells. An empty list yields an
 * empty, well-formed table.
 *
 * @param data      cell contents, written as-is (no HTML escaping)
 * @param rowLength number of data cells per row; must not be zero when
 *                  {@code data} is non-empty
 * @return the HTML table, terminated by a newline
 * @throws ArithmeticException if {@code rowLength} is zero and {@code data} is not empty
 */
public static String toHtmlTableFragment(List<String> data, int rowLength) {
    StringBuilder html = new StringBuilder();
    html.append("<table border=\"1\">\n");
    int cellCount = 0;
    int rowCount = 0;
    for (String datum : data) {
        if (cellCount % rowLength == 0) {
            // Close the previous row before opening the next one.
            if (cellCount > 0) {
                html.append("</tr>\n");
            }
            rowCount++;
            html.append("\t<tr><td>").append(rowCount).append("</td>");
        }
        cellCount++;
        html.append("<td>").append(datum).append("</td>");
    }
    // Only close a row when one was opened; an empty list must not emit a stray </tr>.
    if (cellCount > 0) {
        html.append("</tr>\n");
    }
    html.append("</table>\n");
    return html.toString();
}
}