/** * Copyright © 2010-2012 Atilika Inc. All rights reserved. * * Atilika Inc. licenses this file to you under the Apache License, Version * 2.0 (the "License"); you may not use this file except in compliance with * the License. A copy of the License is distributed with this work in the * LICENSE.txt file. You may also obtain a copy of the License from * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package org.atilika.kuromoji.viterbi; import java.util.HashMap; import java.util.List; import java.util.Map; import org.atilika.kuromoji.dict.ConnectionCosts; import org.atilika.kuromoji.viterbi.ViterbiNode.Type; /** * クラス名が変わった * * @author Masaru Hasegawa * @author Christian Moen */ public class ViterbiFormatter { private final static String BOS_LABEL = "BOS"; private final static String EOS_LABEL = "EOS"; private final static String FONT_NAME = "Helvetica"; private ConnectionCosts costs; private Map<String, ViterbiNode> nodeMap; private Map<String, String> bestPathMap; private boolean foundBOS; public ViterbiFormatter(ConnectionCosts costs) { this.costs = costs; this.nodeMap = new HashMap<String, ViterbiNode>(); this.bestPathMap = new HashMap<String, String>(); } public String format(ViterbiNode[][] startsArray, ViterbiNode[][] endsArray) { initBestPathMap(null); StringBuilder sb = new StringBuilder(); sb.append(formatHeader()); sb.append(formatNodes(startsArray, endsArray)); sb.append(formatTrailer()); return sb.toString(); } public String format(ViterbiNode[][] startsArray, ViterbiNode[][] endsArray, List<ViterbiNode> bestPath) { // List<ViterbiNode> bestPathWithBOSAndEOS = new ArrayList<ViterbiNode>(bastPath); initBestPathMap(bestPath); StringBuilder sb = new StringBuilder(); sb.append(formatHeader()); sb.append(formatNodes(startsArray, endsArray)); sb.append(formatTrailer()); return sb.toString(); } private void initBestPathMap(List<ViterbiNode> bestPath) { this.bestPathMap.clear(); if (bestPath == null){ return; } for (int i = 0; i < bestPath.size() - 1; i++) { ViterbiNode from = bestPath.get(i); ViterbiNode to = bestPath.get(i + 1); String fromId = getNodeId(from); String toId = getNodeId(to); assert this.bestPathMap.containsKey(fromId) == false; assert this.bestPathMap.containsValue(toId) == false; this.bestPathMap.put(fromId, toId); } } private String formatNodes(ViterbiNode[][] startsArray, ViterbiNode[][] endsArray) { this.nodeMap.clear(); this.foundBOS = false; StringBuilder sb = new StringBuilder(); for (int i = 1; i < endsArray.length; i++) { if(endsArray[i] == null || startsArray[i] == null) { continue; } for (int j = 0; j < endsArray[i].length; j++) { ViterbiNode from = endsArray[i][j]; if(from == null){ continue; } sb.append(formatNodeIfNew(from)); for (int k = 0; k < startsArray[i].length; k++) { ViterbiNode to = startsArray[i][k]; if(to == null){ break; } sb.append(formatNodeIfNew(to)); sb.append(formatEdge(from, to)); } } } return sb.toString(); } private String formatNodeIfNew(ViterbiNode node) { String nodeId = getNodeId(node); if (! this.nodeMap.containsKey(nodeId)) { this.nodeMap.put(nodeId, node); return formatNode(node); } else { return ""; } } private String formatHeader() { StringBuilder sb = new StringBuilder(); sb.append("digraph viterbi {\n"); sb.append("graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\" ];\n"); sb.append("# A2 paper size\n"); sb.append("size = \"34.4,16.5\";\n"); sb.append("# try to fill paper\n"); sb.append("ratio = fill;\n"); sb.append("edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n"); sb.append("node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n"); return sb.toString(); } private String formatTrailer() { return "}"; } private String formatEdge(ViterbiNode from, ViterbiNode to) { if (this.bestPathMap.containsKey(getNodeId(from)) && this.bestPathMap.get(getNodeId(from)).equals(getNodeId(to))) { return formatEdge(from, to, "color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20 "); } else { return formatEdge(from, to, ""); } } private String formatEdge(ViterbiNode from, ViterbiNode to, String attributes) { StringBuilder sb = new StringBuilder(); sb.append(getNodeId(from)); sb.append(" -> "); sb.append(getNodeId(to)); sb.append(" [ "); sb.append("label=\""); sb.append(getCost(from, to)); sb.append("\""); sb.append(" "); sb.append(attributes); sb.append(" "); sb.append(" ]"); sb.append("\n"); return sb.toString(); } private String formatNode(ViterbiNode node) { StringBuilder sb = new StringBuilder(); sb.append("\""); sb.append(getNodeId(node)); sb.append("\""); sb.append(" [ "); sb.append("label="); sb.append(formatNodeLabel(node)); sb.append(" ]"); return sb.toString(); } private String formatNodeLabel(ViterbiNode node) { StringBuilder sb = new StringBuilder(); sb.append("<<table border=\"0\" cellborder=\"0\">"); sb.append("<tr><td>"); sb.append(getNodeLabel(node)); sb.append("</td></tr>"); sb.append("<tr><td>"); sb.append("<font color=\"blue\">"); sb.append(node.getWordCost()); sb.append("</font>"); sb.append("</td></tr>"); // sb.append("<tr><td>"); // sb.append(this.dictionary.get(node.getWordId()).getPosInfo()); // sb.append("</td></tr>"); sb.append("</table>>"); return sb.toString(); } private String getNodeId(ViterbiNode node) { return String.valueOf(node.hashCode()); } private String getNodeLabel(ViterbiNode node) { if (node.getType() == Type.KNOWN && node.getWordId() == 0) { if (this.foundBOS) { return EOS_LABEL; } else { this.foundBOS = true; return BOS_LABEL; } } else { return node.getSurfaceForm(); } } private int getCost(ViterbiNode from, ViterbiNode to) { return this.costs.get(from.getLeftId(), to.getRightId()); } }