/** * Copyright © 2010-2012 Atilika Inc. All rights reserved. * * Atilika Inc. licenses this file to you under the Apache License, Version * 2.0 (the "License"); you may not use this file except in compliance with * the License. A copy of the License is distributed with this work in the * LICENSE.txt file. You may also obtain a copy of the License from * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package org.atilika.kuromoji; import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.atilika.kuromoji.Tokenizer.Mode; import org.atilika.kuromoji.dict.Dictionaries; import org.atilika.kuromoji.dict.UserDictionary; import org.atilika.kuromoji.viterbi.ViterbiFormatter; import org.atilika.kuromoji.viterbi.Viterbi; import org.atilika.kuromoji.viterbi.ViterbiNode; public class DebugTokenizer { private ViterbiFormatter formatter; private Viterbi viterbi; /** * TONIXY 非推奨。未知語分解モードがOFFになる。未知語分解モードの搭載後、従来通り動くためのコンストラクタ * * @param userDictionary * @param mode */ protected DebugTokenizer(UserDictionary userDictionary, Mode mode) { this(userDictionary, mode, false, false); } /** * TONIXY 未知語分解モードの搭載のため、引数追加 * * @param userDictionary * @param mode * @param unknownFixMode */ protected DebugTokenizer(UserDictionary userDictionary, Mode mode, boolean unknownFixMode, boolean convertsSize) { this.viterbi = new Viterbi(Dictionaries.getTrie(), Dictionaries.getDictionary(), Dictionaries.getUnknownDictionary(), Dictionaries.getCosts(), userDictionary, mode, unknownFixMode, convertsSize); this.formatter = new ViterbiFormatter(Dictionaries.getCosts()); } public String debugTokenize(String text) { ViterbiNode[][][] lattice = this.viterbi.build(text); List<ViterbiNode> bestPath = this.viterbi.search(lattice); return this.formatter.format(lattice[0], lattice[1], bestPath); } public static Builder builder() { return new Builder(); } public static class Builder { private Mode mode = Mode.NORMAL; private UserDictionary userDictionary = null; public synchronized Builder mode(Mode mode) { this.mode = mode; return this; } public synchronized Builder userDictionary(InputStream userDictionaryInputStream) throws IOException { this.userDictionary = UserDictionary.read(userDictionaryInputStream); return this; } public synchronized Builder userDictionary(String userDictionaryPath) throws FileNotFoundException, IOException { this.userDictionary(new BufferedInputStream(new FileInputStream(userDictionaryPath))); return this; } public synchronized DebugTokenizer build() { return new DebugTokenizer(userDictionary, mode); } } }