/** * Copyright © 2010-2012 Atilika Inc. All rights reserved. * * Atilika Inc. licenses this file to you under the Apache License, Version * 2.0 (the "License"); you may not use this file except in compliance with * the License. A copy of the License is distributed with this work in the * LICENSE.txt file. You may also obtain a copy of the License from * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package org.atilika.kuromoji; import org.atilika.kuromoji.dict.Dictionary; import org.atilika.kuromoji.viterbi.ViterbiNode.Type; import jp.ac.waseda.info.kake.string.KanaConverter; /** * @author Masaru Hasegawa * @author Christian Moen */ public class Token { private final Dictionary dictionary; private final int wordId; private final String surfaceForm; private final int position; private final Type type; public Token(int wordId, String surfaceForm, Type type, int position, Dictionary dictionary) { this.wordId = wordId; this.surfaceForm = surfaceForm; this.type = type; this.position = position; this.dictionary = dictionary; } /** * @return surfaceForm */ public String getSurfaceForm() { return surfaceForm; } /** * Returns base form or null if it doens't exist, i.e. for unknown words of user dictionary terms * * @return base form or null if non-existent */ public String getBaseForm() { return dictionary.getBaseForm(wordId); } /** * @return all features */ public String getAllFeatures() { return dictionary.getAllFeatures(wordId); } /** * @return all features as array */ public String[] getAllFeaturesArray() { return dictionary.getAllFeaturesArray(wordId); } /** * @return reading. null if token doesn't have reading. */ public String getReading() { return dictionary.getReading(wordId); } /** * @return part of speech. */ public String getPartOfSpeech() { return dictionary.getPartOfSpeech(wordId); } /** * Returns true if this token is known word * @return true if this token is in standard dictionary. false if not. */ public boolean isKnown() { return type == Type.KNOWN; } /** * Returns true if this token is unknown word * @return true if this token is unknown word. false if not. */ public boolean isUnknown() { return type == Type.UNKNOWN; } /** * Returns true if this token is defined in user dictionary * @return true if this token is in user dictionary. false if not. */ public boolean isUser() { return type == Type.USER; } /** * Get index of this token in input text * @return position of token */ public int getPosition() { return position; } // TONIXY getBaseFormは未知語でnullになる。nullを返さないgetBaseOrSurfaceFormを追加。 public String getBaseOrSurfaceForm() { String res = getBaseForm(); if (res == null) return surfaceForm; return res; } // TONIXY getReadingは未知語で必ずnullになる。かな文字のみの未知語ではnullを返さないgetReadingOrKatakanaを追加。 public String getReadingOrKatakana() { String res = getReading(); if (res != null) return res; if (KanaConverter.isKana(surfaceForm)) return KanaConverter.getKatakana(surfaceForm); return null; } // TONIXY word cost、left ID、right IDを取得できるよう、メソッドを追加。 /** * @return word cost */ public int getWordCost() { return dictionary.getWordCost(wordId); } /** * @return left ID */ public int getLeftId() { return dictionary.getLeftId(wordId); } /** * @return right ID */ public int getRightId() { return dictionary.getRightId(wordId); } }