Token.java example

Explorer

MoCA-master
- MoCA
  - src
    - jp
      - ac
        waseda
        info
        kake
        moca
        MoCA.java
        MocaTokenizer.java
        dict
        MocaDictionaries.java
        string
        KanaConverter.java
        Levenshtein.java
        StringSizeConverter.java
        syllable
        SyllabifiedString.java
        Syllable.java
        system
        InputMain.java
        PrintIntegerMaker.java
        tools
        MocaDictionaryBuilder.java
        MocaTokenInfoDictionaryBuilder.java
        viterbi
        MocaViterbi.java
        wordcost
        AbstractWordCostAdjuster.java
        BaseWordCostAdjuster.java
        KanaWordCostAdjuster.java
        LevenshteinWordCostAdjuster.java
        MultipliedWordCostAdjuster.java
        UnknownWordCostAdjuster.java
- TonixyKuromoji
  - src
    - TonixyKuromoji.java
    - jp
      - ac
        waseda
        info
        kake
        string
        KanaConverter.java
        Levenshtein.java
        StringSizeConverter.java
        system
        InputMain.java
        PrintIntegerMaker.java
    - org
      - atilika
        kuromoji
        DebugTokenizer.java
        Token.java
        Tokenizer.java
        TokenizerRunner.java
        dict
        CharacterDefinition.java
        ConnectionCosts.java
        Dictionaries.java
        Dictionary.java
        TokenInfoDictionary.java
        UnknownDictionary.java
        UserDictionary.java
        trie
        DoubleArrayTrie.java
        Trie.java
        util
        CSVUtil.java
        ConnectionCostsBuilder.java
        DictionaryBuilder.java
        DoubleArrayTrieBuilder.java
        TokenInfoDictionaryBuilder.java
        UnknownDictionaryBuilder.java
        viterbi
        Viterbi.java
        ViterbiFormatter.java
        ViterbiNode.java

/**
 * Copyright © 2010-2012 Atilika Inc.  All rights reserved.
 *
 * Atilika Inc. licenses this file to you under the Apache License, Version
 * 2.0 (the "License"); you may not use this file except in compliance with
 * the License.  A copy of the License is distributed with this work in the
 * LICENSE.txt file.  You may also obtain a copy of the License from
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.atilika.kuromoji;

import org.atilika.kuromoji.dict.Dictionary;
import org.atilika.kuromoji.viterbi.ViterbiNode.Type;

import jp.ac.waseda.info.kake.string.KanaConverter;

/**
 * A single token: a surface form and its position in the input text, together with the
 * dictionary and word ID that are used to look up its remaining attributes.
 *
 * <p>All fields are final and set once by the constructor.
 *
 * @author Masaru Hasegawa
 * @author Christian Moen
 */
public class Token {
	/** ID passed to every {@link Dictionary} lookup made by this token. */
	private final int wordId;

	/** The token's text as stored at construction time. */
	private final String surfaceForm;

	/** Node type; compared against KNOWN / UNKNOWN / USER by the is*() methods. */
	private final Type type;

	/** Index of this token in the input text. */
	private final int position;

	/** Dictionary that answers all feature lookups for {@link #wordId}. */
	private final Dictionary dictionary;

	/**
	 * Creates a token. The arguments are stored as given; no validation is performed.
	 *
	 * @param wordId      ID used for lookups in {@code dictionary}
	 * @param surfaceForm surface form of the token
	 * @param type        node type of the token
	 * @param position    index of the token in the input text
	 * @param dictionary  dictionary queried for this token's features
	 */
	public Token(int wordId, String surfaceForm, Type type, int position, Dictionary dictionary) {
		this.dictionary = dictionary;
		this.position = position;
		this.type = type;
		this.surfaceForm = surfaceForm;
		this.wordId = wordId;
	}

	/**
	 * @return the surface form given at construction
	 */
	public String getSurfaceForm() {
		return this.surfaceForm;
	}

	/**
	 * Returns the base form, or null if it doesn't exist, e.g. for unknown words or
	 * user dictionary terms.
	 *
	 * @return base form as reported by the dictionary, or null if non-existent
	 */
	public String getBaseForm() {
		return this.dictionary.getBaseForm(this.wordId);
	}

	/**
	 * @return all features, as reported by the dictionary for this word ID
	 */
	public String getAllFeatures() {
		return this.dictionary.getAllFeatures(this.wordId);
	}

	/**
	 * @return all features as an array, as reported by the dictionary for this word ID
	 */
	public String[] getAllFeaturesArray() {
		return this.dictionary.getAllFeaturesArray(this.wordId);
	}

	/**
	 * @return the reading, or null if the token doesn't have a reading
	 */
	public String getReading() {
		return this.dictionary.getReading(this.wordId);
	}

	/**
	 * @return the part of speech, as reported by the dictionary for this word ID
	 */
	public String getPartOfSpeech() {
		return this.dictionary.getPartOfSpeech(this.wordId);
	}

	/**
	 * Tells whether this token is a known word.
	 *
	 * @return true if this token's type is {@code Type.KNOWN} (standard dictionary), false otherwise
	 */
	public boolean isKnown() {
		return Type.KNOWN == this.type;
	}

	/**
	 * Tells whether this token is an unknown word.
	 *
	 * @return true if this token's type is {@code Type.UNKNOWN}, false otherwise
	 */
	public boolean isUnknown() {
		return Type.UNKNOWN == this.type;
	}

	/**
	 * Tells whether this token is defined in a user dictionary.
	 *
	 * @return true if this token's type is {@code Type.USER}, false otherwise
	 */
	public boolean isUser() {
		return Type.USER == this.type;
	}

	/**
	 * Gets the index of this token in the input text.
	 *
	 * @return position of the token
	 */
	public int getPosition() {
		return this.position;
	}

	// TONIXY: getBaseForm returns null for unknown words, so getBaseOrSurfaceForm was
	// added as a variant that falls back to the surface form instead of returning null.

	/**
	 * Returns the base form, falling back to the surface form when no base form exists.
	 *
	 * @return the result of {@link #getBaseForm()} if it is non-null, otherwise the surface form
	 */
	public String getBaseOrSurfaceForm() {
		final String baseForm = getBaseForm();
		return (baseForm != null) ? baseForm : this.surfaceForm;
	}

	// TONIXY: getReading always returns null for unknown words, so getReadingOrKatakana
	// was added; it does not return null for unknown words made up only of kana.

	/**
	 * Returns the reading, falling back to a katakana rendering of the surface form
	 * when there is no reading and {@code KanaConverter.isKana} accepts the surface form.
	 *
	 * @return the result of {@link #getReading()} if it is non-null; otherwise
	 *         {@code KanaConverter.getKatakana(surfaceForm)} if
	 *         {@code KanaConverter.isKana(surfaceForm)} holds; otherwise null
	 */
	public String getReadingOrKatakana() {
		final String reading = getReading();
		if (reading != null) {
			return reading;
		}
		// No dictionary reading: derive one only when the converter accepts the surface form.
		return KanaConverter.isKana(this.surfaceForm)
				? KanaConverter.getKatakana(this.surfaceForm)
				: null;
	}

	// TONIXY: methods added so that the word cost, left ID and right ID can be retrieved.

	/**
	 * @return word cost, as reported by the dictionary for this word ID
	 */
	public int getWordCost() {
		return this.dictionary.getWordCost(this.wordId);
	}

	/**
	 * @return left ID, as reported by the dictionary for this word ID
	 */
	public int getLeftId() {
		return this.dictionary.getLeftId(this.wordId);
	}

	/**
	 * @return right ID, as reported by the dictionary for this word ID
	 */
	public int getRightId() {
		return this.dictionary.getRightId(this.wordId);
	}
}