/******************************************************************************* * Copyright (c) 2012 György Orosz, Attila Novák. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/ * * This file is part of PurePos. * * PurePos is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PurePos is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * Contributors: * György Orosz - initial API and implementation ******************************************************************************/ package hu.ppke.itk.nlpg.purepos.model.internal; import java.io.Serializable; import java.util.HashMap; import java.util.Map; /** * Trie node, storing frequencies / probabilities of a given n-gram part * * @author György Orosz * * @param <I> * type for nodeID * @param <N> * type for frequency / probability * @param <W> * type for words */ public abstract class TrieNode<I, N extends Number, W> implements Serializable { /** * */ private static final long serialVersionUID = -5867650276492059939L; // TODO: PERF: is it worth using berkeleylm, instead of my implementation: // it is // known to be fast and small protected final I id; protected N num; protected HashMap<I, TrieNode<I, N, W>> childNodes; protected final HashMap<W, N> words; /** * Zero element of the given Number type * * @return */ protected abstract N zero(); /** * Incrementing the given Number object * * @param n * @return */ protected abstract N increment(N n); protected abstract TrieNode<I, N, W> createNode(I id); /** * Constructor * * @param id * The id of the node (usually the context part) * @param word * word to store */ public TrieNode(I id, W word) { this(id); addWord(word); } protected TrieNode(I id) { this.id = id; num = zero(); words = new HashMap<W, N>(); } /** * Add a word the the node, and increments its frequency. * * @param word */ protected void addWord(W word) { if (words.containsKey(word)) { words.put(word, increment(words.get(word))); } else { words.put(word, increment(zero())); } num = increment(num); } /** * Add a child to the node. * * @param child */ public TrieNode<I, N, W> addChild(I child) { if (childNodes == null) { childNodes = new HashMap<I, TrieNode<I, N, W>>(); } if (!childNodes.containsKey(child)) { TrieNode<I, N, W> childNode = createNode(child); childNodes.put(child, childNode); return childNode; } else { return childNodes.get(child); } } /** * Return the id of the node. * * @return */ public I getId() { return id; } /** * Returns the frequency / probability of the node. * * @return */ public N getNum() { return num; } /** * Returns child nodes. * * @return */ public Map<I, TrieNode<I, N, W>> getChildNodes() { return childNodes; } /** * Returns words and their frequencies / probabilities. * * @return */ public Map<W, N> getWords() { return words; } /** * Returns true if has a child node with the specified id. * * @param id * @return */ public boolean hasChild(I id) { if (childNodes == null) return false; return childNodes.containsKey(id); } /** * Returns the node with the specified id. * * @param id * @return */ public TrieNode<I, N, W> getChild(I id) { if (childNodes == null) return null; return childNodes.get(id); } /** * Returns true if this node has the requested word. * * @param word * @return */ public boolean hasWord(W word) { if (words == null) return false; return words.containsKey(word); } /** * Returns the numeric value according to the word. * * @param word * @return */ public N getWord(W word) { return words.get(word); } @Override public String toString() { return "(id:" + getId() // + ", childs:" + childNodes.toString() + ", words:" + words.toString() + ")"; } public String getReprString() { return getReprString("\t"); } public String getReprString(String tab) { String ret = tab; ret += "(id:" + getId() + ", freq:" + num; ret += ", words:" + words.toString(); if (childNodes != null && childNodes.size() > 0) { ret += ", childs:\n"; for (TrieNode<I, N, W> node : childNodes.values()) ret += node.getReprString(tab + tab); } ret += tab + ")\n"; return ret; } }