/*
* JOrtho
*
* Copyright (C) 2005-2008 by i-net software
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*
* Created on 15.06.2007
*/
package com.inet.jortho;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.zip.InflaterInputStream;
/**
* With the DictionaryFactory you can create / load a Dictionary. A Dictionary is list of word with a API for searching.
* The list is saved internal as a tree.
* @see Dictionary
* @author Volker Berlin
*/
class DictionaryFactory {
/**
* A node in the search tree. Every Node can include a list of NodeEnties
*/
private final static class Node extends ArrayList<NodeEntry> {
/**
*
*/
private static final long serialVersionUID = 1L;
Node() {
super(1);
}
int save(final DictionaryFactory factory) {
int idx;
final int start = idx = factory.size;
//reserve the needed memory
final int newSize = factory.size + size() * 3 + 1;
factory.checkSize(newSize);
factory.size = newSize;
for (int i = 0; i < size(); i++) {
final NodeEntry entry = get(i);
factory.tree[idx++] = entry.c;
final Node nextNode = entry.nextNode;
int offset = 0;
if (nextNode != null) {
offset = nextNode.save(factory);
}
if (entry.isWord) {
offset |= 0x80000000;
}
factory.tree[idx++] = (char) (offset >> 16);
factory.tree[idx++] = (char) (offset);
}
factory.tree[idx] = DictionaryBase.LAST_CHAR;
return start;
}
NodeEntry searchCharOrAdd(final char c) {
for (int i = 0; i < size(); i++) {
NodeEntry entry = get(i);
if (entry.c < c) {
continue;
}
if (entry.c == c) {
return entry;
}
entry = new NodeEntry(c);
add(i, entry);
trimToSize(); //reduce the memory consume, there is a very large count of this Nodes.
return entry;
}
final NodeEntry entry = new NodeEntry(c);
add(entry);
trimToSize(); //reduce the memory consume, there is a very large count of this Nodes.
return entry;
}
}
/**
* Descript a single charchter in the Dictionary tree.
*/
private final static class NodeEntry {
final char c;
boolean isWord;
Node nextNode;
NodeEntry(final char c) {
this.c = c;
}
/**
* Create a new Node and set it as nextNode
* @return the nextNode
*/
Node createNewNode() {
return nextNode = new Node();
}
}
private final Node root = new Node();
private int size;
private char[] tree;
/**
* Empty Constructor.
*/
public DictionaryFactory() {
/* empty */
}
/**
* Add a word to the tree. If it already exist then it has no effect.
* @param word the new word.
*/
public void add(final String word) {
Node node = root;
for (int i = 0; i < word.length(); i++) {
final char c = word.charAt(i);
final NodeEntry entry = node.searchCharOrAdd(c);
if (i == word.length() - 1) {
entry.isWord = true;
return;
}
final Node nextNode = entry.nextNode;
if (nextNode == null) {
node = entry.createNewNode();
}
else {
node = nextNode;
}
}
}
/**
* Check the size of the array and resize it if needed.
* @param newSize the requied size
*/
final void checkSize(final int newSize) {
if (newSize > tree.length) {
final char[] puffer = new char[Math.max(newSize, 2 * tree.length)];
System.arraycopy(tree, 0, puffer, 0, size);
tree = puffer;
}
}
/**
* Create from the data in this factory a Dictionary object. If there
* are no word added then the Dictionary is empty. The Dictionary need fewer memory as the DictionaryFactory.
* @return a Dictionary object.
*/
public Dictionary create() {
tree = new char[10000];
root.save(this);
//shrink the array
final char[] temp = new char[size];
System.arraycopy(tree, 0, temp, 0, size);
tree = temp;
return new Dictionary(tree);
}
/**
* Load the directory from plain a list of words. The words must be delimmited with newlines. This method can be
* called multiple times.
*
* @param stream
* a InputStream with words
* @param charsetName
* the name of a codepage for example "UTF8" or "Cp1252"
* @throws IOException
* If an I/O error occurs.
* @throws NullPointerException
* If stream or charsetName is null.
*/
public void loadPlainWordList(final InputStream stream, final String charsetName) throws IOException {
final Reader reader = new InputStreamReader(stream, charsetName);
loadPlainWordList(reader);
}
/**
* Load the directory from plain a list of words. The words must be delimmited with newlines. This method can be
* called multiple times.
*
* @param reader
* a Reader with words
* @throws IOException
* If an I/O error occurs.
* @throws NullPointerException
* If reader is null.
*/
public void loadPlainWordList(final Reader reader) throws IOException {
final BufferedReader input = new BufferedReader(reader);
String word = input.readLine();
while (word != null) {
if (word.length() > 1) {
add(word);
}
word = input.readLine();
}
}
/**
* Load the directory from a compressed list of words with UTF8 encoding. The words must be delimmited with
* newlines. This method can be called multiple times.
*
* @param filename
* the name of the file
* @throws IOException
* If an I/O error occurs.
* @throws NullPointerException
* If filename is null.
*/
public void loadWordList(final URL filename) throws IOException {
final URLConnection conn = filename.openConnection();
conn.setReadTimeout(5000);
InputStream input = conn.getInputStream();
input = new InflaterInputStream(input);
input = new BufferedInputStream(input);
loadPlainWordList(input, "UTF8");
}
}