/*
 *  JOrtho
 *
 *  Copyright (C) 2005-2008 by i-net software
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 *
 *  Created on 02.11.2005
 */
package com.inet.jortho;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Word lookup and suggestion search on a dictionary that is stored as a flat
 * <code>char</code> array.
 * <p>
 * As read by this class, the array consists of entries of three chars each:
 * <ul>
 * <li><code>tree[i]</code> - the character of the entry,</li>
 * <li><code>tree[i + 1]</code> - bit 0x8000 is set if a word ends on this entry, the lower
 *     15 bits are the upper part of the offset of the following node,</li>
 * <li><code>tree[i + 2]</code> - the lower 16 bits of that offset.</li>
 * </ul>
 * The entries of one node are scanned in ascending character order and the scan stops on an
 * entry with the value {@link #LAST_CHAR}. An offset that is not larger than 0 means that no
 * further characters follow.
 * <p>
 * All searches move the shared cursor {@link #idx}, so one instance must not be searched from
 * several threads at the same time.
 *
 * @author Volker Berlin
 */
abstract class DictionaryBase {

    /** Character value on which the scan of the entries of a node stops. */
    protected static final char LAST_CHAR = 0xFFFF;

    /** Cursor into {@link #tree}; it is modified by every search. */
    protected int idx;

    /** Upper bound (exclusive) for the cursor inside of {@link #tree}. */
    protected int size;

    /** The dictionary data, see the class description for the layout. */
    protected char[] tree;

    /**
     * Empty Constructor.
     */
    protected DictionaryBase() {
        /* empty */
    }

    /**
     * Create a dictionary on top of an existing tree array. The full array is used.
     * @param tree the dictionary data. Can't be null.
     */
    DictionaryBase(final char[] tree) {
        this.tree = tree;
        size = tree.length;
    }

    /**
     * Returns an int that describes the dissimilarity of two characters. The value is always
     * larger than 0: 1 if both differ only in case, 5 if both have the same
     * {@link Character#getType(char) character type} and 6 if not.
     * @param a first char
     * @param b second char
     * @return the dissimilarity
     */
    private int charDiff(char a, char b) {
        final char lowerA = Character.toLowerCase(a);
        final char lowerB = Character.toLowerCase(b);
        if (lowerA == lowerB) {
            return 1;
        }
        return Character.getType(lowerA) == Character.getType(lowerB) ? 5 : 6;
    }

    /**
     * Check if the word exists in this dictionary.
     * @param word the word to check. Can't be null.
     * @return true if the word exists.
     */
    public boolean exist(final String word) {
        idx = 0;
        final int length = word.length();
        for (int pos = 0; pos < length; pos++) {
            if (!searchChar(word.charAt(pos))) {
                return false;
            }
            final boolean lastLetter = pos == length - 1;
            if (lastLetter && isWordMatch()) {
                return true;
            }
            // step down to the node with the following characters
            idx = readIndex();
            if (idx <= 0) {
                return false;
            }
        }
        return false;
    }

    /**
     * Check if a word ends on the current entry position.
     */
    private boolean isWordMatch() {
        return (tree[idx + 1] & 0x8000) != 0;
    }

    /**
     * Read the offset in the tree of the next character.
     */
    final int readIndex() {
        final int high = tree[idx + 1] & 0x7fff;
        final int low = tree[idx + 2];
        // the lower 16 bits of the shifted value are 0, the bitwise or is equal to an addition
        return (high << 16) | low;
    }

    /**
     * Search if the character exists in the current node. If found then the variable
     * <code>idx</code> points to its location. If not found then it points to the entry of the
     * next larger character in the node or behind the tree.
     * @param c the searching character
     * @return true if found
     */
    private boolean searchChar(final char c) {
        while (idx < size && tree[idx] < c) {
            idx += 3;
        }
        return idx < size && tree[idx] == c;
    }

    /**
     * Returns a list of suggestions if the word is not in the dictionary.
     * @param word the wrong spelled word. Can't be null.
     * @return a sorted list of class Suggestion. It is empty if the word is empty or if it
     *         exists in the dictionary.
     * @see Suggestion
     */
    public List<Suggestion> searchSuggestions(final String word) {
        if (word.length() == 0 || exist(word)) {
            return new ArrayList<Suggestion>();
        }
        // longer words are allowed to differ more, but never more than 20
        final int maxDissimilarity = Math.min(20, 4 + word.length());
        final Suggestions collector = new Suggestions(maxDissimilarity);
        idx = 0;
        searchSuggestions(collector, word, 0, 0, 0);
        final List<Suggestion> result = collector.getlist();
        Collections.sort(result);
        return result;
    }

    /**
     * Searches for similar words with the help of different rules. Every rule adds its own
     * diff. The smaller the diff the more similar is the word. This method calls itself
     * recursively.
     * @param list container for the found words
     * @param chars up to charPosition the already mapped letters, behind it the letters of the
     *        original word that still need to be mapped
     * @param charPosition position of the current character in chars
     * @param lastIdx position in the search index that belongs to the current character position
     * @param diff the dissimilarity up to the current character position
     */
    private void searchSuggestions(final Suggestions list, final CharSequence chars, final int charPosition, final int lastIdx, final int diff) {
        if (diff > list.getMaxDissimilarity()) {
            return;
        }

        // First with the correct letters to go on
        idx = lastIdx;
        final char originalChar = chars.charAt(charPosition);
        final int length = chars.length();
        final int nextPosition = charPosition + 1;
        if (searchChar(originalChar)) {
            if (isWordMatch()) {
                if (nextPosition == length) {
                    // exact match at this character position
                    list.add(new Suggestion(chars, diff));
                } else {
                    // a shorter match, we need to cut the string
                    final CharSequence shorter = chars.subSequence(0, nextPosition);
                    list.add(new Suggestion(shorter, diff + (length - nextPosition) * 5));
                }
            }
            idx = readIndex();
            if (idx <= 0) {
                // no more characters in the tree
                return;
            }
            if (nextPosition == length) {
                searchSuggestionsLonger(list, chars, length, idx, diff + 5);
                return;
            }
            searchSuggestions(list, chars, nextPosition, idx, diff);
        }

        // transposed letters and additional letter
        final boolean hasNextChar = nextPosition < length;
        if (hasNextChar) {
            searchTransposedOrSurplusLetter(list, chars, charPosition, lastIdx, diff);
        }

        // Missing letters, we need to add one character. The insertion of the following
        // character of the word (or of the current one on the last position) is skipped.
        final char skippedChar = hasNextChar ? chars.charAt(nextPosition) : originalChar;
        searchMissingLetter(list, chars, charPosition, lastIdx, diff, skippedChar);

        // Typos - wrong letters (One character is replaced with any character)
        searchReplacedLetter(list, chars, charPosition, lastIdx, diff);
    }

    /**
     * Rule "transposed letters" and rule "additional letter". Both are only tried if the
     * following character of the word exists in the current node. The caller must make sure
     * that there is a character behind charPosition.
     */
    private void searchTransposedOrSurplusLetter(final Suggestions list, final CharSequence chars, final int charPosition, final int lastIdx, final int diff) {
        final int nextPosition = charPosition + 1;
        final char nextChar = chars.charAt(nextPosition);
        idx = lastIdx;
        if (!searchChar(nextChar)) {
            return;
        }
        final int foundIdx = idx;

        // transposed letters, the current and the following character are swapped
        idx = readIndex();
        if (idx > 0) {
            final StringBuilder swapped = new StringBuilder(chars);
            swapped.setCharAt(nextPosition, chars.charAt(charPosition));
            swapped.setCharAt(charPosition, nextChar);
            searchSuggestions(list, swapped, nextPosition, idx, diff + 3);
        }

        // Additional character in the misspelled word, go on without the current character
        idx = foundIdx;
        final StringBuilder shortened = new StringBuilder(chars).deleteCharAt(charPosition);
        searchSuggestions(list, shortened, charPosition, lastIdx, diff + 5);
    }

    /**
     * Rule "missing letter". Every character of the current node that has following characters
     * is inserted on the current position, except the character skippedChar.
     */
    private void searchMissingLetter(final Suggestions list, final CharSequence chars, final int charPosition, final int lastIdx, final int diff, final char skippedChar) {
        int entry = lastIdx;
        idx = entry;
        while (idx < size && tree[idx] < LAST_CHAR) {
            final char inserted = tree[idx];
            idx = readIndex();
            if (idx > 0 && inserted != skippedChar) {
                final StringBuilder extended = new StringBuilder(chars);
                extended.insert(charPosition, inserted);
                searchSuggestions(list, extended, charPosition + 1, idx, diff + 5);
            }
            // the recursion has moved the cursor, continue with the next entry of this node
            entry += 3;
            idx = entry;
        }
    }

    /**
     * Rule "wrong letter". The character on the current position is replaced with every other
     * character of the current node. Words that end on an entry of the node are added directly.
     */
    private void searchReplacedLetter(final Suggestions list, final CharSequence chars, final int charPosition, final int lastIdx, final int diff) {
        final int length = chars.length();
        final int nextPosition = charPosition + 1;
        final char originalChar = chars.charAt(charPosition);
        int entry = lastIdx;
        idx = entry;
        while (idx < size && tree[idx] < LAST_CHAR) {
            if (isWordMatch()) {
                // a word ends here, the not mapped rest of the original word counts as difference
                final StringBuilder candidate = new StringBuilder();
                candidate.append(chars, 0, charPosition);
                candidate.append(tree[idx]);
                list.add(new Suggestion(candidate, diff + 5 + (length - candidate.length()) * 5));
            }
            if (nextPosition < length) {
                final char replacement = tree[idx];
                idx = readIndex();
                if (idx > 0 && replacement != originalChar) {
                    final StringBuilder candidate = new StringBuilder(chars);
                    candidate.setCharAt(charPosition, replacement);
                    searchSuggestions(list, candidate, nextPosition, idx, diff + charDiff(originalChar, replacement));
                }
            }
            // the recursion has moved the cursor, continue with the next entry of this node
            entry += 3;
            idx = entry;
        }
    }

    /**
     * Adds every word that is exactly one character longer than the given characters.
     * @param list container for the found words
     * @param chars the completely mapped characters
     * @param originalLength the length of chars, currently not evaluated
     * @param lastIdx position in the search index of the node behind the last character
     * @param diff the dissimilarity of the added suggestions
     */
    private void searchSuggestionsLonger(final Suggestions list, final CharSequence chars, final int originalLength, final int lastIdx, final int diff) {
        final String prefix = chars.toString();
        for (idx = lastIdx; idx < size && tree[idx] < LAST_CHAR; idx += 3) {
            if (isWordMatch()) {
                list.add(new Suggestion(prefix + tree[idx], diff));
            }
        }
    }
}