/* Jazzy - a Java library for Spell Checking Copyright (C) 2001 Mindaugas Idzelis Full text of license can be found in LICENSE.txt This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package com.swabunga.spell.engine; import java.io.EOFException; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; import java.util.LinkedList; import java.util.List; /** * Another implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge footprint of * <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A future version of this class that implements some caching * strategies might be a good idea in the future, if there's any demand for it. * * This implementation requires a special dictionary file, with "code*word" lines sorted by code. It's using a dichotomy algorithm to search * for words in the dictionary * * @author Damien Guillaume * @version 0.1 */ public class SpellDictionaryDichoDisk extends SpellDictionaryASpell { /** Holds the dictionary file for reading */ private RandomAccessFile dictFile = null; /** dictionary and phonetic file encoding */ private String encoding = null; /** * Dictionary Convienence Constructor. */ public SpellDictionaryDichoDisk(File wordList) throws FileNotFoundException, IOException { super((File) null); dictFile = new RandomAccessFile(wordList, "r"); } /** * Dictionary Convienence Constructor. */ public SpellDictionaryDichoDisk(File wordList, String encoding) throws FileNotFoundException, IOException { super((File) null); this.encoding = encoding; dictFile = new RandomAccessFile(wordList, "r"); } /** * Dictionary constructor that uses an aspell phonetic file to build the transformation table. */ public SpellDictionaryDichoDisk(File wordList, File phonetic) throws FileNotFoundException, IOException { super(phonetic); dictFile = new RandomAccessFile(wordList, "r"); } /** * Dictionary constructor that uses an aspell phonetic file to build the transformation table. */ public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding) throws FileNotFoundException, IOException { super(phonetic, encoding); this.encoding = encoding; dictFile = new RandomAccessFile(wordList, "r"); } /** * Add a word permanantly to the dictionary (and the dictionary file). <i>not implemented !</i> */ @Override public void addWord(String word) { System.err.println("error: addWord is not implemented for SpellDictionaryDichoDisk"); } /** * Search the dictionary file for the words corresponding to the code within positions p1 - p2 */ private LinkedList<String> dichoFind(String code, long p1, long p2) throws IOException { // System.out.println("dichoFind("+code+","+p1+","+p2+")"); long pm = (p1 + p2) / 2; dictFile.seek(pm); String l; if (encoding == null) { l = dictFile.readLine(); } else { l = dictReadLine(); } pm = dictFile.getFilePointer(); if (encoding == null) { l = dictFile.readLine(); } else { l = dictReadLine(); } long pm2 = dictFile.getFilePointer(); if (pm2 >= p2) { return seqFind(code, p1, p2); } int istar = l.indexOf('*'); if (istar == -1) { throw new IOException("bad format: no * !"); } String testcode = l.substring(0, istar); int comp = code.compareTo(testcode); if (comp < 0) { return dichoFind(code, p1, pm - 1); } else if (comp > 0) { return dichoFind(code, pm2, p2); } else { LinkedList<String> l1 = dichoFind(code, p1, pm - 1); LinkedList<String> l2 = dichoFind(code, pm2, p2); String word = l.substring(istar + 1); l1.add(word); l1.addAll(l2); return l1; } } private LinkedList<String> seqFind(String code, long p1, long p2) throws IOException { // System.out.println("seqFind("+code+","+p1+","+p2+")"); LinkedList<String> list = new LinkedList<String>(); dictFile.seek(p1); while (dictFile.getFilePointer() < p2) { String l; if (encoding == null) { l = dictFile.readLine(); } else { l = dictReadLine(); } int istar = l.indexOf('*'); if (istar == -1) { throw new IOException("bad format: no * !"); } String testcode = l.substring(0, istar); if (testcode.equals(code)) { String word = l.substring(istar + 1); list.add(word); } } return list; } /** * Read a line of dictFile with a specific encoding */ private String dictReadLine() throws IOException { int max = 255; byte b = 0; byte[] buf = new byte[max]; int i = 0; try { for (; b != '\n' && b != '\r' && i < max - 1; i++) { b = dictFile.readByte(); buf[i] = b; } } catch (EOFException ex) { } if (i == 0) { return ""; } String s = new String(buf, 0, i - 1, encoding); return s; } /** * Returns a list of strings (words) for the code. */ @Override public List<String> getWords(String code) { // System.out.println("getWords("+code+")"); LinkedList<String> list; try { list = dichoFind(code, 0, dictFile.length() - 1); // System.out.println(list); } catch (IOException ex) { System.err.println("IOException: " + ex.getMessage()); list = new LinkedList<String>(); } return list; } }