package org.cdlib.xtf.textEngine; /** * Copyright (c) 2007, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the University of California nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

import org.apache.lucene.spelt.WordEquiv;
import org.cdlib.xtf.util.CharMap;
import org.cdlib.xtf.util.FastStringCache;
import org.cdlib.xtf.util.WordMap;

/**
 * Used for eliminating redundant spelling suggestions.
 *
 * Two words are treated as equivalent when they come out identical after
 * being passed through the standard term filter, the optional accent map,
 * and the optional plural map (in that order).
 */
public class XtfWordEquiv implements WordEquiv
{
  /** Maps accented words to their unaccented form; may be null. */
  private final CharMap accentMap;

  /** Maps plural words to their singular form; may be null. */
  private final WordMap pluralMap;

  /** First filtering stage applied to every word. */
  private final StdTermFilter stdTermFilter = new StdTermFilter();

  /**
   * Remembers recently filtered words so they need not be filtered again.
   * NOTE(review): 1000 is presumably the cache capacity -- confirm against
   * FastStringCache.
   */
  private final FastStringCache recent = new FastStringCache(1000);

  /**
   * Constructs the equivalence checker.
   *
   * @param accentMap  map used to ignore accents, or null to skip that step
   * @param pluralMap  map used to ignore plurals, or null to skip that step
   */
  public XtfWordEquiv(CharMap accentMap, WordMap pluralMap)
  {
    this.accentMap = accentMap;
    this.pluralMap = pluralMap;
  }

  /**
   * Checks if two words can be considered equivalent, and thus not form a
   * real spelling suggestion.
   *
   * @param word1  first word to compare
   * @param word2  second word to compare
   * @return       true if both words filter down to the same string
   */
  public boolean isEquivalent(String word1, String word2)
  {
    // Filter both words (convert to lower case, remove plurals, etc.)
    word1 = filter(word1);
    word2 = filter(word2);

    // And compare the filtered versions.
    return word1.equals(word2);
  }

  /**
   * Reduces a word to the form used for equivalence comparison, consulting
   * the cache of recent results first.
   *
   * @param in  word to filter
   * @return    the filtered form of the word
   */
  private String filter(String in)
  {
    String out = (String)recent.get(in);
    if (out == null)
    {
      out = stdTermFilter.filter(in);

      // Next, ignore accents. A null result from the map leaves the word
      // as it was.
      String tmp;
      if (accentMap != null) {
        tmp = accentMap.mapWord(out);
        if (tmp != null)
          out = tmp;
      }

      // Then ignore plurals. The map must be checked for null *before* it
      // is consulted; looking it up first would throw when no plural map
      // was supplied.
      if (pluralMap != null) {
        tmp = pluralMap.lookup(out);
        if (tmp != null)
          out = tmp;
      }

      // Cache the result regardless of which optional maps are present.
      recent.put(in, out);
    }
    return out;
  }
}