/* * eXist Open Source Native XML Database * Copyright (C) 2001-09 Wolfgang M. Meier * wolfgang@exist-db.org * http://exist.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id$ */ package org.exist.xquery.functions.text; import java.util.List; import org.exist.dom.NodeSet; import org.exist.dom.QName; import org.exist.storage.TermMatcher; import org.exist.storage.TextSearchEngine; import org.exist.xquery.Cardinality; import org.exist.xquery.Constants; import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.DoubleValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; /** * @author Wolfgang Meier (wolfgang@exist-db.org) */ public class FuzzyMatchAll extends AbstractMatchFunction { public final static FunctionSignature signature = new FunctionSignature( new QName("fuzzy-match-all", TextModule.NAMESPACE_URI, TextModule.PREFIX), "Fuzzy keyword search, which compares strings based on the Levenshtein distance " + "(or edit distance). The function tries to match each of the keywords specified in the " + "keyword string against the string value of each item in the sequence $source.", new SequenceType[]{ new FunctionParameterSequenceType("source", Type.NODE, Cardinality.ZERO_OR_MORE, "The source"), new FunctionParameterSequenceType("keyword", Type.STRING, Cardinality.EXACTLY_ONE, "The keyword string")}, new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the sequence of nodes that match the keywords"), true); public FuzzyMatchAll(XQueryContext context) { super(context, Constants.FULLTEXT_AND, signature); } public FuzzyMatchAll(XQueryContext context, int type, FunctionSignature signature) { super(context, type, signature); } public Sequence evalQuery(NodeSet nodes, List terms) throws XPathException { if (terms == null || terms.size() == 0) return Sequence.EMPTY_SEQUENCE; // no search terms double threshold = 0.65; if (getArgumentCount() == 3) { Sequence thresOpt = getArgument(2).eval(nodes); //TODO : get rid of getLength() if(!thresOpt.hasOne()) throw new XPathException(this, "third argument to " + getName() + "should be a single double value"); threshold = ((DoubleValue) thresOpt.convertTo(Type.DOUBLE)).getDouble(); } // Can return NPE TextSearchEngine engine = context.getBroker().getTextEngine(); if(engine==null){ throw new XPathException("The legacy fulltext indexing has been disabled by " + "default from version 1.4.1. Please consider migrating to " + "the new full text indexing.."); } NodeSet hits[] = new NodeSet[terms.size()]; String term; TermMatcher matcher; for (int k = 0; k < terms.size(); k++) { term = (String)terms.get(k); if(term.length() == 0) hits[k] = null; else { matcher = new FuzzyMatcher(term, threshold); hits[k] = engine.getNodes( context, nodes.getDocumentSet(), nodes, NodeSet.ANCESTOR, null, matcher, term.substring(0, 1)); } } return mergeResults(hits); } }