DSTokenizer.java example

Explorer
dlibrary-master
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.search;

import java.io.Reader;

import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;

/**
 * Customized Lucene Tokenizer, since the standard one rejects numbers from
 * indexing/querying.
 * 
 * @deprecated Since DSpace 4 the system use an abstraction layer named
 *             Discovery to provide access to different search providers. The
 *             legacy system built upon Apache Lucene is likely to be removed in
 *             a future version. If you are interested in using Lucene as backend
 *             for the DSpace search system, please consider to build a Lucene
 *             implementation of the Discovery interfaces
 */
@Deprecated
public final class DSTokenizer extends CharTokenizer
{
    /**
     * Construct a new LowerCaseTokenizer.
     * @param version Lucene version number
     */
    public DSTokenizer(Version version, Reader in)
    {
        super(version, in);
    }

    /**
     * Collects only characters which satisfy {@link Character#isLetter(char)}.
     */
    @Override
    protected int normalize(int c) {
        return super.normalize(Character.toLowerCase(c));
    }

    /**
     * Collects only characters which do not satisfy
     * {@link Character#isWhitespace(char)}.
     */
    @Override
    protected boolean isTokenChar(int c)
    {
        return Character.isLetterOrDigit(c);
    }
}