/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.search;
import java.io.Reader;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.util.Version;
/**
* Customized Lucene Tokenizer, since the standard one rejects numbers from
* indexing/querying.
*/
public final class DSTokenizer extends CharTokenizer {

    /**
     * Creates a new DSTokenizer; both arguments are passed straight to the
     * {@code CharTokenizer} constructor.
     *
     * @param version Lucene version number
     * @param in      reader handed to the superclass (presumably the text to
     *                tokenize; see the Lucene {@code CharTokenizer} docs)
     */
    public DSTokenizer(Version version, Reader in) {
        super(version, in);
    }

    /**
     * Lower-cases the given code point with
     * {@link Character#toLowerCase(int)} and then applies the superclass
     * normalization to the result.
     *
     * @param c code point to normalize
     * @return whatever the superclass returns for the lower-cased code point
     */
    @Override
    protected int normalize(int c) {
        final int lowered = Character.toLowerCase(c);
        return super.normalize(lowered);
    }

    /**
     * Accepts a code point as part of a token only when it is a letter or a
     * digit, so numbers are kept rather than rejected.
     *
     * @param c code point to test
     * @return {@code true} if {@code c} is a letter or a digit
     */
    @Override
    protected boolean isTokenChar(int c) {
        // Same character set as Character.isLetterOrDigit(int).
        return Character.isLetter(c) || Character.isDigit(c);
    }
}