/*
 * Copyright (c) 2009 Andrejs Jermakovics.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Andrejs Jermakovics - initial implementation
 */
package it.unibz.instasearch.indexing.tokenizers;

import java.util.regex.Pattern;

import org.apache.lucene.analysis.TokenStream;

/**
 * Splits terms at dot ({@code '.'}) and hyphen ({@code '-'}) characters.
 * <p>
 * {@link #returnOriginalTerm()} returns {@code true}, which asks the base
 * class to keep the unsplit term as well; how that flag is honoured is
 * decided by {@link TermSplitTokenizer}.
 */
public class DotSplitTokenizer extends TermSplitTokenizer {

    /**
     * Separator characters at which a term is split. Compiled once because
     * {@link String#split(String)} would recompile this character class on
     * every call.
     */
    private static final Pattern SEPARATORS = Pattern.compile("[.-]");

    /**
     * Creates a tokenizer that reads its terms from the given stream.
     *
     * @param in the token stream passed on to the base class constructor
     */
    public DotSplitTokenizer(TokenStream in) {
        super(in);
    }

    /**
     * Splits the given term at every {@code '.'} and {@code '-'}.
     * <p>
     * The result follows {@link Pattern#split(CharSequence)} semantics:
     * trailing empty strings are dropped, and a term that contains no
     * separator is returned as a single-element array.
     *
     * @param term the term to split; must not be {@code null}
     * @return the parts of {@code term} between separators
     */
    @Override
    public String[] splitTerm(String term) {
        return SEPARATORS.split(term);
    }

    /**
     * @return always {@code true}
     */
    @Override
    protected boolean returnOriginalTerm() {
        return true;
    }
}