/*******************************************************************************
 * Copyright (c) 2000, 2016 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Alexander Kurtakov - Bug 460787
 *     Sopot Cela - Bug 466829
 *******************************************************************************/
package org.eclipse.help.internal.search;

import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;

import com.ibm.icu.text.BreakIterator;

/**
 * Lucene Analyzer. Tokens are produced by a {@link StandardTokenizer} and then
 * passed through a {@link LowerCaseFilter}.
 */
public final class DefaultAnalyzer extends Analyzer {

	/**
	 * Locale resolved by the constructor: the requested locale (or a less
	 * specific form of it) when BreakIterator supports it, otherwise en_US.
	 * The token stream built by {@link #createComponents(String)} does not
	 * consult it.
	 */
	private final Locale locale;

	/**
	 * Creates a new analyzer using the given locale.
	 * <p>
	 * The locale is matched against the locales supported by
	 * {@link BreakIterator}, trying the full locale first, then the locale
	 * without its variant, then the language alone. If none of them is
	 * supported an error is logged and en_US is used instead.
	 *
	 * @param localeString
	 *            locale in underscore-separated form (language, optional
	 *            country, optional variant); when <code>null</code> the
	 *            platform locale, or failing that the JVM default locale, is
	 *            used
	 */
	public DefaultAnalyzer(String localeString) {
		super();
		// Create a locale object for a given locale string
		Locale userLocale = getLocale(localeString);

		// Check if the locale is supported by BreakIterator
		// check here to do it only once.
		Locale supportedLocale = findSupportedLocale(userLocale);
		if (supportedLocale == null) {
			// Locale is not supported, will use en_US.
			// Report the requested string when there is one; otherwise report
			// the locale that was actually resolved, rather than "null".
			String localeName = localeString != null ? localeString : userLocale.toString();
			HelpBasePlugin.logError(
					"Text Analyzer could not be created for locale " //$NON-NLS-1$
							+ localeName
							+ ". An analyzer that extends org.eclipse.help.luceneAnalyzer extension point needs to be plugged in for locale " //$NON-NLS-1$
							+ localeName
							+ ", or Java Virtual Machine needs to be upgraded to version with proper support for locale " //$NON-NLS-1$
							+ localeName + ".", //$NON-NLS-1$
					null);
			supportedLocale = new Locale("en", "US"); //$NON-NLS-1$ //$NON-NLS-2$
		}
		locale = supportedLocale;
	}

	/**
	 * Finds the most specific form of the given locale that BreakIterator
	 * supports: the locale itself, then language + country, then language only.
	 *
	 * @return the supported locale, or <code>null</code> if no form of
	 *         <code>userLocale</code> is supported
	 */
	private static Locale findSupportedLocale(Locale userLocale) {
		Locale[] availableLocales = BreakIterator.getAvailableLocales();
		if (isAvailable(availableLocales, userLocale)) {
			return userLocale;
		}
		if (!userLocale.getVariant().isEmpty()) {
			// Check if the locale without variant is supported by BreakIterator
			Locale countryLocale = new Locale(userLocale.getLanguage(), userLocale.getCountry());
			if (isAvailable(availableLocales, countryLocale)) {
				return countryLocale;
			}
		}
		if (!userLocale.getCountry().isEmpty()) {
			// Check if at least the language is supported by BreakIterator
			Locale languageLocale = new Locale(userLocale.getLanguage(), ""); //$NON-NLS-1$
			if (isAvailable(availableLocales, languageLocale)) {
				return languageLocale;
			}
		}
		return null;
	}

	/**
	 * Returns whether <code>candidate</code> equals one of the given locales.
	 */
	private static boolean isAvailable(Locale[] availableLocales, Locale candidate) {
		for (Locale available : availableLocales) {
			if (candidate.equals(available)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Creates a Locale object out of a string representation.
	 * <p>
	 * A <code>null</code> argument is replaced by the platform locale and, if
	 * that is also <code>null</code>, by the JVM default locale. The string is
	 * split on underscores; one, two or three tokens are taken as language,
	 * country and variant. Any other token count yields the JVM default locale.
	 */
	private Locale getLocale(String clientLocale) {
		if (clientLocale == null) {
			clientLocale = Platform.getNL();
		}
		if (clientLocale == null) {
			clientLocale = Locale.getDefault().toString();
		}

		// break the string into tokens to get the Locale object
		StringTokenizer locales = new StringTokenizer(clientLocale, "_"); //$NON-NLS-1$
		switch (locales.countTokens()) {
		case 1:
			return new Locale(locales.nextToken(), ""); //$NON-NLS-1$
		case 2:
			return new Locale(locales.nextToken(), locales.nextToken());
		case 3:
			return new Locale(locales.nextToken(), locales.nextToken(), locales.nextToken());
		default:
			return Locale.getDefault();
		}
	}

	/**
	 * Builds the token stream components: a {@link StandardTokenizer} as the
	 * source, wrapped in a {@link LowerCaseFilter}.
	 * <p>
	 * Can't use try-with-resources because the Lucene internally reuses
	 * components. See {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy}
	 */
	@SuppressWarnings("resource")
	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		Tokenizer source = new StandardTokenizer();
		LowerCaseFilter filter = new LowerCaseFilter(source);
		return new TokenStreamComponents(source, filter);
	}
}