/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.analysis; // Commons Logging imports import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; // Nutch imports import org.apache.nutch.plugin.Extension; import org.apache.nutch.plugin.ExtensionPoint; import org.apache.nutch.plugin.PluginRuntimeException; import org.apache.nutch.plugin.PluginRepository; import org.apache.nutch.util.ObjectCache; import org.apache.hadoop.conf.Configuration; /** * Creates and caches {@link NutchAnalyzer} plugins. * * @author Jérôme Charron */ public class AnalyzerFactory { private final static String KEY = AnalyzerFactory.class.getName(); public final static Log LOG = LogFactory.getLog(KEY); private NutchAnalyzer DEFAULT_ANALYZER; private ExtensionPoint extensionPoint; private Configuration conf; public AnalyzerFactory (Configuration conf) { DEFAULT_ANALYZER = new NutchDocumentAnalyzer(conf); this.conf = conf; this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(NutchAnalyzer.X_POINT_ID); if(this.extensionPoint == null) { throw new RuntimeException("x point " + NutchAnalyzer.X_POINT_ID + " not found."); } } public static AnalyzerFactory get(Configuration conf) { ObjectCache objectCache = ObjectCache.get(conf); AnalyzerFactory factory = (AnalyzerFactory) objectCache.getObject(KEY); if (factory == null) { factory = new AnalyzerFactory(conf); objectCache.setObject(KEY, factory); } return factory; } /** * Returns the appropriate {@link NutchAnalyzer analyzer} implementation * given a language code. * * <p>NutchAnalyzer extensions should define the attribute "lang". The first * plugin found whose "lang" attribute equals the specified lang parameter is * used. If none match, then the {@link NutchDocumentAnalyzer} is used. */ public NutchAnalyzer get(String lang) { NutchAnalyzer analyzer = DEFAULT_ANALYZER; Extension extension = getExtension(lang); if (extension != null) { try { analyzer = (NutchAnalyzer) extension.getExtensionInstance(); } catch (PluginRuntimeException pre) { analyzer = DEFAULT_ANALYZER; } } return analyzer; } private Extension getExtension(String lang) { ObjectCache objectCache = ObjectCache.get(conf); if (lang == null) { return null; } Extension extension = (Extension) objectCache.getObject(lang); if (extension == null) { extension = findExtension(lang); if (extension != null) { objectCache.setObject(lang, extension); } } return extension; } private Extension findExtension(String lang) { if (lang != null) { Extension[] extensions = this.extensionPoint.getExtensions(); for (int i=0; i<extensions.length; i++) { if (lang.equals(extensions[i].getAttribute("lang"))) { return extensions[i]; } } } return null; } /** * Method used by unit test */ protected NutchAnalyzer getDefault() { return DEFAULT_ANALYZER; } }