// Copyright 2009 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.enterprise.connector.instantiator; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Maps; import com.google.enterprise.connector.common.PropertiesUtils; import com.google.enterprise.connector.common.SecurityUtils; import com.google.enterprise.connector.common.StringUtils; import com.google.enterprise.connector.database.ConnectorPersistentStoreFactory; import com.google.enterprise.connector.manager.Context; import com.google.enterprise.connector.persist.ConnectorExistsException; import com.google.enterprise.connector.persist.ConnectorNotFoundException; import com.google.enterprise.connector.pusher.DocumentAcceptorImpl; import com.google.enterprise.connector.pusher.PusherFactory; import com.google.enterprise.connector.scheduler.HostLoadManager; import com.google.enterprise.connector.scheduler.LoadManager; import com.google.enterprise.connector.scheduler.LoadManagerFactory; import com.google.enterprise.connector.scheduler.Schedule; import com.google.enterprise.connector.spi.AuthenticationManager; import com.google.enterprise.connector.spi.AuthorizationManager; import com.google.enterprise.connector.spi.ConfigureResponse; import com.google.enterprise.connector.spi.Connector; import com.google.enterprise.connector.spi.ConnectorPersistentStore; import com.google.enterprise.connector.spi.ConnectorPersistentStoreAware; import com.google.enterprise.connector.spi.ConnectorShutdownAware; import com.google.enterprise.connector.spi.ConnectorType; import com.google.enterprise.connector.spi.Lister; import com.google.enterprise.connector.spi.Retriever; import com.google.enterprise.connector.spi.TraversalContext; import com.google.enterprise.connector.spi.TraversalContextAware; import com.google.enterprise.connector.spi.TraversalManager; import com.google.enterprise.connector.spi.TraversalSchedule; import com.google.enterprise.connector.spi.TraversalScheduleAware; import com.google.enterprise.connector.traversal.BatchResult; import com.google.enterprise.connector.traversal.BatchResultRecorder; import com.google.enterprise.connector.traversal.BatchSize; import com.google.enterprise.connector.traversal.QueryTraverser; import com.google.enterprise.connector.traversal.TraversalDelayPolicy; import com.google.enterprise.connector.traversal.Traverser; import com.google.enterprise.connector.util.Clock; import com.google.enterprise.connector.util.filter.DocumentFilterFactory; import java.io.File; import java.io.IOException; import java.sql.SQLException; import java.util.Locale; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; /** * ConnectorCoordinator that supports Spring based connector instantiation and * persistent storage of connector configuration, schedule and traversal state. */ // TODO (jlacey): Context and ConnectorCoordinatorImpl are dangerously close // to encountering deadlock issues, calling each other from synchronized // methods. The most likely scenerio for deadlock would probably be when // registering the CM with a new GSA. Be wary when adding addition // synchronization to these classes. class ConnectorCoordinatorImpl implements ConnectorCoordinator, ChangeHandler, BatchResultRecorder { private static final Logger LOGGER = Logger.getLogger(ConnectorCoordinatorImpl.class.getName()); /** A default, disabled Schedule. */ private static final Schedule DEFAULT_SCHEDULE = new Schedule(); /** * Invariant context. */ private final String name; private final PusherFactory pusherFactory; private final ConnectorPersistentStoreFactory connectorPersistentStoreFactory; private final ThreadPool threadPool; private final ChangeDetector changeDetector; private final Clock clock; /** * Context set when an instance is created or configured and cleared when the * instance is removed. It is an invariant that either both of these are null * or neither is. */ private TypeInfo typeInfo; private InstanceInfo instanceInfo; /** * Context that is filled in on first use. Requires instanceInfo. */ private ConnectorInterfaces interfaces; /** * LoadManager controls throughput to avoid overtaxing the Repository * or the GSA. */ private final LoadManager loadManager; /** * The current traversal Schedule. */ private Schedule traversalSchedule; /** * The finish time for delay of next traversal. Used to postpone * starting another traversal for a short period of time, as dictated * by a {@link TraversalDelayPolicy}. */ private long traversalDelayEnd; /** * Context set when a batch is run. This must be cleared and any * running batch must be canceled when interfaces is reset. */ private TaskHandle taskHandle; Object currentBatchKey; /** * The cached TraversalManager. */ private TraversalManager traversalManager; private boolean traversalEnabled; /** * The cached Lister. */ private Lister lister; /** * The running Lister TaskHandle. */ private TaskHandle listerHandle; /** * The cached Retriever. */ private Retriever retriever; /** * Constructs a ConnectorCoordinator for the named {@link Connector}. * The {@code Connector} may not yet have a concrete instance. * * @param name The name of the Connector. * @param pusherFactory creates instances of * {@link com.google.enterprise.connector.pusher.Pusher Pusher} * for pushing documents to the GSA. * @param loadManagerFactory creates instances of * {@link LoadManager} for controlling the feed rate. * @param connectorPersistentStoreFactory creates instances of * {@link ConnectorPersistentStore} for Connectors that request * database access. * @param threadPool the {@link ThreadPool} for running traversals. * @param changeDetector used to invoke the ChangeHandlers for changes * originiting within this Manager instance (or from the Servlets). */ ConnectorCoordinatorImpl(String name, PusherFactory pusherFactory, LoadManagerFactory loadManagerFactory, ConnectorPersistentStoreFactory connectorPersistentStoreFactory, ThreadPool threadPool, ChangeDetector changeDetector, Clock clock) { this.name = name; this.threadPool = threadPool; this.clock = clock; this.changeDetector = changeDetector; this.pusherFactory = pusherFactory; this.loadManager = loadManagerFactory.newLoadManager(name); this.connectorPersistentStoreFactory = connectorPersistentStoreFactory; this.traversalEnabled = true; } /** * Returns the name of this {@link Connector}. * * @return The name of this Connector. */ @Override public String getConnectorName() { return name; } /** * Returns {@code true} if an instance of this {@link Connector} exists. */ @Override public synchronized boolean exists() { return (instanceInfo != null); } /** * Removes this {@link Connector} instance. Halts traversals, * removes the Connector instance from the known connectors, * and removes the Connector's on-disk representation. */ @Override public void removeConnector() { synchronized(this) { resetBatch(); if (instanceInfo != null) { instanceInfo.removeConnector(); } } // This must not be called while holding the lock. changeDetector.detect(); } /** * Removes this {@link Connector} instance. Halts traversals, * removes the Connector instance from the known connectors, * and removes the Connector's on-disk representation. */ @Override public synchronized void connectorRemoved() { LOGGER.info("Dropping connector: " + name); try { resetBatch(); if (instanceInfo != null) { File connectorDir = instanceInfo.getConnectorDir(); shutdownConnector(true); removeConnectorDirectory(connectorDir); } } finally { instanceInfo = null; typeInfo = null; traversalSchedule = null; traversalDelayEnd = 0; } } /** * Returns the {@link AuthenticationManager} for the {@link Connector} * instance. * * @return an AuthenticationManager * @throws InstantiatorException */ @Override public synchronized AuthenticationManager getAuthenticationManager() throws ConnectorNotFoundException, InstantiatorException { return getConnectorInterfaces().getAuthenticationManager(); } /** * Returns the {@link AuthorizationManager} for the {@link Connector} * instance. * * @return an AuthorizationManager * @throws InstantiatorException */ @Override public synchronized AuthorizationManager getAuthorizationManager() throws ConnectorNotFoundException, InstantiatorException { return getConnectorInterfaces().getAuthorizationManager(); } /** * Returns the {@link TraversalManager} for the {@link Connector} * instance. * * @return a TraversalManager * @throws InstantiatorException */ @Override public synchronized TraversalManager getTraversalManager() throws ConnectorNotFoundException, InstantiatorException { if (traversalManager == null && traversalEnabled) { traversalManager = getConnectorInterfaces().getTraversalManager(); if (traversalManager == null) { LOGGER.fine("Connector " + name + " has no TraversalManager."); traversalEnabled = false; } else { setTraversalContext(traversalManager); setTraversalSchedule(traversalManager, getSchedule()); } } return traversalManager; } /** If target is TraversalContextAware, set its traversalContext. */ private void setTraversalContext(Object target) { if (target != null && target instanceof TraversalContextAware) { TraversalContext traversalContext = Context.getInstance().getTraversalContext(); try { ((TraversalContextAware) target).setTraversalContext(traversalContext); } catch (Exception e) { LOGGER.log(Level.WARNING, "Unable to set TraversalContext", e); } } } /** If target is TraversalScheduleAware, set its traversalSchedule. */ private void setTraversalSchedule(Object target, Schedule schedule) { if (target != null && target instanceof TraversalScheduleAware) { try { ((TraversalScheduleAware) target).setTraversalSchedule(schedule); } catch (Exception e) { LOGGER.log(Level.WARNING, "Unable to set TraversalSchedule", e); } } } /** * Returns the {@link Lister} for the {@link Connector} * instance. * * @return a Lister * @throws InstantiatorException */ public synchronized Lister getLister() throws ConnectorNotFoundException, InstantiatorException { if (lister == null) { lister = getConnectorInterfaces().getLister(); setTraversalContext(lister); setTraversalSchedule(lister, getSchedule()); } return lister; } /** Start up the Lister for the connector, if this CM allows feeding. */ private synchronized void startLister() throws InstantiatorException { if (Context.getInstance().isFeeding()) { try { Lister lister = getLister(); if (lister != null) { LOGGER.log(Level.FINE, "Starting Lister for connector {0}", name); lister.setDocumentAcceptor(new DocumentAcceptorImpl( name, pusherFactory)); listerHandle = threadPool.submit(new CancelableLister(name, lister)); } } catch (ConnectorNotFoundException e) { throw new InstantiatorException("Connector not found " + name, e); } catch (Exception e) { LOGGER.log(Level.WARNING, "Failed to start Lister for connector " + name, e); } } } /** Stop the Lister for the connector. */ private synchronized void stopLister() { if (listerHandle != null && !listerHandle.isDone()) { LOGGER.log(Level.FINE, "Stopping Lister for connector {0}", name); listerHandle.cancel(); } else if (lister != null) { // We check lister here rather than getLister() to also avoid // logging this if the lister exists but has never been started. LOGGER.log(Level.FINER, "Already stopped Lister for connector {0}", name); } } /** * Return a {@link Retriever} that may be used to access content for the * document identified by {@code docid}. If the connector does not support * the {@link Retriever} interface, {@code null} is returned. * * @return a {@link Retriever}, or {@code null} if none is available * @throws ConnectorNotFoundException if this {@link ConnectorCoordinator} * does not exist. * @throws InstantiatorException if unable to instantiate the requested * {@link Retriever} */ @Override public Retriever getRetriever() throws ConnectorNotFoundException, InstantiatorException { if (retriever == null) { retriever = getConnectorInterfaces().getRetriever(); setTraversalContext(retriever); } return retriever; } /** * Get populated configuration form snippet for the {@link Connector} * instance. * * @param locale A java.util.Locale which the implementation may use to * produce appropriate descriptions and messages * @return a ConfigureResponse object. The form must be prepopulated with the * supplied data in the map. * @see ConnectorType#getPopulatedConfigForm(Map, Locale) */ @Override public synchronized ConfigureResponse getConfigForm(Locale locale) throws ConnectorNotFoundException, InstantiatorException { Configuration config = getConnectorConfiguration(); ConnectorType connectorType = typeInfo.getConnectorType(); try { ConfigureResponse response; // If config is null, the connector was deleted behind our back. // Treat this a new connector configuration. if (config == null) { response = connectorType.getConfigForm(locale); if (response != null) { return new ExtendedConfigureResponse(response, getConnectorInstancePrototype(name, typeInfo)); } } else { if (LOGGER.isLoggable(Level.CONFIG)) { LOGGER.config("GET POPULATED CONFIG FORM: locale = " + locale + ", configuration = " + SecurityUtils.getMaskedMap(config.getMap())); } response = connectorType.getPopulatedConfigForm(config.getMap(), locale); if (response != null) { return new ExtendedConfigureResponse(response, config); } } return response; } catch (Exception e) { throw new InstantiatorException("Failed to get configuration form", e); } } @Override public DocumentFilterFactory getDocumentFilterFactory() throws ConnectorNotFoundException { return getInstanceInfo().getDocumentFilterFactory(); } /** * Retraverses the {@link Connector}'s content from scratch. * Halts any traversal in progress and removes any saved traversal state, * forcing the Connector to retraverse the Repository from its start. */ @Override public void restartConnectorTraversal() throws ConnectorNotFoundException { // To avoid deadlock, this method calls InstanceInfo's getters and setters, // rather than the local ones. synchronized(this) { resetBatch(); // Halt any traversal. getInstanceInfo().setConnectorState(null); // Discard the checkpoint. // If Schedule was 'run-once', re-enable it to run again. But watch out - // empty disabled Schedules could look a bit like a run-once Schedule. Schedule schedule = getInstanceInfo().getConnectorSchedule(); if (schedule != null && schedule.isDisabled() && schedule.getRetryDelayMillis() == -1 && schedule.nextScheduledInterval() != -1) { schedule.setDisabled(false); getInstanceInfo().setConnectorSchedule(schedule); } } // TODO: Remove this if we switch completely to JDBC PersistentStore. // FileStore doesn't notice the deletion of a file that did not exist. if (lister != null) { connectorCheckpointChanged(null); } // This must not be called while holding the lock. changeDetector.detect(); } /** * Returns a traversal {@link Schedule} for the {@link Connector} instance, * or a default, disabled {@link Schedule} if the connector has no schedule. */ private synchronized Schedule getSchedule() { if (traversalSchedule == null) { try { traversalSchedule = getInstanceInfo().getConnectorSchedule(); if (traversalSchedule == null) { return DEFAULT_SCHEDULE; } } catch (ConnectorNotFoundException e) { return DEFAULT_SCHEDULE; } } return traversalSchedule; } /** * Sets the traversal {@link Schedule} for the {@link Connector}. * * @param connectorSchedule Schedule to store or null to unset any existing * Schedule. * @throws ConnectorNotFoundException if the connector is not found */ @Override public void setConnectorSchedule(Schedule connectorSchedule) throws ConnectorNotFoundException { synchronized(this) { // Persistently store the new schedule. getInstanceInfo().setConnectorSchedule(connectorSchedule); } // This must not be called while holding the lock. changeDetector.detect(); } /** * Handles a change to the traversal {@link Schedule} for the * {@link Connector}. * * @param schedule new Connector Schedule */ @Override public synchronized void connectorScheduleChanged(Schedule schedule) { LOGGER.config("Schedule changed for connector " + name + ": " + schedule); // Refresh the cached Schedule. traversalSchedule = schedule; // Update the LoadManager with the new load. loadManager.setLoad((schedule == null) ? HostLoadManager.DEFAULT_HOST_LOAD : schedule.getLoad()); // Let the traversal manager know the schedule changed. setTraversalSchedule(traversalManager, schedule); // Let the lister know the schedule changed. setTraversalSchedule(lister, schedule); // New Schedule may alter DelayPolicy. delayTraversal(TraversalDelayPolicy.IMMEDIATE); } /** * Fetches the traversal {@link Schedule} for the {@link Connector}. * * @return the Schedule, or null if there is no stored Schedule * for this connector. * @throws ConnectorNotFoundException if the connector is not found */ @Override public synchronized Schedule getConnectorSchedule() throws ConnectorNotFoundException { // Fetch the Schedule and Update the cache while we're at it. traversalSchedule = getInstanceInfo().getConnectorSchedule(); return traversalSchedule; } /** * Set the Connector's traversal state. * * @param state a String representation of the state to store. * If null, any previous stored state is discarded. * @throws ConnectorNotFoundException if this {@link ConnectorCoordinator} * does not exist. */ @Override public synchronized void setConnectorState(String state) throws ConnectorNotFoundException { getInstanceInfo().setConnectorState(state); // Must not call ChangeDetector, as this is called from a synchronized // block in BatchCoordinator. } /** * Handle a change to the Connector's traversal state. The only change * that matters is a change from non-null to null. This indicates that * the Repository should be retraversed from the beginning. * * @param checkpoint a String representation of the traversal state. */ @Override public void connectorCheckpointChanged(String checkpoint) { // If checkpoint has been nulled, then traverse the repository from scratch. if (checkpoint == null) { synchronized(this) { // Halt any traversal in progress. resetBatch(); // Shut down any Lister. stopLister(); try { // Restart Lister. startLister(); } catch (InstantiatorException e) { LOGGER.log(Level.WARNING, "Failed to restart Lister for connector " + name, e); } // Kick off a restart immediately. delayTraversal(TraversalDelayPolicy.IMMEDIATE); } LOGGER.info("Restarting traversal from beginning for connector " + name); } } /** * Returns the Connector's traversal state. * * @return String representation of the stored state, or * null if no state is stored. * @throws ConnectorNotFoundException if this {@link ConnectorCoordinator} * does not exist. */ @Override public synchronized String getConnectorState() throws ConnectorNotFoundException { return getInstanceInfo().getConnectorState(); } /** * Returns the name of the {@link ConnectorType} for this {@link Connector} * instance. */ @Override public synchronized String getConnectorTypeName() throws ConnectorNotFoundException { return getInstanceInfo().getTypeInfo().getConnectorTypeName(); } /** * Sets the {@link Configuration} for this {@link ConnectorCoordinator}. * If this {@link ConnectorCoordinator} supports persistence this will * persist the new Configuration. */ @Override public ConfigureResponse setConnectorConfiguration(TypeInfo newTypeInfo, Configuration configuration, Locale locale, boolean update) throws ConnectorNotFoundException, ConnectorExistsException, InstantiatorException { LOGGER.info("Configuring connector " + name); String typeName = newTypeInfo.getConnectorTypeName(); Preconditions.checkArgument(typeName.equals(configuration.getTypeName()), "TypeInfo must match Configuration type"); ConfigureResponse response = null; synchronized(this) { resetBatch(); if (instanceInfo != null) { if (!update) { throw new ConnectorExistsException(); } // TODO(jlacey): We don't want to hold the lock here, either, // because it blocks other actions on the admin console during // connector creation. if (typeName.equals(typeInfo.getConnectorTypeName())) { configuration = new Configuration(configuration, getConnectorConfiguration()); response = resetConfig(instanceInfo.getConnectorDir(), typeInfo, configuration, locale); } else { // An existing connector is being given a new type - drop then add. // TODO: This shouldn't be called from within the synchronized block // because it will kick the change detector. removeConnector(); response = createNewConnector(newTypeInfo, configuration, locale); if (response != null) { // TODO: We need to restore original Connector config. This is // necessary once we allow update a Connector with new ConnectorType. // However, when doing so consider: createNewConnector could have // thrown InstantiatorException as well. Also, you need to kick // the changeDetector (but not in this synchronized block). LOGGER.severe("Failed to update Connector configuration."); // + " Restoring original Connector configuration."); } } } else { if (update) { throw new ConnectorNotFoundException(); } response = createNewConnector(newTypeInfo, configuration, locale); } } if (response == null) { // This must not be called while holding the lock. changeDetector.detect(); } else { return new ExtendedConfigureResponse(response, configuration.getXml()); } return response; } @Override public synchronized Configuration getConnectorConfiguration() throws ConnectorNotFoundException { Configuration config = getInstanceInfo().getConnectorConfiguration(); if (config != null) { // Strip any "google*" properties that were saved by previous versions. config = removeGoogleProperties(config); if (config.getXml() == null) { return new Configuration(config, getConnectorInstancePrototype(name, typeInfo)); } } return config; } /** * Delay future traversals for a short period of time, as dictated by the * {@link TraversalDelayPolicy}. * * @param delayPolicy a TraversalDelayPolicy */ @VisibleForTesting synchronized void delayTraversal(TraversalDelayPolicy delayPolicy) { switch (delayPolicy) { case IMMEDIATE: traversalDelayEnd = 0; // No delay. break; case POLL: try { Schedule schedule = getSchedule(); int retryDelayMillis = schedule.getRetryDelayMillis(); if (retryDelayMillis == Schedule.POLLING_DISABLED) { if (!schedule.isDisabled()) { // We reached then end of the repository, but aren't allowed // to poll looking for new content to arrive. Disable the // traversal schedule. traversalDelayEnd = 0; schedule.setDisabled(true); // To avoid deadlock, this method calls InstanceInfo's setter, // rather than the local one. traversalSchedule = schedule; // Update local cache. getInstanceInfo().setConnectorSchedule(schedule); LOGGER.info("Traversal complete. Automatically pausing " + "traversal for connector " + name); } } else if (retryDelayMillis > 0) { traversalDelayEnd = clock.getTimeMillis() + retryDelayMillis; LOGGER.fine("Delaying traversal for connector " + name + " " + ((retryDelayMillis < (120 * 1000)) ? ((retryDelayMillis / 1000) + " seconds") : ((retryDelayMillis / (60 * 1000)) + " minutes")) + " after repository reveals no new content."); } } catch (ConnectorNotFoundException cnfe) { // Connector was deleted while processing the batch. Don't take any // action at the moment, as we may be in the middle of a reconfig. } break; case ERROR: traversalDelayEnd = clock.getTimeMillis() + Traverser.ERROR_WAIT_MILLIS; LOGGER.info("Delaying traversal for connector " + name + " " + (Traverser.ERROR_WAIT_MILLIS / (60 * 1000)) + " minutes after encountering an error."); break; } } /** * Returns {@code true} if it is OK to start a traversal, * {@code false} otherwise. */ @VisibleForTesting synchronized boolean shouldRun() { // If we do not have a traversing instance, don't run. if (instanceInfo == null) { return false; } // If traversals are disabled, don't run. if (!traversalEnabled) { return false; } // Are we already running? If so, we shouldn't run again. if (taskHandle != null && !taskHandle.isDone()) { return false; } // If the traversal schedule is disabled, don't run. if (getSchedule().isDisabled()) { return false; } // Don't run if we have postponed traversals. if (clock.getTimeMillis() < traversalDelayEnd) { return false; } // Don't run if we have exceeded our configured host load. if (loadManager.shouldDelay()) { return false; } // Run if we are within scheduled traversal interval. return getSchedule().inScheduledInterval(); } /** * Starts running a batch for this {@link ConnectorCoordinator} if a batch is * not already running. * * @return true if this call started a batch */ @Override public synchronized boolean startBatch() { if (!shouldRun()) { return false; } BatchSize batchSize = loadManager.determineBatchSize(); if (batchSize.getHint() == 0) { return false; } try { TraversalManager traversalManager = getTraversalManager(); if (traversalManager == null) { return false; } currentBatchKey = new Object(); BatchCoordinator batchCoordinator = new BatchCoordinator(this); Traverser traverser = new QueryTraverser(pusherFactory, traversalManager, batchCoordinator, name, Context.getInstance().getTraversalContext(), clock); TimedCancelable batch = new CancelableBatch(traverser, name, batchCoordinator, batchCoordinator, batchSize); taskHandle = threadPool.submit(batch); return true; } catch (ConnectorNotFoundException cnfe) { LOGGER.log(Level.WARNING, "Connector not found - this is normal if you " + " recently reconfigured your connector instance: " + cnfe); } catch (InstantiatorException ie) { LOGGER.log(Level.WARNING, "Failed to perform connector content traversal.", ie); delayTraversal(TraversalDelayPolicy.ERROR); } return false; } /** * Records the supplied traversal batch results. Updates the * {@link LoadManager} with number of documents traversed, * and implements the requested {@link TraversalDelayPolicy}. * * @param result a BatchResult */ @Override public synchronized void recordResult(BatchResult result) { loadManager.recordResult(result); delayTraversal(result.getDelayPolicy()); } /** * Shuts down this {@link Connector} instance. Halts any in-progress * traversals, instructs the Connector that it is being shut down, * and discards the Connector instance. Any on-disk representation of * the connector remains. */ @Override public synchronized void shutdown() { resetBatch(); shutdownConnector(false); instanceInfo = null; } /** * Halts any in-progess traversals for this {@link Connector} instance. * Some or all of the information collected during the current traversal * may be discarded. */ synchronized void resetBatch() { if (taskHandle != null) { taskHandle.cancel(); } taskHandle = null; currentBatchKey = null; interfaces = null; // Discard cached interface instances. traversalManager = null; retriever = null; traversalSchedule = null; } /** * Informs the Connector instance that it will be shut down * and possibly deleted. * * @param delete {@code true} if the {@code Connector} will be deleted. */ private void shutdownConnector(boolean delete) { // Discard cached instances. traversalManager = null; retriever = null; traversalSchedule = null; // Shut down the Lister, if running. stopLister(); lister = null; if (instanceInfo != null && instanceInfo.getConnector() instanceof ConnectorShutdownAware) { ConnectorShutdownAware csa = (ConnectorShutdownAware)(instanceInfo.getConnector()); try { LOGGER.fine("Shutting down connector " + name); csa.shutdown(); } catch (Exception e) { LOGGER.log(Level.WARNING, "Problem shutting down connector " + name + " during configuration update.", e); } if (delete) { try { LOGGER.fine("Removing connector " + name); csa.delete(); } catch (Exception e) { LOGGER.log(Level.WARNING, "Failed to remove connector " + name, e); } } } } /** * Returns the {@link InstanceInfo} representing the associated * {@link Connector} instance. * * @throws ConnectorNotFoundException if there is no associated Connector * instance. */ @VisibleForTesting InstanceInfo getInstanceInfo() throws ConnectorNotFoundException { verifyConnectorInstanceAvailable(); return instanceInfo; } /** * Checks if this {@code ConnectorCoordinator} is associated * with an active {@link Connector} instance. * * @throws ConnectorNotFoundException if there is no associated Connector * instance. */ private void verifyConnectorInstanceAvailable() throws ConnectorNotFoundException { if (instanceInfo == null) { throw new ConnectorNotFoundException("Connector instance " + name + " not available."); } } /** * Returns a {@link ConnectorInterfaces} object that exposes the public * interfaces of the associated {@link Connector} instance. * * @throws ConnectorNotFoundException if there is no associated Connector * instance. */ private ConnectorInterfaces getConnectorInterfaces() throws ConnectorNotFoundException { if (interfaces == null) { InstanceInfo info = getInstanceInfo(); interfaces = new ConnectorInterfaces(name, info.getConnector()); } return interfaces; } private ConfigureResponse createNewConnector(TypeInfo newTypeInfo, Configuration configuration, Locale locale) throws InstantiatorException { if (newTypeInfo == null) { throw new IllegalStateException( "Create new connector with no type specified."); } if (instanceInfo != null) { throw new IllegalStateException( "Create new connector when one already exists."); } File connectorDir = getConnectorDir(newTypeInfo); boolean didMakeConnectorDir = makeConnectorDirectory(connectorDir); // If there is no connectorInstance.xml in the config, look to see if // there is one stored. If not, fetch the connectorInstancePrototype // from the connectorType. if (configuration.getXml() == null) { // Check to see if there is a pre-existing connectorInstance.xml. Configuration old = new InstanceInfo(name, connectorDir, newTypeInfo) .getConnectorConfiguration(); if (old != null && old.getXml() != null) { configuration = new Configuration(configuration, old); } else { configuration = new Configuration(configuration, getConnectorInstancePrototype(name, newTypeInfo)); } } try { ConfigureResponse result = resetConfig(connectorDir, newTypeInfo, configuration, locale); if (result != null && result.getMessage() != null && didMakeConnectorDir) { removeConnectorDirectory(connectorDir); } return result; } catch (InstantiatorException ie) { if (didMakeConnectorDir) { removeConnectorDirectory(connectorDir); } throw (ie); } } @Override public void connectorAdded(TypeInfo newTypeInfo, Configuration configuration) throws InstantiatorException { if (instanceInfo != null) { throw new IllegalStateException( "Create new connector when one already exists."); } File connectorDir = getConnectorDir(newTypeInfo); boolean didMakeConnectorDir = makeConnectorDirectory(connectorDir); try { connectorConfigurationChanged(newTypeInfo, configuration); } catch (InstantiatorException ie) { if (didMakeConnectorDir) { removeConnectorDirectory(connectorDir); } throw (ie); } } private ConfigureResponse resetConfig(File connectorDir, TypeInfo newTypeInfo, Configuration config, Locale locale) throws InstantiatorException { // Copy the configuration map, adding a couple of additional // context properties. validateConfig() may also alter this map. Configuration newConfiguration = addGoogleProperties(config, connectorDir); // Validate the configuration. if (LOGGER.isLoggable(Level.CONFIG)) { LOGGER.config("VALIDATE CONFIG: Validating connector " + name + ": locale = " + locale + ", " + newConfiguration); } ConfigureResponse response = validateConfig(connectorDir, newTypeInfo, newConfiguration, locale); if (response != null) { // If validateConfig() returns a non-null response with an error message, // or populated config form, then consider it an invalid config that // needs to be corrected. Return the response so that the config form // may be redisplayed. if ((response.getMessage() != null) || (response.getFormSnippet() != null)) { LOGGER.warning("A rejected configuration for connector " + name + " was returned."); return response; } // If validateConfig() returns a response with no message or formSnippet, // but does include a configuration Map; then consider it a valid, // but possibly altered configuration and use it. if (response.getConfigData() != null) { if (LOGGER.isLoggable(Level.CONFIG)) { LOGGER.config("A modified configuration for connector " + name + " was returned: " + SecurityUtils.getMaskedMap(response.getConfigData())); } newConfiguration = new Configuration(response.getConfigData(), config); } } // We have an apparently valid configuration. Create a connector instance // with that configuration. // TODO(jlacey): Try to avoid instantiating the connector 3 times. // 1) The connector may have called makeConnector. // 2) We create an InstanceInfo here for validation but don't save it. // 3) The change detector instantiates the connector and sets // instanceInfo in connectorConfigurationChanged. // The gap between the first two is only the parsing the of the // document filter chain bean in the InstanceInfo constructor. // That could be added to makeConnector, but then we would have to // ensure that makeConnector was called around here somewhere if // the connector in fact did not call it. InstanceInfo newInstanceInfo = new InstanceInfo(name, connectorDir, newTypeInfo, newConfiguration); // Set up connector database access. setDatabaseAccess(newInstanceInfo); // Only after validateConfig and instantiation succeeds do we // save the new configuration to persistent store. newInstanceInfo.setConnectorConfiguration( removeGoogleProperties(newConfiguration)); return null; } /** * Handles a change to a Connector's Configuration. Shuts down any * current instance of the Connector and starts up a new instance with * the new Configuration. * * @param newTypeInfo the {@link TypeInfo} for this this Connector. * @param config a new {@link Configuration} for this Connector. */ @Override public void connectorConfigurationChanged(TypeInfo newTypeInfo, Configuration config) throws InstantiatorException { if (LOGGER.isLoggable(Level.CONFIG)) { LOGGER.config("New configuration for connector " + name + ": " + config); } File connectorDir = getConnectorDir(newTypeInfo); // We have an apparently valid configuration. Create a connector instance // with that configuration. InstanceInfo newInstanceInfo = new InstanceInfo(name, connectorDir, newTypeInfo, addGoogleProperties(config, connectorDir)); // Tell old connector instance to shut down, as it is being replaced. resetBatch(); shutdownConnector(false); setDatabaseAccess(newInstanceInfo); // TODO(jlacey): Unsynchronized writes to otherwise synchronized fields. instanceInfo = newInstanceInfo; typeInfo = newTypeInfo; // Prefetch the AuthenticationManager and AuthorizationManager to // avoid AuthN and AuthZ timeouts when logging in to the repository // at search time. try { getAuthenticationManager(); getAuthorizationManager(); } catch (ConnectorNotFoundException cnfe) { // Not going to happen here, but even if it did, we don't care. } catch (InstantiatorException ie) { // Likely failed connector.login(). This attempt to cache the managers // failed. However it is not important yet, so log it and continue on. LOGGER.log(Level.WARNING, "Prefetch failed for connector " + name, ie); } // The load value in a Schedule is docs/minute. loadManager.setLoad(getSchedule().getLoad()); // Start up a Lister, if the Connector supports one. startLister(); // Allow newly modified connector to resume traversals immediately. delayTraversal(TraversalDelayPolicy.IMMEDIATE); } /** * Sets GData configuration for GData aware Connectors. */ /* TODO: This should either set real GData configuration or we should supply * the connector with a GDataClientFactory. Unfortunately full GData * configuration (protocol, addr, port, userId, userPwd) in the CM doesn't * work for on-board Connector Managers, as it isn't editable. At this * point, we supply just the GSA Feed Host to the connector and leave the * rest of the GData configuration to the connector. * A GDataClientFactory runs into problems when the feed host changes. */ /* TODO: This is not HA safe! (But no change to CM config is.) */ public void setGDataConfig() throws ConnectorNotFoundException, InstantiatorException { Map<String, String> newConfig = Maps.newHashMap(); newConfig.put(PropertiesUtils.GOOGLE_FEED_HOST, Context.getInstance().getGsaFeedHost()); getInstanceInfo().setGDataConfig(newConfig); } /** * Adds special "google" properties to the Configuration. */ private Configuration addGoogleProperties(Configuration config, File connectorDir) { Map<String, String> newConfig = Maps.newHashMap(config.getMap()); newConfig.put(PropertiesUtils.GOOGLE_CONNECTOR_NAME, name); newConfig.put(PropertiesUtils.GOOGLE_CONNECTOR_WORK_DIR, connectorDir.getPath()); Context context = Context.getInstance(); newConfig.put(PropertiesUtils.GOOGLE_WORK_DIR, context.getCommonDirPath()); // TODO: This should either set real GData configuration or supply the // connector with a GDataClientFactory. See comment on setGDataConfig(). if (context.getGsaFeedHost() != null) { // Because Properties hate nulls. newConfig.put(PropertiesUtils.GOOGLE_FEED_HOST, context.getGsaFeedHost()); } return new Configuration(newConfig, config); } /** * Removes non-persistable "google" properties from the Configuration. */ private Configuration removeGoogleProperties(Configuration config) { // Make a copy of the map with google* entries removed. Map<String, String> newConfig = Maps.newHashMap( Maps.filterKeys(config.getMap(), new Predicate<String>() { public boolean apply(String input) { return !PropertiesUtils.GOOGLE_NONPERSISTABLE_PROPERTIES .contains(input); } })); return new Configuration(newConfig, config); } @SuppressWarnings("unchecked") private void setDatabaseAccess(InstanceInfo instanceInfo) throws InstantiatorException { try { if (connectorPersistentStoreFactory != null) { Connector connector = instanceInfo.getConnector(); if (connector instanceof ConnectorPersistentStoreAware) { ConnectorPersistentStore pstore = connectorPersistentStoreFactory.newConnectorPersistentStore( instanceInfo.getName(), instanceInfo.getTypeInfo().getConnectorTypeName(), instanceInfo.getTypeInfo().getConnectorType()); LOGGER.config("Setting DatabasePersistentStore for connector " + name); ((ConnectorPersistentStoreAware) connector).setDatabaseAccess(pstore); } } } catch (SQLException e) { throw new InstantiatorException("Failed to set database access for " + "connector " + instanceInfo.getName(), e); } } private ConfigureResponse validateConfig( File connectorDir, TypeInfo typeInfo, Configuration config, Locale locale) throws InstantiatorException { ConnectorInstanceFactory factory = new ConnectorInstanceFactory(name, typeInfo, config, connectorPersistentStoreFactory); try { return typeInfo.getConnectorType() .validateConfig(config.getMap(), locale, factory); } catch (Exception e) { throw new InstantiatorException("Unexpected validateConfig failure.", e); } finally { factory.shutdown(); } } // Extract connectorInstance.xml from the Connector's jar file. private static String getConnectorInstancePrototype(String name, TypeInfo typeInfo) { try { return StringUtils.streamToStringAndThrow( typeInfo.getConnectorInstancePrototype().getInputStream()); } catch (IOException ioe) { LOGGER.log(Level.WARNING, "Failed to extract connectorInstance.xml " + " for connector " + name, ioe); } return null; } /** Manufactures the connector directory path from the TypeInfo and name. */ private File getConnectorDir(TypeInfo typeInfo) { return new File(typeInfo.getConnectorTypeDir(), name); } /** * Make the on-disk {@link Connector} directory, if it doesn't already exist. * * @return true if directory was created, false otherwise. * @throws InstantiatorException if the directory could not be created. */ private boolean makeConnectorDirectory(File connectorDir) throws InstantiatorException { if (connectorDir.exists()) { if (connectorDir.isDirectory()) { // we don't know why this directory already exists, but we're ok with it LOGGER.warning("Connector directory " + connectorDir.getAbsolutePath() + "; already exists for connector " + name); } else { throw new InstantiatorException("Existing file blocks creation of " + "connector directory at " + connectorDir.getAbsolutePath() + " for connector " + name); } } else { LOGGER.finest("Making connector directory " + connectorDir.getAbsolutePath()); if (!connectorDir.mkdirs()) { throw new InstantiatorException("Can not create " + "connector directory at " + connectorDir.getAbsolutePath() + " for connector " + name); } return true; } return false; } /** * Remove the on-disk {@link Connector} representation. This removes * many or all files in the {@code Connector}'s directory. */ // TODO: Issue 87: Should we force the removal of files created by the // Connector implementation? ConnectorShutdownAware.delete() gives the // Connector an opportunity to delete these files in a cleaner fashion. private static void removeConnectorDirectory(File connectorDir) { if (connectorDir.exists()) { LOGGER.finest("Removing connector directory " + connectorDir.getAbsolutePath()); if (!connectorDir.delete()) { LOGGER.warning("Failed to delete connector directory " + connectorDir.getPath() + "; this connector may be difficult to delete."); } } } }