/* $Id: NullConnector.java 988245 2010-08-23 18:39:35Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.agents.output.nullconnector;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import java.util.*;
import java.io.*;
/** This is a null output connector. It eats all output and simply logs the events.
*/
public class NullConnector extends org.apache.manifoldcf.agents.output.BaseOutputConnector
{
public static final String _rcsid = "@(#)$Id: NullConnector.java 988245 2010-08-23 18:39:35Z kwright $";
// Activities we log
/** Ingestion activity */
public final static String INGEST_ACTIVITY = "document ingest";
/** Document removal activity */
public final static String REMOVE_ACTIVITY = "document deletion";
/** Job notify activity */
public final static String JOB_COMPLETE_ACTIVITY = "output notification";
/** Constructor.
*/
public NullConnector()
{
}
/** Return the list of activities that this connector supports (i.e. writes into the log).
*@return the list.
*/
@Override
public String[] getActivitiesList()
{
return new String[]{INGEST_ACTIVITY,REMOVE_ACTIVITY,JOB_COMPLETE_ACTIVITY};
}
/** Connect.
*@param configParameters is the set of configuration parameters, which
* in this case describe the target appliance, basic auth configuration, etc. (This formerly came
* out of the ini file.)
*/
@Override
public void connect(ConfigParams configParameters)
{
super.connect(configParameters);
}
/** Close the connection. Call this before discarding the connection.
*/
@Override
public void disconnect()
throws ManifoldCFException
{
super.disconnect();
}
/** Set up a session */
protected void getSession()
throws ManifoldCFException, ServiceInterruption
{
}
/** Test the connection. Returns a string describing the connection integrity.
*@return the connection's status as a displayable string.
*/
@Override
public String check()
throws ManifoldCFException
{
try
{
getSession();
return super.check();
}
catch (ServiceInterruption e)
{
return "Transient error: "+e.getMessage();
}
}
/** Get an output version string, given an output specification. The output version string is used to uniquely describe the pertinent details of
* the output specification and the configuration, to allow the Connector Framework to determine whether a document will need to be output again.
* Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
* is used to describe the version of the actual document.
*
* This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
* necessary.
*@param spec is the current output specification for the job that is doing the crawling.
*@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
* the document will not need to be sent again to the output data store.
*/
@Override
public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption
{
return new VersionContext("",params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
* This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
* necessary.
* The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
* output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode
* an output description string in order to determine what should be done.
*@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
* and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
*@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
*@param document is the document data to be processed (handed to the output data store).
*@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document. May be null.
*@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
*@return the document status (accepted or permanently rejected).
*/
@Override
public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
// Establish a session
getSession();
final StringBuffer sb = new StringBuffer();
final Iterator<String> metadataKeys = document.getFields();
boolean needComma = false;
while (metadataKeys.hasNext()) {
final String key = metadataKeys.next();
final String[] values = document.getFieldAsStrings(key);
if (needComma) {
sb.append(",");
} else {
needComma = true;
}
sb.append("\"").append(key).append("\":").append(Integer.toString(values.length));
}
activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",sb.toString());
return DOCUMENTSTATUS_ACCEPTED;
}
/** Remove a document using the connector.
* Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
*@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
* and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
*@param outputDescription is the last description string that was constructed for this document by the getOutputDescription() method above.
*@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
*/
@Override
public void removeDocument(String documentURI, String outputDescription, IOutputRemoveActivity activities)
throws ManifoldCFException, ServiceInterruption
{
// Establish a session
getSession();
activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
}
/** Notify the connector of a completed job.
* This is meant to allow the connector to flush any internal data structures it has been keeping around, or to tell the output repository that this
* is a good time to synchronize things. It is called whenever a job is either completed or aborted.
*@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
*/
@Override
public void noteJobComplete(IOutputNotifyActivity activities)
throws ManifoldCFException, ServiceInterruption
{
activities.recordActivity(null,JOB_COMPLETE_ACTIVITY,null,"","OK",null);
}
}