/* $Id$ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.tests;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import java.io.*;
import java.nio.charset.StandardCharsets;
/** Connector class to be used by general integration tests that need documents */
public class TestingRepositoryConnector extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
{
public TestingRepositoryConnector()
{
}
@Override
public String addSeedDocuments(ISeedingActivity activities, Specification spec,
String lastSeedVersion, long seedTime, int jobMode)
throws ManifoldCFException, ServiceInterruption
{
String docCount = "3";
for (int i = 0; i < spec.getChildCount(); i++)
{
SpecificationNode sn = spec.getChild(i);
if (sn.getType().equals("documentcount"))
docCount = sn.getAttributeValue("count");
}
int count = Integer.parseInt(docCount);
for (int i = 0; i < count; i++)
{
String doc = "test"+i+".txt";
activities.addSeedDocument(doc,null);
}
return "";
}
/** Process a set of documents.
* This is the method that should cause each document to be fetched, processed, and the results either added
* to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
* The document specification allows this class to filter what is done based on the job.
* The connector will be connected before this method can be called.
*@param documentIdentifiers is the set of document identifiers to process.
*@param statuses are the currently-stored document versions for each document in the set of document identifiers
* passed in above.
*@param activities is the interface this method should use to queue up new document references
* and ingest documents.
*@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
*@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
*/
@Override
public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
throws ManifoldCFException, ServiceInterruption
{
for (int i = 0; i < documentIdentifiers.length; i++)
{
String documentIdentifier = documentIdentifiers[i];
RepositoryDocument rd = new RepositoryDocument();
byte[] bytes = documentIdentifier.getBytes(StandardCharsets.UTF_8);
rd.setBinary(new ByteArrayInputStream(bytes),bytes.length);
try
{
activities.ingestDocumentWithException(documentIdentifier,"","http://"+documentIdentifier,rd);
}
catch (IOException e)
{
throw new RuntimeException("Shouldn't be seeing IOException from binary array input stream: "+e.getMessage(),e);
}
}
}
}