/* $Id$ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.tests; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.agents.interfaces.*; import org.apache.manifoldcf.crawler.interfaces.*; import org.apache.manifoldcf.crawler.system.ManifoldCF; import java.io.*; import java.util.*; /** Connector class to be used by scheduling tests */ public class SchedulingRepositoryConnector extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector { // Throttling: the next time a fetch is allowed, per bin. protected static final Map<String,Long> nextFetchTime = new HashMap<String,Long>(); public SchedulingRepositoryConnector() { } @Override public String[] getBinNames(String documentIdentifier) { int index = documentIdentifier.indexOf("/"); return new String[]{documentIdentifier.substring(0,index)}; } @Override public String addSeedDocuments(ISeedingActivity activities, Specification spec, String lastSeedVersion, long seedTime, int jobMode) throws ManifoldCFException, ServiceInterruption { // A seed per domain String numberDomainsString = params.getParameter("numberDomains"); if (numberDomainsString == null) numberDomainsString = "10"; int numberDomains = Integer.parseInt(numberDomainsString); for (int i = 0; i < numberDomains; i++) { activities.addSeedDocument(Integer.toString(i)+"/",null); } System.out.println("Seeding completed at "+System.currentTimeMillis()); return ""; } /** Process a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above. *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. */ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { String documentsPerSeedString = params.getParameter("documentsperseed"); if (documentsPerSeedString == null) documentsPerSeedString = "200"; int documentsPerSeed = Integer.parseInt(documentsPerSeedString); String timePerDocumentString = params.getParameter("timeperdocument"); if (timePerDocumentString == null) timePerDocumentString = "500"; int timePerDocument = Integer.parseInt(timePerDocumentString); // Seeds process instantly; other documents have a throttle based on the bin. for (int i = 0; i < documentIdentifiers.length; i++) { String documentIdentifier = documentIdentifiers[i]; if (documentIdentifier.endsWith("/")) { System.out.println("Evaluating seed for "+documentIdentifier+" at "+System.currentTimeMillis()); // Seed document. Add the document ID's for (int j = 0; j < documentsPerSeed; j++) { activities.addDocumentReference(documentIdentifier + Integer.toString(j),documentIdentifier,null, null,null,null); } System.out.println("Done evaluating seed for "+documentIdentifier+" at "+System.currentTimeMillis()); } else { System.out.println("Fetching "+documentIdentifier); // Find the bin String bin = documentIdentifier.substring(0,documentIdentifier.indexOf("/")); // For now they are all the same long binTimePerDocument = timePerDocument; long now = System.currentTimeMillis(); long whenFetch; synchronized (nextFetchTime) { Long time = nextFetchTime.get(bin); if (time == null) whenFetch = now; else whenFetch = time.longValue(); nextFetchTime.put(bin,new Long(whenFetch + binTimePerDocument)); } if (whenFetch > now) { System.out.println("Waiting "+(whenFetch-now)+" to fetch "+documentIdentifier); try { ManifoldCF.sleep(whenFetch-now); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } System.out.println("Wait complete for "+documentIdentifier); } } } } }