/* $Id: JobDescription.java 988245 2010-08-23 18:39:35Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.jobs;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import java.util.*;
/** This is a paper object describing a job.
* Each job in the ManifoldCF framework has:
* - an identifier;
* - a description;
* - a repository connection;
* - one of a number of scheduling options: starting every n hours/days/weeks/months, on specific dates, or "continuous" (which basically
* establishes a priority queue based on modification frequency);
* - "seeds" (or starting points), which are the places where scanning begins.
* Also remember that since incremental deletion must occur on a job-by-job basis, the scanning data also records the job that
* performed the scan, so that each job can rescan previously ingested data and delete documents that have been removed.
*/
public class JobDescription implements IJobDescription
{
/** Source-control identification string for this file. */
public static final String _rcsid = "@(#)$Id: JobDescription.java 988245 2010-08-23 18:39:35Z kwright $";
// Data
/** Flag recording whether this object is new; a freshly constructed description starts out as new. */
protected boolean isNew = true;
/** The job identifier, or null if none has been set. */
protected Long id = null;
/** The job description text, or null if none has been set. */
protected String description = null;
/** The name of the job's connection, or null if none has been set. */
protected String connectionName = null;
/** The pipeline stages of the job, in stage-number order. */
protected final List<PipelineStage> pipelineStages = new ArrayList<PipelineStage>();
/** The notifications of the job, in index order. */
protected final List<Notification> notifications = new ArrayList<Notification>();
/** The job type; defaults to TYPE_CONTINUOUS. */
protected int type = TYPE_CONTINUOUS;
/** The job start method; defaults to START_WINDOWBEGIN. */
protected int startMethod = START_WINDOWBEGIN;
/** The job priority; defaults to 5. */
protected int priority = 5;
// Absolute job-triggering times
protected ScheduleList scheduleList = new ScheduleList();
// Throttle
protected Float rate = null;
// Default interval for continuous crawling
// (Long.valueOf is used rather than the deprecated Long constructor; the value is unchanged.)
protected Long interval = Long.valueOf(1000L*3600L*24L); // 1 day is the default
// Maximum interval for continuous crawling
protected Long maxInterval = null;
// Document expiration time for this job, in milliseconds
protected Long expiration = null; // Never is the default
// Default reseed interval for continuous crawling
protected Long reseedInterval = Long.valueOf(60L * 60L * 1000L); // 1 hour is the default
// Document specification
protected Specification documentSpecification = new Specification();
// Hop count filters. Keys are link types (Strings), values are maximum hop counts (Longs).
protected HashMap hopCountFilters = new HashMap();
// Hopcount mode
protected int hopcountMode = HOPCOUNT_ACCURATE;
// Read-only mode. Once set, every mutator of this object throws IllegalStateException.
protected boolean readOnly = false;
/** Produce a copy of this job description, labelled read-only or writable as requested.
* If a read-only copy is requested and this object is itself already read-only, no copy is made
* and this very instance is returned.
*@param readOnly is true if the returned object should be read-only.
*@return the copy, or this object in the case described above.
*/
public JobDescription duplicate(boolean readOnly)
{
  // An object that can no longer change can serve as its own read-only duplicate.
  if (this.readOnly && readOnly)
  {
    return this;
  }

  JobDescription copy = new JobDescription();
  copy.id = id;
  copy.isNew = isNew;
  copy.connectionName = connectionName;

  // Specifications can be modified directly by whoever obtains them, so each one is duplicated
  // with the same read-only flag as the copy that will own it.
  copy.documentSpecification = documentSpecification.duplicate(readOnly);
  for (int i = 0; i < pipelineStages.size(); i++)
  {
    PipelineStage stage = pipelineStages.get(i);
    copy.pipelineStages.add(new PipelineStage(stage.getPrerequisiteStage(),
      stage.getIsOutput(),
      stage.getConnectionName(),
      stage.getDescription(),
      stage.getSpecification().duplicate(readOnly)));
  }
  for (int i = 0; i < notifications.size(); i++)
  {
    Notification entry = notifications.get(i);
    copy.notifications.add(new Notification(entry.getConnectionName(),
      entry.getDescription(),
      entry.getSpecification().duplicate(readOnly)));
  }

  copy.description = description;
  copy.type = type;
  // The schedule list is only reachable through this class's methods, so a plain duplicate is enough.
  copy.scheduleList = scheduleList.duplicate();
  copy.interval = interval;
  copy.maxInterval = maxInterval;
  copy.expiration = expiration;
  copy.reseedInterval = reseedInterval;
  copy.rate = rate;
  copy.priority = priority;
  copy.startMethod = startMethod;
  copy.hopcountMode = hopcountMode;

  // Copy the hop count filters one entry at a time.
  for (Object key : hopCountFilters.keySet())
  {
    String linkType = (String)key;
    copy.hopCountFilters.put(linkType,(Long)hopCountFilters.get(linkType));
  }

  copy.readOnly = readOnly;
  return copy;
}
/** Switch this description into read-only mode. Call this only after the object has been completely specified.
* The specifications of all pipeline stages and notifications, and the document specification, are made read-only too.
* A read-only description cannot be made writable again except by duplicating it.
*/
public void makeReadOnly()
{
  if (!readOnly)
  {
    readOnly = true;
    for (int i = 0; i < pipelineStages.size(); i++)
    {
      pipelineStages.get(i).getSpecification().makeReadOnly();
    }
    for (int i = 0; i < notifications.size(); i++)
    {
      notifications.get(i).getSpecification().makeReadOnly();
    }
    documentSpecification.makeReadOnly();
  }
}
/** Record whether this object is new.
*@param isNew is true if the object is new.
*@throws IllegalStateException if this object is read-only.
*/
public void setIsNew(boolean isNew)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.isNew = isNew;
}
/** Check whether this object is flagged as new.
*@return true if the object is new.
*/
@Override
public boolean getIsNew()
{
  return this.isNew;
}
/** Assign the job identifier.
*@param id is the id.
*@throws IllegalStateException if this object is read-only.
*/
public void setID(Long id)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.id = id;
}
/** Retrieve the job identifier.
*@return the id, or null if none has been set.
*/
@Override
public Long getID()
{
  return this.id;
}
/** Assign the job description text.
*@param description is the description.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setDescription(String description)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.description = description;
}
/** Retrieve the job description text.
*@return the description, or null if none has been set.
*/
@Override
public String getDescription()
{
  return this.description;
}
/** Assign the name of the job's connection.
*@param connectionName is the connection name.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setConnectionName(String connectionName)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.connectionName = connectionName;
}
/** Retrieve the name of the job's connection.
*@return the connection name, or null if none has been set.
*/
@Override
public String getConnectionName()
{
  return this.connectionName;
}
/** Remove every pipeline stage from the job.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void clearPipeline()
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  pipelineStages.clear();
}
/** Append a pipeline stage to the end of the pipeline.
*@param prerequisiteStage is the prerequisite stage number for this connection, or -1 if there is none.
*@param isOutput is true if the pipeline stage is an output connection.
*@param pipelineStageConnectionName is the name of the pipeline connection to add.
*@param pipelineStageDescription is a description of the pipeline stage being added.
*@return the empty specification created for the new pipeline stage.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public Specification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  PipelineStage appended = new PipelineStage(prerequisiteStage,isOutput,pipelineStageConnectionName,pipelineStageDescription);
  pipelineStages.add(appended);
  return appended.getSpecification();
}
/** Count the pipeline stages.
*@return the current number of pipeline stages.
*/
@Override
public int countPipelineStages()
{
  int stageCount = pipelineStages.size();
  return stageCount;
}
/** Insert a new pipeline stage in front of an existing one.
* The new stage takes over the prerequisite of the stage it displaces, and the prerequisite references
* of the displaced stage and of all later stages are adjusted for the new numbering.
*@param index is the index to insert pipeline stage before; a stage must already exist at this index.
*@param isOutput is true if the stage being inserted is an output connection.
*@param pipelineStageConnectionName is the connection name.
*@param pipelineStageDescription is the description.
*@return the newly-created specification for the inserted stage.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public Specification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  // Whatever kind of stage is inserted, it inherits the prerequisite of the stage currently at this position.
  // How the displaced stage's own reference changes depends on the kind of stage inserted, and is
  // decided by adjustReplacedStage().
  PipelineStage displaced = pipelineStages.get(index);
  PipelineStage inserted = new PipelineStage(displaced.getPrerequisiteStage(),isOutput,pipelineStageConnectionName,pipelineStageDescription);
  pipelineStages.add(index,inserted);
  displaced.adjustReplacedStage(index,isOutput);
  // The displaced stage now sits at index+1; fix up the back-references of everything after it.
  for (int position = index + 2; position < pipelineStages.size(); position++)
  {
    pipelineStages.get(position).adjustForInsert(index);
  }
  return inserted.getSpecification();
}
/** Look up the prerequisite stage number of a pipeline stage.
*@param index is the index of the pipeline stage to get.
*@return the preceding stage number for that stage, or -1 if there is none.
*/
@Override
public int getPipelineStagePrerequisite(int index)
{
  PipelineStage stage = pipelineStages.get(index);
  return stage.getPrerequisiteStage();
}
/** Determine whether a pipeline stage is an output connection.
*@param index is the index of the pipeline stage to check.
*@return true if it is an output connection.
*/
@Override
public boolean getPipelineStageIsOutputConnection(int index)
{
  PipelineStage stage = pipelineStages.get(index);
  return stage.getIsOutput();
}
/** Look up the connection name of a pipeline stage.
*@param index is the index of the pipeline stage whose connection name to get.
*@return the name of the connection.
*/
@Override
public String getPipelineStageConnectionName(int index)
{
  PipelineStage stage = pipelineStages.get(index);
  return stage.getConnectionName();
}
/** Look up the description of a pipeline stage.
*@param index is the index of the pipeline stage whose description to get.
*@return the description of the pipeline stage.
*/
@Override
public String getPipelineStageDescription(int index)
{
  PipelineStage stage = pipelineStages.get(index);
  return stage.getDescription();
}
/** Look up the specification of a pipeline stage.
*@param index is the index of the pipeline stage whose specification is needed.
*@return the specification object held by that stage.
*/
@Override
public Specification getPipelineStageSpecification(int index)
{
  PipelineStage stage = pipelineStages.get(index);
  return stage.getSpecification();
}
/** Remove a pipeline stage.
* Stages that followed the removed one have their prerequisite references renumbered; any stage that
* depended on the removed stage is re-pointed at the removed stage's own prerequisite.
*@param index is the index of the pipeline stage to delete.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void deletePipelineStage(int index)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  PipelineStage removed = pipelineStages.remove(index);
  // Every stage that used to come after the removed one has shifted down by one position.
  for (int position = index; position < pipelineStages.size(); position++)
  {
    PipelineStage follower = pipelineStages.get(position);
    follower.adjustForDelete(index,removed.getPrerequisiteStage());
  }
}
/** Remove every notification from the job.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void clearNotifications()
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  notifications.clear();
}
/** Append a notification to the end of the notification list.
*@param notificationConnectionName is the name of the notification connection to add.
*@param notificationDescription is a description of the notification being added.
*@return the empty specification created for the new notification.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public Specification addNotification(String notificationConnectionName, String notificationDescription)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  Notification appended = new Notification(notificationConnectionName,notificationDescription);
  notifications.add(appended);
  return appended.getSpecification();
}
/** Count the notifications.
*@return the current number of notifications.
*/
@Override
public int countNotifications()
{
  int notificationCount = notifications.size();
  return notificationCount;
}
/** Look up the connection name of a notification.
*@param index is the index of the notification whose connection name to get.
*@return the name of the connection.
*/
@Override
public String getNotificationConnectionName(int index)
{
  Notification entry = notifications.get(index);
  return entry.getConnectionName();
}
/** Look up the description of a notification.
*@param index is the index of the notification whose description to get.
*@return the description of the notification.
*/
@Override
public String getNotificationDescription(int index)
{
  Notification entry = notifications.get(index);
  return entry.getDescription();
}
/** Look up the specification of a notification.
*@param index is the index of the notification whose specification is needed.
*@return the specification object held by that notification.
*/
@Override
public Specification getNotificationSpecification(int index)
{
  Notification entry = notifications.get(index);
  return entry.getSpecification();
}
/** Remove a notification.
*@param index is the index of the notification to delete.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void deleteNotification(int index)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  notifications.remove(index);
}
/** Insert a new notification at a given position.
*@param index is the index to insert the notification before.
*@param notificationConnectionName is the connection name.
*@param notificationDescription is the description.
*@return the newly-created specification for the inserted notification.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public Specification insertNotification(int index, String notificationConnectionName, String notificationDescription)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  // Notifications carry no references to one another, so nothing else needs renumbering.
  Notification inserted = new Notification(notificationConnectionName,notificationDescription);
  notifications.add(index,inserted);
  return inserted.getSpecification();
}
/** Assign the job type.
*@param type is the type (as an integer).
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setType(int type)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.type = type;
}
/** Retrieve the job type.
*@return the type (as an integer).
*/
@Override
public int getType()
{
  return this.type;
}
/** Assign the job's start method.
*@param startMethod is the start method (as an integer).
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setStartMethod(int startMethod)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.startMethod = startMethod;
}
/** Retrieve the job's start method.
*@return the start method (as an integer).
*/
@Override
public int getStartMethod()
{
  return this.startMethod;
}
// For day-specific jobs. These occur at a given time that matches the specifications.
// The specifications set certain criteria (specific hours, days of the week, etc.)
/** Remove every scheduling record from the job.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void clearScheduleRecords()
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  scheduleList.clear();
}
/** Add a scheduling record to the job.
*@param record is the record to add.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void addScheduleRecord(ScheduleRecord record)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  scheduleList.addRecord(record);
}
/** Count the scheduling records.
*@return the count.
*/
@Override
public int getScheduleRecordCount()
{
  int recordCount = scheduleList.getRecordCount();
  return recordCount;
}
/** Look up a scheduling record by number.
*@param index is the record number.
*@return the record.
*/
@Override
public ScheduleRecord getScheduleRecord(int index)
{
  ScheduleRecord record = scheduleList.getRecord(index);
  return record;
}
/** Remove a scheduling record by number.
*@param index is the record number.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void deleteScheduleRecord(int index)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  scheduleList.deleteRecord(index);
}
// For continuous jobs
// This is the rescheduling interval to use when no calculated interval is known
/** Assign the default rescheduling interval, in milliseconds.
*@param interval is the default interval, or null for infinite.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setInterval(Long interval)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.interval = interval;
}
/** Retrieve the default rescheduling interval, in milliseconds.
*@return the default interval, or null for infinite.
*/
@Override
public Long getInterval()
{
  return this.interval;
}
/** Assign the maximum rescheduling interval, in milliseconds.
*@param interval is the maximum interval, or null if forever.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setMaxInterval(Long interval)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  maxInterval = interval;
}
/** Retrieve the maximum rescheduling interval, in milliseconds.
*@return the max interval, or null if forever.
*/
@Override
public Long getMaxInterval()
{
  return this.maxInterval;
}
/** Assign the document expiration time, in milliseconds.
*@param time is the maximum expiration time of a document, in milliseconds, or null if none.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setExpiration(Long time)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.expiration = time;
}
/** Retrieve the document expiration time, in milliseconds.
*@return the maximum expiration time of a document, or null if none.
*/
@Override
public Long getExpiration()
{
  return this.expiration;
}
/** Assign the reseeding interval, in milliseconds.
*@param interval is the interval, or null for infinite.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setReseedInterval(Long interval)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  reseedInterval = interval;
}
/** Retrieve the reseeding interval, in milliseconds.
*@return the interval, or null if infinite.
*/
@Override
public Long getReseedInterval()
{
  return this.reseedInterval;
}
/** Retrieve the document specification.
* The object held by this description is returned directly, not a copy.
*@return the document specification object.
*/
@Override
public Specification getSpecification()
{
  return this.documentSpecification;
}
/** Assign the job priority. This is a simple integer between 1 and 10, where
* 1 is the highest priority. The value is stored as given; no range check is performed here.
*@param priority is the priority.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setPriority(int priority)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.priority = priority;
}
/** Retrieve the job priority.
*@return the priority (a number between 1 and 10).
*/
@Override
public int getPriority()
{
  return this.priority;
}
// Hopcount filters
/** Retrieve the hopcount filters the job has defined.
* The returned map is a clone of the internal one, so changes made to it do not affect this object.
*@return the set as a map, keyed by Strings and containing Longs.
*/
@Override
public Map getHopCountFilters()
{
  Object snapshot = hopCountFilters.clone();
  return (Map)snapshot;
}
/** Remove every hopcount filter from the job.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void clearHopCountFilters()
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  hopCountFilters.clear();
}
/** Add a hopcount filter to the job, replacing any existing filter for the same link type.
* Passing a null maximum removes the filter for that link type instead.
*@param linkType is the type of link the filter applies to.
*@param maxHops is the maximum hop count. Use null to remove a filter.
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void addHopCountFilter(String linkType, Long maxHops)
{
  if (readOnly)
    throw new IllegalStateException("Attempt to change read-only object");
  if (maxHops == null)
  {
    // Honor the documented contract: null means "remove". Storing a null value would leave
    // the link type present in the map returned by getHopCountFilters(), mapped to null.
    hopCountFilters.remove(linkType);
  }
  else
  {
    hopCountFilters.put(linkType,maxHops);
  }
}
/** Retrieve the hopcount mode.
*@return the hopcount mode (as an integer).
*/
@Override
public int getHopcountMode()
{
  return this.hopcountMode;
}
/** Assign the hopcount mode.
*@param mode is the hopcount mode (as an integer).
*@throws IllegalStateException if this object is read-only.
*/
@Override
public void setHopcountMode(int mode)
{
  if (readOnly)
  {
    throw new IllegalStateException("Attempt to change read-only object");
  }
  this.hopcountMode = mode;
}
/** A single notification of a job: a connection name, a description, and a specification.
* The three references are fixed at construction time.
*/
protected static class Notification
{
  protected final String connectionName;
  protected final String description;
  protected final Specification specification;

  /** Create a notification with a new, empty specification.
  *@param connectionName is the name of the notification connection.
  *@param description is the description of the notification.
  */
  public Notification(String connectionName, String description)
  {
    this(connectionName,description,new Specification());
  }

  /** Create a notification around an existing specification.
  *@param connectionName is the name of the notification connection.
  *@param description is the description of the notification.
  *@param spec is the specification to use; it is stored as given, not copied.
  */
  public Notification(String connectionName, String description, Specification spec)
  {
    this.specification = spec;
    this.description = description;
    this.connectionName = connectionName;
  }

  /** @return the specification of this notification. */
  public Specification getSpecification()
  {
    return this.specification;
  }

  /** @return the connection name of this notification. */
  public String getConnectionName()
  {
    return this.connectionName;
  }

  /** @return the description of this notification. */
  public String getDescription()
  {
    return this.description;
  }
}
/** A single stage of a job's pipeline.
* Everything about a stage is fixed at construction time except its prerequisite stage number,
* which is renumbered as other stages are inserted into or deleted from the pipeline.
*/
protected static class PipelineStage
{
  protected int prerequisiteStage;
  protected final boolean isOutput;
  protected final String connectionName;
  protected final String description;
  protected final Specification specification;

  /** Create a pipeline stage with a new, empty specification.
  *@param prerequisiteStage is the prerequisite stage number.
  *@param isOutput is true if this stage is an output connection.
  *@param connectionName is the name of the stage's connection.
  *@param description is the description of the stage.
  */
  public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description)
  {
    this(prerequisiteStage,isOutput,connectionName,description,new Specification());
  }

  /** Create a pipeline stage around an existing specification.
  *@param prerequisiteStage is the prerequisite stage number.
  *@param isOutput is true if this stage is an output connection.
  *@param connectionName is the name of the stage's connection.
  *@param description is the description of the stage.
  *@param spec is the specification to use; it is stored as given, not copied.
  */
  public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description, Specification spec)
  {
    this.specification = spec;
    this.description = description;
    this.connectionName = connectionName;
    this.isOutput = isOutput;
    this.prerequisiteStage = prerequisiteStage;
  }

  /** Adjust this stage after a new stage has been inserted directly in front of it.
  *@param index is the index at which the new stage was inserted.
  *@param isOutput describes the INSERTED stage (not this one): true if it is an output connection.
  */
  public void adjustReplacedStage(int index, boolean isOutput)
  {
    if (isOutput)
    {
      // An inserted output stage does not become our prerequisite; just renumber if needed.
      adjustForInsert(index);
    }
    else
    {
      // Any other inserted stage becomes our new prerequisite.
      prerequisiteStage = index;
    }
  }

  /** Renumber the prerequisite reference to account for a stage inserted at the given index.
  *@param index is the index at which a stage was inserted.
  */
  public void adjustForInsert(int index)
  {
    if (prerequisiteStage < index)
    {
      // References to stages in front of the insertion point are unaffected.
      return;
    }
    prerequisiteStage += 1;
  }

  /** Renumber the prerequisite reference to account for the deletion of the stage at the given index.
  *@param index is the index of the stage that was deleted.
  *@param prerequisite is the prerequisite stage number the deleted stage had.
  */
  public void adjustForDelete(int index, int prerequisite)
  {
    if (prerequisiteStage == index)
    {
      // We depended on the deleted stage: inherit what it depended on.
      prerequisiteStage = prerequisite;
    }
    else if (prerequisiteStage > index)
    {
      // We depended on a stage after the deleted one, which has moved down by one.
      prerequisiteStage -= 1;
    }
  }

  /** @return the specification of this stage. */
  public Specification getSpecification()
  {
    return this.specification;
  }

  /** @return the prerequisite stage number of this stage. */
  public int getPrerequisiteStage()
  {
    return this.prerequisiteStage;
  }

  /** @return true if this stage is an output connection. */
  public boolean getIsOutput()
  {
    return this.isOutput;
  }

  /** @return the connection name of this stage. */
  public String getConnectionName()
  {
    return this.connectionName;
  }

  /** @return the description of this stage. */
  public String getDescription()
  {
    return this.description;
  }
}
}