/* $Id: QueuedDocument.java 988245 2010-08-23 18:39:35Z kwright $ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.system; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.agents.interfaces.*; import org.apache.manifoldcf.crawler.interfaces.*; import java.util.*; /** This class represents a document that will be placed on the document queue, and will be * processed by a worker thread. * The reason that DocumentDescription by itself is not used has to do with the fact that * a good deal more information about the document must be obtained in order to find the * last version ingested (which must be done in bulk, for performance reasons). Since we * are finding everything anyway, it makes sense to put what we have in a structure so * that the worker threads don't need to repeat what the stuffer thread did. */ public class QueuedDocument { public static final String _rcsid = "@(#)$Id: QueuedDocument.java 988245 2010-08-23 18:39:35Z kwright $"; /** The document description. */ protected final DocumentDescription documentDescription; /** The last ingested status, null meaning "never ingested". */ protected final Map<String,DocumentIngestStatusSet> lastIngestedStatus; /** The binnames for the document, according to the connector */ protected final String[] binNames; /** This flag indicates whether the document has been processed or not. */ protected boolean wasProcessed = false; /** Constructor. *@param documentDescription is the document description. *@param lastIngestedStatus is the document's last ingested status. *@param binNames are the bins associated with the document. */ public QueuedDocument(DocumentDescription documentDescription, Map<String,DocumentIngestStatusSet> lastIngestedStatus, String[] binNames) { this.documentDescription = documentDescription; this.lastIngestedStatus = lastIngestedStatus; this.binNames = binNames; } /** Get the document description. *@return the document description. */ public DocumentDescription getDocumentDescription() { return documentDescription; } /** Get the last ingested status. *@param outputConnectionName is the name of the output connection. *@return the last ingested status for that output, or null if not found. */ public DocumentIngestStatusSet getLastIngestedStatus(String outputConnectionName) { if (lastIngestedStatus == null) return null; return lastIngestedStatus.get(outputConnectionName); } /** Return true if there are *any* last ingested records. *@return true if any last ingested records exist. */ public boolean anyLastIngestedRecords() { if (lastIngestedStatus == null) return false; return lastIngestedStatus.size() > 0; } /** Get the bin names for this document */ public String[] getBinNames() { return binNames; } /** Check if document has been processed yet. *@return true if processed, false if not. */ public boolean wasProcessed() { return wasProcessed; } /** Note that the document was processed in some way. */ public void setProcessed() { wasProcessed = true; } }