// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.enterprise.connector.util.diffing;
import com.google.common.annotations.VisibleForTesting;
import java.sql.Timestamp;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.logging.Logger;
/**
* Bounded buffer of {@link Change} objects for buffering between the
* {@link DocumentSnapshotRepositoryMonitor} and the
* {@link CheckpointAndChangeQueue}.
*
* @since 2.8
*/
@VisibleForTesting
public class ChangeQueue implements ChangeSource {
private final BlockingQueue<Change> pendingChanges;
/** Milliseconds to sleep after a scan that finds no changes. */
private volatile long sleepInterval;
/** Logger that records crawl activities for each repository scan.*/
private final CrawlActivityLogger activityLogger;
/**
* Flag that decides whether to add delay after each scan or only after
* scans with no changes found.
*/
private final boolean introduceDelayAfterEveryScan;
/**
* Interface to log the crawl activity for each crawl.
*/
public static interface CrawlActivityLogger {
/**Records the start time of the scan.
* @param time timestamp of the start time
*/
void scanBeginAt(Timestamp time);
/**
* Records the end time of the scan.
* @param time timestamp of the end time
*/
void scanEndAt(Timestamp time);
/**
* To record that crawling thread just received a document for
* which either the content or the meta data or both have changed since the
* last scan.
* @param documentId Id for the changed document
*/
void gotChangedDocument(String documentId);
/**
* To record that crawling thread just received a new document
* which was not present in the last scan.
* @param documentId Id of the newly added document
*/
void gotNewDocument(String documentId);
/**
* To record that crawling thread just found out that the
* previously existing document got deleted and is no longer present.
* @param documentId id of the deleted document
*/
void gotDeletedDocument(String documentId);
}
public static class DefaultCrawlActivityLogger implements CrawlActivityLogger {
private int newDocumentCount, changedDocumentCount, deletedDocumentCount;
private Timestamp startTime, endTime;
private static final Logger LOG = Logger.getLogger(
DefaultCrawlActivityLogger.class.getName());
@Override
public void scanBeginAt(Timestamp time) {
logCrawlStatistics();
resetLogStatistics();
startTime = time;
LOG.info("Scan started at : " + time);
}
@Override
public void scanEndAt(Timestamp time) {
endTime = time;
LOG.info("Scan completed at : " + endTime);
logCrawlStatistics();
resetLogStatistics();
}
/**
* This method resets all the variable that keep track of scans.
*/
private void resetLogStatistics() {
newDocumentCount = changedDocumentCount = deletedDocumentCount = 0;
startTime = endTime = null;
}
/**
* This method logs all the important information related to each scan of
* the crawling thread. It logs following information for each scan
* 1. Time taken to perform the complete scan. <br>
* 2. No. of new documents found. <br>
* 3. No. of changed documents found. <br>
* 4. No. of deleted documents found.
*/
private void logCrawlStatistics() {
if (startTime != null) {
LOG.info("Crawl statistics for this scan");
if (endTime == null) {
LOG.info("The scan failed to complete. The crawl statistics reflect the figures at the time of starting next scan");
endTime = new Timestamp(System.currentTimeMillis());
}
String duration = (new Long((endTime.getTime() - startTime.getTime()) / 1000)).toString();
LOG.info("Scan duration : " + duration + " seconds");
LOG.info("# of new documents found : " + newDocumentCount);
LOG.info("# of changed documents found : " + changedDocumentCount);
LOG.info("# of deleted documents found : " + deletedDocumentCount);
}
}
@Override
public void gotChangedDocument(String documentId) {
++changedDocumentCount;
LOG.fine("Changed document found during the crawl; document id is : " + documentId);
}
@Override
public void gotDeletedDocument(String documentId) {
++deletedDocumentCount;
LOG.fine("Deleted document found during the crawl; document id is : " + documentId);
}
@Override
public void gotNewDocument(String documentId) {
++newDocumentCount;
LOG.fine("New document found during the crawl; document id is : " + documentId);
}
}
/**
* Adds {@link Change Changes} to this queue.
*/
private class Callback implements DocumentSnapshotRepositoryMonitor.Callback {
private int changeCount = 0;
public void passBegin() {
changeCount = 0;
activityLogger.scanBeginAt(new Timestamp(System.currentTimeMillis()));
}
@Override
public void changedDocument(DocumentHandle dh, MonitorCheckpoint mcp)
throws InterruptedException {
++changeCount;
pendingChanges.put(new Change(Change.FactoryType.CLIENT, dh, mcp));
activityLogger.gotChangedDocument(dh.getDocumentId());
}
@Override
public void deletedDocument(DocumentHandle dh, MonitorCheckpoint mcp)
throws InterruptedException {
++changeCount;
pendingChanges.put(new Change(Change.FactoryType.INTERNAL, dh, mcp));
activityLogger.gotDeletedDocument(dh.getDocumentId());
}
@Override
public void newDocument(DocumentHandle dh, MonitorCheckpoint mcp)
throws InterruptedException {
++changeCount;
pendingChanges.put(new Change(Change.FactoryType.CLIENT, dh, mcp));
activityLogger.gotNewDocument(dh.getDocumentId());
}
@Override
public void passComplete(MonitorCheckpoint mcp) throws InterruptedException {
activityLogger.scanEndAt(new Timestamp(System.currentTimeMillis()));
if (introduceDelayAfterEveryScan || changeCount == 0) {
Thread.sleep(sleepInterval);
}
}
public boolean hasEnqueuedAtLeastOneChangeThisPass() {
return changeCount > 0;
}
@Override
public void passPausing(int sleepms) throws InterruptedException {
Thread.sleep(sleepms);
}
}
/**
* Create a new ChangeQueue.
*
* @param size the queue size
* @param sleepInterval how often to look for new changes, in milliseconds
* @param activityLogger a CrawlActivityLogger
*/
/* @VisibleForTesting */
public ChangeQueue(int size, long sleepInterval, CrawlActivityLogger activityLogger) {
this(size, sleepInterval, false, activityLogger);
}
private ChangeQueue(int size, long sleepInterval,
boolean introduceDelayAfterEachScan, CrawlActivityLogger activityLogger) {
pendingChanges = new ArrayBlockingQueue<Change>(size);
this.sleepInterval = sleepInterval;
this.activityLogger = activityLogger;
this.introduceDelayAfterEveryScan = introduceDelayAfterEachScan;
}
public ChangeQueue(QueuePropertyFetcher propertyFetcher,
CrawlActivityLogger activityLogger) {
this(propertyFetcher.getQueueSize(),
propertyFetcher.getDelayBetweenTwoScansInMillis(),
propertyFetcher.isIntroduceDelayAfterEveryScan(), activityLogger);
}
/**
* @return the monitor callback. This is a factory method for use by Spring,
* which needs a Callback to create a
* {@code DocumentSnapshotRepositoryMonitor}.
*/
public DocumentSnapshotRepositoryMonitor.Callback newCallback() {
return new Callback();
}
/**
* Gets the next available change from the ChangeQueue. Will wait up to
* 1/4 second for a change to appear if none is immediately available.
*
* @return the next available change, or {@code null} if no changes are
* available
*/
public Change getNextChange() {
try {
return pendingChanges.poll(250L, TimeUnit.MILLISECONDS);
} catch (InterruptedException ie) {
return null;
}
}
/**
* Sets the sleepInterval in milliseconds, normally passed from the
* retry delay in a TraversalSchedule.
*/
public void setSleepInterval(long sleepInterval) {
this.sleepInterval = sleepInterval;
}
@VisibleForTesting
long getSleepInterval() {
return this.sleepInterval;
}
/** Empties the queue of all pending changes. */
void clear() {
pendingChanges.clear();
}
/**
* Interface to retrieve the properties required for ChangeQueue.
*/
public static interface QueuePropertyFetcher {
/**
* Gets the queue size.
*/
int getQueueSize();
/**
* Gets the delay to add between two scans.
*/
long getDelayBetweenTwoScansInMillis();
/**
* Gets the flag to decide whether to sleep after each scan
* or only after scans with no changes found.
*/
boolean isIntroduceDelayAfterEveryScan();
}
}