/*
* Autopsy Forensic Browser
*
* Copyright 2011-2016 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.ingest;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import javax.swing.JOptionPane;
import org.netbeans.api.progress.ProgressHandle;
import org.openide.util.Cancellable;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.IngestJobInfo;
import org.sleuthkit.datamodel.IngestJobInfo.IngestJobStatusType;
import org.sleuthkit.datamodel.IngestModuleInfo;
import org.sleuthkit.datamodel.IngestModuleInfo.IngestModuleType;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
/**
* Encapsulates a data source and the ingest module pipelines used to process
* it.
*/
final class DataSourceIngestJob {
private static final Logger logger = Logger.getLogger(DataSourceIngestJob.class.getName());
/**
* These fields define a data source ingest job: the parent ingest job, an
* ID, the user's ingest job settings, and the data source to be processed.
*/
private final IngestJob parentJob;
private static final AtomicLong nextJobId = new AtomicLong(0L);
private final long id;
private final IngestJobSettings settings;
private final Content dataSource;
/**
* A data source ingest job runs in stages.
*/
private static enum Stages {
/**
* Setting up for processing.
*/
INITIALIZATION,
/**
* Running high priority data source level ingest modules and file level
* ingest modules.
*/
FIRST,
/**
* Running lower priority, usually long-running, data source level
* ingest modules.
*/
SECOND,
/**
* Cleaning up.
*/
FINALIZATION
};
private volatile Stages stage = DataSourceIngestJob.Stages.INITIALIZATION;
private final Object stageCompletionCheckLock = new Object();
/**
* A data source ingest job has separate data source level ingest module
* pipelines for the first and second processing stages. Longer running,
* lower priority modules belong in the second stage pipeline, although this
* cannot be enforced. Note that the pipelines for both stages are created
* at job start up to allow for verification that they both can be started
* up without errors.
*/
private final Object dataSourceIngestPipelineLock = new Object();
private DataSourceIngestPipeline firstStageDataSourceIngestPipeline;
private DataSourceIngestPipeline secondStageDataSourceIngestPipeline;
private DataSourceIngestPipeline currentDataSourceIngestPipeline;
/**
* A data source ingest job has a collection of identical file level ingest
* module pipelines, one for each file level ingest thread in the ingest
* manager. A blocking queue is used to dole out the pipelines to the
* threads and an ordinary list is used when the ingest job needs to access
* the pipelines to query their status.
*/
private final LinkedBlockingQueue<FileIngestPipeline> fileIngestPipelinesQueue = new LinkedBlockingQueue<>();
private final List<FileIngestPipeline> fileIngestPipelines = new ArrayList<>();
/**
* A data source ingest job supports cancellation of either the currently
* running data source level ingest module or the entire ingest job.
*
* TODO: The currentDataSourceIngestModuleCancelled field and all of the
* code concerned with it is a hack to avoid an API change. The next time an
* API change is legal, a cancel() method needs to be added to the
* IngestModule interface and this field should be removed. The "ingest job
* is canceled" queries should also be removed from the IngestJobContext
* class.
*/
private volatile boolean currentDataSourceIngestModuleCancelled;
private volatile boolean cancelled;
private volatile IngestJob.CancellationReason cancellationReason = IngestJob.CancellationReason.NOT_CANCELLED;
private final Object cancellationStateMonitor = new Object();
private final List<String> cancelledDataSourceIngestModules = new CopyOnWriteArrayList<>();
/**
* A data source ingest job uses the task scheduler singleton to create and
* queue the ingest tasks that make up the job.
*/
private static final IngestTasksScheduler taskScheduler = IngestTasksScheduler.getInstance();
/**
* A data source ingest job can run interactively using NetBeans progress
* handles.
*/
private final boolean doUI;
/**
* A data source ingest job uses these fields to report data source level
* ingest progress.
*/
private final Object dataSourceIngestProgressLock = new Object();
private ProgressHandle dataSourceIngestProgress;
/**
* A data source ingest job uses these fields to report file level ingest
* progress.
*/
private final Object fileIngestProgressLock = new Object();
private final List<String> filesInProgress = new ArrayList<>();
private long estimatedFilesToProcess;
private long processedFiles;
private ProgressHandle fileIngestProgress;
private String currentFileIngestModule = "";
private String currentFileIngestTask = "";
private List<IngestModuleInfo> ingestModules = new ArrayList<>();
private IngestJobInfo ingestJob;
/**
* A data source ingest job uses this field to report its creation time.
*/
private final long createTime;
/**
* Constructs an object that encapsulates a data source and the ingest
* module pipelines used to process it.
*
* @param parentJob The ingest job of which this data source ingest
* job is a part.
* @param dataSource The data source to be ingested.
* @param settings The settings for the ingest job.
* @param runInteractively Whether or not this job should use NetBeans
* progress handles.
*/
DataSourceIngestJob(IngestJob parentJob, Content dataSource, IngestJobSettings settings, boolean runInteractively) {
this.parentJob = parentJob;
this.id = DataSourceIngestJob.nextJobId.getAndIncrement();
this.dataSource = dataSource;
this.settings = settings;
this.doUI = runInteractively;
this.createTime = new Date().getTime();
this.createIngestPipelines();
}
/**
* Creates the file and data source ingest pipelines.
*/
private void createIngestPipelines() {
List<IngestModuleTemplate> ingestModuleTemplates = this.settings.getEnabledIngestModuleTemplates();
/**
* Make mappings of ingest module factory class names to templates.
*/
Map<String, IngestModuleTemplate> dataSourceModuleTemplates = new HashMap<>();
Map<String, IngestModuleTemplate> fileModuleTemplates = new HashMap<>();
for (IngestModuleTemplate template : ingestModuleTemplates) {
if (template.isDataSourceIngestModuleTemplate()) {
dataSourceModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
}
if (template.isFileIngestModuleTemplate()) {
fileModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
}
}
/**
* Use the mappings and the ingest pipelines configuration to create
* ordered lists of ingest module templates for each ingest pipeline.
*/
IngestPipelinesConfiguration pipelineConfigs = IngestPipelinesConfiguration.getInstance();
List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = DataSourceIngestJob.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageOneDataSourceIngestPipelineConfig());
List<IngestModuleTemplate> fileIngestModuleTemplates = DataSourceIngestJob.getConfiguredIngestModuleTemplates(fileModuleTemplates, pipelineConfigs.getFileIngestPipelineConfig());
List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = DataSourceIngestJob.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageTwoDataSourceIngestPipelineConfig());
/**
* Add any module templates that were not specified in the pipelines
* configuration to an appropriate pipeline - either the first stage
* data source ingest pipeline or the file ingest pipeline.
*/
for (IngestModuleTemplate template : dataSourceModuleTemplates.values()) {
firstStageDataSourceModuleTemplates.add(template);
}
for (IngestModuleTemplate template : fileModuleTemplates.values()) {
fileIngestModuleTemplates.add(template);
}
/**
* Construct the data source ingest pipelines.
*/
this.firstStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, firstStageDataSourceModuleTemplates);
this.secondStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, secondStageDataSourceModuleTemplates);
/**
* Construct the file ingest pipelines, one per file ingest thread.
*/
try {
int numberOfFileIngestThreads = IngestManager.getInstance().getNumberOfFileIngestThreads();
for (int i = 0; i < numberOfFileIngestThreads; ++i) {
FileIngestPipeline pipeline = new FileIngestPipeline(this, fileIngestModuleTemplates);
this.fileIngestPipelinesQueue.put(pipeline);
this.fileIngestPipelines.add(pipeline);
}
} catch (InterruptedException ex) {
/**
* The current thread was interrupted while blocked on a full queue.
* Blocking should actually never happen here, but reset the
* interrupted flag rather than just swallowing the exception.
*/
Thread.currentThread().interrupt();
}
SleuthkitCase skCase = Case.getCurrentCase().getSleuthkitCase();
try {
this.addIngestModules(firstStageDataSourceModuleTemplates, IngestModuleType.DATA_SOURCE_LEVEL, skCase);
this.addIngestModules(fileIngestModuleTemplates, IngestModuleType.FILE_LEVEL, skCase);
this.addIngestModules(secondStageDataSourceModuleTemplates, IngestModuleType.DATA_SOURCE_LEVEL, skCase);
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Failed to add ingest modules to database.", ex);
}
}
private void addIngestModules(List<IngestModuleTemplate> templates, IngestModuleType type, SleuthkitCase skCase) throws TskCoreException {
for (IngestModuleTemplate module : templates) {
ingestModules.add(skCase.addIngestModule(module.getModuleName(), FactoryClassNameNormalizer.normalize(module.getModuleFactory().getClass().getCanonicalName()), type, module.getModuleFactory().getModuleVersionNumber()));
}
}
/**
* Uses an input collection of ingest module templates and a pipeline
* configuration, i.e., an ordered list of ingest module factory class
* names, to create an ordered output list of ingest module templates for an
* ingest pipeline. The ingest module templates are removed from the input
* collection as they are added to the output collection.
*
* @param ingestModuleTemplates A mapping of ingest module factory class
* names to ingest module templates.
* @param pipelineConfig An ordered list of ingest module factory
* class names representing an ingest pipeline.
*
* @return An ordered list of ingest module templates, i.e., an
* uninstantiated pipeline.
*/
private static List<IngestModuleTemplate> getConfiguredIngestModuleTemplates(Map<String, IngestModuleTemplate> ingestModuleTemplates, List<String> pipelineConfig) {
List<IngestModuleTemplate> templates = new ArrayList<>();
for (String moduleClassName : pipelineConfig) {
if (ingestModuleTemplates.containsKey(moduleClassName)) {
templates.add(ingestModuleTemplates.remove(moduleClassName));
}
}
return templates;
}
/**
* Gets the identifier of this job.
*
* @return The job identifier.
*/
long getId() {
return this.id;
}
/**
* Get the ingest execution context identifier.
*
* @return The context string.
*/
String getExecutionContext() {
return this.settings.getExecutionContext();
}
/**
* Gets the data source to be ingested by this job.
*
* @return A Content object representing the data source.
*/
Content getDataSource() {
return this.dataSource;
}
/**
* Queries whether or not unallocated space should be processed as part of
* this job.
*
* @return True or false.
*/
boolean shouldProcessUnallocatedSpace() {
return this.settings.getProcessUnallocatedSpace();
}
/**
* Checks to see if this job has at least one ingest pipeline.
*
* @return True or false.
*/
boolean hasIngestPipeline() {
return this.hasFirstStageDataSourceIngestPipeline()
|| this.hasFileIngestPipeline()
|| this.hasSecondStageDataSourceIngestPipeline();
}
/**
* Checks to see if this job has a first stage data source level ingest
* pipeline.
*
* @return True or false.
*/
private boolean hasFirstStageDataSourceIngestPipeline() {
return (this.firstStageDataSourceIngestPipeline.isEmpty() == false);
}
/**
* Checks to see if this job has a second stage data source level ingest
* pipeline.
*
* @return True or false.
*/
private boolean hasSecondStageDataSourceIngestPipeline() {
return (this.secondStageDataSourceIngestPipeline.isEmpty() == false);
}
/**
* Checks to see if this job has a file level ingest pipeline.
*
* @return True or false.
*/
private boolean hasFileIngestPipeline() {
if (!this.fileIngestPipelines.isEmpty()) {
return !this.fileIngestPipelines.get(0).isEmpty();
}
return false;
}
/**
* Starts up the ingest pipelines for this job.
*
* @return A collection of ingest module startup errors, empty on success.
*/
List<IngestModuleError> start() {
List<IngestModuleError> errors = startUpIngestPipelines();
if (errors.isEmpty()) {
if (this.hasFirstStageDataSourceIngestPipeline() || this.hasFileIngestPipeline()) {
logger.log(Level.INFO, "Starting first stage analysis for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
this.startFirstStage();
} else if (this.hasSecondStageDataSourceIngestPipeline()) {
logger.log(Level.INFO, "Starting second stage analysis for {0} (jobId={1}), no first stage configured", new Object[]{dataSource.getName(), this.id}); //NON-NLS
this.startSecondStage();
}
try {
this.ingestJob = Case.getCurrentCase().getSleuthkitCase().addIngestJob(dataSource, NetworkUtils.getLocalHostName(), ingestModules, new Date(this.createTime), new Date(0), IngestJobStatusType.STARTED, "");
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Failed to add ingest job to database.", ex);
}
}
return errors;
}
/**
* Starts up each of the ingest pipelines for this job to collect any file
* and data source level ingest modules errors that might occur.
*
* @return A collection of ingest module startup errors, empty on success.
*/
private List<IngestModuleError> startUpIngestPipelines() {
List<IngestModuleError> errors = new ArrayList<>();
/*
* Start the data-source-level ingest module pipelines.
*/
errors.addAll(this.firstStageDataSourceIngestPipeline.startUp());
errors.addAll(this.secondStageDataSourceIngestPipeline.startUp());
/*
* If the data-source-level ingest pipelines were successfully started,
* start the Start the file-level ingest pipelines (one per file ingest
* thread).
*/
if (errors.isEmpty()) {
for (FileIngestPipeline pipeline : this.fileIngestPipelinesQueue) {
errors.addAll(pipeline.startUp());
if (!errors.isEmpty()) {
/*
* If there are start up errors, the ingest job will not
* proceed, so shut down any file ingest pipelines that did
* start up.
*/
while (!this.fileIngestPipelinesQueue.isEmpty()) {
FileIngestPipeline startedPipeline = this.fileIngestPipelinesQueue.poll();
if (startedPipeline.isRunning()) {
List<IngestModuleError> shutDownErrors = startedPipeline.shutDown();
if (!shutDownErrors.isEmpty()) {
/*
* The start up errors will ultimately be
* reported to the user for possible remedy, but
* the shut down errors are logged here.
*/
logIngestModuleErrors(shutDownErrors);
}
}
}
break;
}
}
}
return errors;
}
/**
* Starts the first stage of this job.
*/
private void startFirstStage() {
this.stage = DataSourceIngestJob.Stages.FIRST;
if (this.hasFileIngestPipeline()) {
synchronized (this.fileIngestProgressLock) {
this.estimatedFilesToProcess = this.dataSource.accept(new GetFilesCountVisitor());
}
}
if (this.doUI) {
/**
* Start one or both of the first stage ingest progress bars.
*/
if (this.hasFirstStageDataSourceIngestPipeline()) {
this.startDataSourceIngestProgressBar();
}
if (this.hasFileIngestPipeline()) {
this.startFileIngestProgressBar();
}
}
/**
* Make the first stage data source level ingest pipeline the current
* data source level pipeline.
*/
synchronized (this.dataSourceIngestPipelineLock) {
this.currentDataSourceIngestPipeline = this.firstStageDataSourceIngestPipeline;
}
/**
* Schedule the first stage tasks.
*/
if (this.hasFirstStageDataSourceIngestPipeline() && this.hasFileIngestPipeline()) {
logger.log(Level.INFO, "Scheduling first stage data source and file level analysis tasks for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
DataSourceIngestJob.taskScheduler.scheduleIngestTasks(this);
} else if (this.hasFirstStageDataSourceIngestPipeline()) {
logger.log(Level.INFO, "Scheduling first stage data source level analysis tasks for {0} (jobId={1}), no file level analysis configured", new Object[]{dataSource.getName(), this.id}); //NON-NLS
DataSourceIngestJob.taskScheduler.scheduleDataSourceIngestTask(this);
} else {
logger.log(Level.INFO, "Scheduling file level analysis tasks for {0} (jobId={1}), no first stage data source level analysis configured", new Object[]{dataSource.getName(), this.id}); //NON-NLS
DataSourceIngestJob.taskScheduler.scheduleFileIngestTasks(this);
/**
* No data source ingest task has been scheduled for this stage, and
* it is possible, if unlikely, that no file ingest tasks were
* actually scheduled since there are files that get filtered out by
* the tasks scheduler. In this special case, an ingest thread will
* never get to check for completion of this stage of the job, so do
* it now.
*/
this.checkForStageCompleted();
}
}
/**
* Starts the second stage of this ingest job.
*/
private void startSecondStage() {
logger.log(Level.INFO, "Starting second stage analysis for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
this.stage = DataSourceIngestJob.Stages.SECOND;
if (this.doUI) {
this.startDataSourceIngestProgressBar();
}
synchronized (this.dataSourceIngestPipelineLock) {
this.currentDataSourceIngestPipeline = this.secondStageDataSourceIngestPipeline;
}
logger.log(Level.INFO, "Scheduling second stage data source level analysis tasks for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
DataSourceIngestJob.taskScheduler.scheduleDataSourceIngestTask(this);
}
/**
* Starts a data source level ingest progress bar for this job.
*/
private void startDataSourceIngestProgressBar() {
if (this.doUI) {
synchronized (this.dataSourceIngestProgressLock) {
String displayName = NbBundle.getMessage(this.getClass(),
"IngestJob.progress.dataSourceIngest.initialDisplayName",
this.dataSource.getName());
this.dataSourceIngestProgress = ProgressHandle.createHandle(displayName, new Cancellable() {
@Override
public boolean cancel() {
// If this method is called, the user has already pressed
// the cancel button on the progress bar and the OK button
// of a cancelation confirmation dialog supplied by
// NetBeans. What remains to be done is to find out whether
// the user wants to cancel only the currently executing
// data source ingest module or the entire ingest job.
DataSourceIngestCancellationPanel panel = new DataSourceIngestCancellationPanel();
String dialogTitle = NbBundle.getMessage(DataSourceIngestJob.this.getClass(), "IngestJob.cancellationDialog.title");
JOptionPane.showConfirmDialog(null, panel, dialogTitle, JOptionPane.OK_OPTION, JOptionPane.PLAIN_MESSAGE);
if (panel.cancelAllDataSourceIngestModules()) {
DataSourceIngestJob.this.cancel(IngestJob.CancellationReason.USER_CANCELLED);
} else {
DataSourceIngestJob.this.cancelCurrentDataSourceIngestModule();
}
return true;
}
});
this.dataSourceIngestProgress.start();
this.dataSourceIngestProgress.switchToIndeterminate();
}
}
}
/**
* Starts the file level ingest progress bar for this job.
*/
private void startFileIngestProgressBar() {
if (this.doUI) {
synchronized (this.fileIngestProgressLock) {
String displayName = NbBundle.getMessage(this.getClass(),
"IngestJob.progress.fileIngest.displayName",
this.dataSource.getName());
this.fileIngestProgress = ProgressHandle.createHandle(displayName, new Cancellable() {
@Override
public boolean cancel() {
// If this method is called, the user has already pressed
// the cancel button on the progress bar and the OK button
// of a cancelation confirmation dialog supplied by
// NetBeans.
DataSourceIngestJob.this.cancel(IngestJob.CancellationReason.USER_CANCELLED);
return true;
}
});
this.fileIngestProgress.start();
this.fileIngestProgress.switchToDeterminate((int) this.estimatedFilesToProcess);
}
}
}
/**
* Checks to see if the ingest tasks for the current stage of this job are
* completed and does a stage transition if they are.
*/
private void checkForStageCompleted() {
synchronized (this.stageCompletionCheckLock) {
if (DataSourceIngestJob.taskScheduler.tasksForJobAreCompleted(this)) {
switch (this.stage) {
case FIRST:
this.finishFirstStage();
break;
case SECOND:
this.finish();
break;
}
}
}
}
/**
* Shuts down the first stage ingest pipelines and progress bars for this
* job and starts the second stage, if appropriate.
*/
private void finishFirstStage() {
logger.log(Level.INFO, "Finished first stage analysis for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
// Shut down the file ingest pipelines. Note that no shut down is
// required for the data source ingest pipeline because data source
// ingest modules do not have a shutdown() method.
List<IngestModuleError> errors = new ArrayList<>();
while (!this.fileIngestPipelinesQueue.isEmpty()) {
FileIngestPipeline pipeline = fileIngestPipelinesQueue.poll();
if (pipeline.isRunning()) {
errors.addAll(pipeline.shutDown());
}
}
if (!errors.isEmpty()) {
logIngestModuleErrors(errors);
}
if (this.doUI) {
// Finish the first stage data source ingest progress bar, if it hasn't
// already been finished.
synchronized (this.dataSourceIngestProgressLock) {
if (this.dataSourceIngestProgress != null) {
this.dataSourceIngestProgress.finish();
this.dataSourceIngestProgress = null;
}
}
// Finish the file ingest progress bar, if it hasn't already
// been finished.
synchronized (this.fileIngestProgressLock) {
if (this.fileIngestProgress != null) {
this.fileIngestProgress.finish();
this.fileIngestProgress = null;
}
}
}
/**
* Start the second stage, if appropriate.
*/
if (!this.cancelled && this.hasSecondStageDataSourceIngestPipeline()) {
this.startSecondStage();
} else {
this.finish();
}
}
/**
* Shuts down the ingest pipelines and progress bars for this job.
*/
private void finish() {
logger.log(Level.INFO, "Finished analysis for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
this.stage = DataSourceIngestJob.Stages.FINALIZATION;
if (this.doUI) {
// Finish the second stage data source ingest progress bar, if it hasn't
// already been finished.
synchronized (this.dataSourceIngestProgressLock) {
if (this.dataSourceIngestProgress != null) {
this.dataSourceIngestProgress.finish();
this.dataSourceIngestProgress = null;
}
}
}
if (this.cancelled) {
try {
ingestJob.setIngestJobStatus(IngestJobStatusType.CANCELLED);
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Failed to set ingest status for ingest job in database.", ex);
}
} else {
try {
ingestJob.setIngestJobStatus(IngestJobStatusType.COMPLETED);
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Failed to set ingest status for ingest job in database.", ex);
}
}
try {
this.ingestJob.setEndDateTime(new Date());
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Failed to set end date for ingest job in database.", ex);
}
this.parentJob.dataSourceJobFinished(this);
}
/**
* Passes the data source for this job through the currently active data
* source level ingest pipeline.
*
* @param task A data source ingest task wrapping the data source.
*/
void process(DataSourceIngestTask task) {
try {
synchronized (this.dataSourceIngestPipelineLock) {
if (!this.isCancelled() && !this.currentDataSourceIngestPipeline.isEmpty()) {
List<IngestModuleError> errors = new ArrayList<>();
errors.addAll(this.currentDataSourceIngestPipeline.process(task));
if (!errors.isEmpty()) {
logIngestModuleErrors(errors);
}
}
}
if (this.doUI) {
/**
* Shut down the data source ingest progress bar right away.
* Data source-level processing is finished for this stage.
*/
synchronized (this.dataSourceIngestProgressLock) {
if (null != this.dataSourceIngestProgress) {
this.dataSourceIngestProgress.finish();
this.dataSourceIngestProgress = null;
}
}
}
} finally {
DataSourceIngestJob.taskScheduler.notifyTaskCompleted(task);
this.checkForStageCompleted();
}
}
/**
* Passes a file from the data source for this job through the file level
* ingest pipeline.
*
* @param task A file ingest task.
*
* @throws InterruptedException if the thread executing this code is
* interrupted while blocked on taking from or
* putting to the file ingest pipelines
* collection.
*/
void process(FileIngestTask task) throws InterruptedException {
try {
if (!this.isCancelled()) {
FileIngestPipeline pipeline = this.fileIngestPipelinesQueue.take();
if (!pipeline.isEmpty()) {
AbstractFile file = task.getFile();
synchronized (this.fileIngestProgressLock) {
++this.processedFiles;
if (this.doUI) {
/**
* Update the file ingest progress bar.
*/
if (this.processedFiles <= this.estimatedFilesToProcess) {
this.fileIngestProgress.progress(file.getName(), (int) this.processedFiles);
} else {
this.fileIngestProgress.progress(file.getName(), (int) this.estimatedFilesToProcess);
}
this.filesInProgress.add(file.getName());
}
}
/**
* Run the file through the pipeline.
*/
List<IngestModuleError> errors = new ArrayList<>();
errors.addAll(pipeline.process(task));
if (!errors.isEmpty()) {
logIngestModuleErrors(errors);
}
if (this.doUI && !this.cancelled) {
synchronized (this.fileIngestProgressLock) {
/**
* Update the file ingest progress bar again, in
* case the file was being displayed.
*/
this.filesInProgress.remove(file.getName());
if (this.filesInProgress.size() > 0) {
this.fileIngestProgress.progress(this.filesInProgress.get(0));
} else {
this.fileIngestProgress.progress("");
}
}
}
}
this.fileIngestPipelinesQueue.put(pipeline);
}
} finally {
DataSourceIngestJob.taskScheduler.notifyTaskCompleted(task);
this.checkForStageCompleted();
}
}
/**
* Adds more files from the data source for this job to the job, i.e., adds
* extracted or carved files. Not currently supported for the second stage
* of the job.
*
* @param files A list of the files to add.
*/
void addFiles(List<AbstractFile> files) {
if (DataSourceIngestJob.Stages.FIRST == this.stage) {
for (AbstractFile file : files) {
DataSourceIngestJob.taskScheduler.scheduleFileIngestTask(this, file);
}
} else {
DataSourceIngestJob.logger.log(Level.SEVERE, "Adding files during second stage not supported"); //NON-NLS
}
/**
* The intended clients of this method are ingest modules running code
* on an ingest thread that is holding a reference to an ingest task, in
* which case a completion check would not be necessary, so this is a
* bit of defensive programming.
*/
this.checkForStageCompleted();
}
/**
* Updates the display name shown on the current data source level ingest
* progress bar for this job.
*
* @param displayName The new display name.
*/
void updateDataSourceIngestProgressBarDisplayName(String displayName) {
if (this.doUI && !this.cancelled) {
synchronized (this.dataSourceIngestProgressLock) {
this.dataSourceIngestProgress.setDisplayName(displayName);
}
}
}
/**
* Switches the data source level ingest progress bar for this job to
* determinate mode. This should be called if the total work units to
* process the data source is known.
*
* @param workUnits Total number of work units for the processing of the
* data source.
*/
void switchDataSourceIngestProgressBarToDeterminate(int workUnits) {
if (this.doUI && !this.cancelled) {
synchronized (this.dataSourceIngestProgressLock) {
if (null != this.dataSourceIngestProgress) {
this.dataSourceIngestProgress.switchToDeterminate(workUnits);
}
}
}
}
/**
* Switches the data source level ingest progress bar for this job to
* indeterminate mode. This should be called if the total work units to
* process the data source is unknown.
*/
void switchDataSourceIngestProgressBarToIndeterminate() {
if (this.doUI && !this.cancelled) {
synchronized (this.dataSourceIngestProgressLock) {
if (null != this.dataSourceIngestProgress) {
this.dataSourceIngestProgress.switchToIndeterminate();
}
}
}
}
/**
* Updates the data source level ingest progress bar for this job with the
* number of work units performed, if in the determinate mode.
*
* @param workUnits Number of work units performed.
*/
void advanceDataSourceIngestProgressBar(int workUnits) {
if (this.doUI && !this.cancelled) {
synchronized (this.dataSourceIngestProgressLock) {
if (null != this.dataSourceIngestProgress) {
this.dataSourceIngestProgress.progress("", workUnits);
}
}
}
}
/**
* Updates the data source level ingest progress for this job with a new
* task name, where the task name is the "subtitle" under the display name.
*
* @param currentTask The task name.
*/
void advanceDataSourceIngestProgressBar(String currentTask) {
if (this.doUI && !this.cancelled) {
synchronized (this.dataSourceIngestProgressLock) {
if (null != this.dataSourceIngestProgress) {
this.dataSourceIngestProgress.progress(currentTask);
}
}
}
}
/**
* Updates the data source level ingest progress bar for this with a new
* task name and the number of work units performed, if in the determinate
* mode. The task name is the "subtitle" under the display name.
*
* @param currentTask The task name.
* @param workUnits Number of work units performed.
*/
void advanceDataSourceIngestProgressBar(String currentTask, int workUnits) {
if (this.doUI && !this.cancelled) {
synchronized (this.fileIngestProgressLock) {
this.dataSourceIngestProgress.progress(currentTask, workUnits);
}
}
}
/**
* Queries whether or not a temporary cancellation of data source level
* ingest in order to stop the currently executing data source level ingest
* module is in effect for this job.
*
* @return True or false.
*/
boolean currentDataSourceIngestModuleIsCancelled() {
return this.currentDataSourceIngestModuleCancelled;
}
/**
* Rescind a temporary cancellation of data source level ingest that was
* used to stop a single data source level ingest module for this job.
*
* @param moduleDisplayName The display name of the module that was stopped.
*/
void currentDataSourceIngestModuleCancellationCompleted(String moduleDisplayName) {
this.currentDataSourceIngestModuleCancelled = false;
this.cancelledDataSourceIngestModules.add(moduleDisplayName);
if (this.doUI) {
/**
* A new progress bar must be created because the cancel button of
* the previously constructed component is disabled by NetBeans when
* the user selects the "OK" button of the cancellation confirmation
* dialog popped up by NetBeans when the progress bar cancel button
* is pressed.
*/
synchronized (this.dataSourceIngestProgressLock) {
this.dataSourceIngestProgress.finish();
this.dataSourceIngestProgress = null;
this.startDataSourceIngestProgressBar();
}
}
}
/**
* Gets the currently running data source level ingest module for this job.
*
* @return The currently running module, may be null.
*/
DataSourceIngestPipeline.PipelineModule getCurrentDataSourceIngestModule() {
if (null != this.currentDataSourceIngestPipeline) {
return this.currentDataSourceIngestPipeline.getCurrentlyRunningModule();
} else {
return null;
}
}
/**
* Requests a temporary cancellation of data source level ingest for this
* job in order to stop the currently executing data source ingest module.
*/
void cancelCurrentDataSourceIngestModule() {
this.currentDataSourceIngestModuleCancelled = true;
}
/**
* Requests cancellation of ingest, i.e., a shutdown of the data source
* level and file level ingest pipelines.
*
* @param reason The cancellation reason.
*/
void cancel(IngestJob.CancellationReason reason) {
if (this.doUI) {
/**
* Put a cancellation message on data source level ingest progress
* bar, if it is still running.
*/
synchronized (this.dataSourceIngestProgressLock) {
if (dataSourceIngestProgress != null) {
final String displayName = NbBundle.getMessage(this.getClass(),
"IngestJob.progress.dataSourceIngest.initialDisplayName",
dataSource.getName());
dataSourceIngestProgress.setDisplayName(
NbBundle.getMessage(this.getClass(),
"IngestJob.progress.cancelling",
displayName));
}
}
/**
* Put a cancellation message on the file level ingest progress bar,
* if it is still running.
*/
synchronized (this.fileIngestProgressLock) {
if (this.fileIngestProgress != null) {
final String displayName = NbBundle.getMessage(this.getClass(),
"IngestJob.progress.fileIngest.displayName",
this.dataSource.getName());
this.fileIngestProgress.setDisplayName(
NbBundle.getMessage(this.getClass(), "IngestJob.progress.cancelling",
displayName));
if (!this.currentFileIngestModule.isEmpty() && !this.currentFileIngestTask.isEmpty()) {
this.fileIngestProgress.progress(NbBundle.getMessage(this.getClass(),
"IngestJob.progress.fileIngest.cancelMessage",
this.currentFileIngestModule, this.currentFileIngestTask));
}
}
}
}
/*
* If the work is not already done, show this job as cancelled for the
* given reason.
*/
if (Stages.FINALIZATION != stage) {
synchronized (cancellationStateMonitor) {
/*
* These fields are volatile for reading, synchronized on the
* monitor here for writing.
*/
this.cancelled = true;
this.cancellationReason = reason;
}
}
/**
* Tell the task scheduler to cancel all pending tasks, i.e., tasks not
* not being performed by an ingest thread.
*/
DataSourceIngestJob.taskScheduler.cancelPendingTasksForIngestJob(this);
this.checkForStageCompleted();
}
/**
* Set the current module name being run and the file name it is running on.
* To be used for more detailed cancelling.
*
* @param moduleName Name of module currently running.
* @param taskName Name of file the module is running on.
*/
void setCurrentFileIngestModule(String moduleName, String taskName) {
this.currentFileIngestModule = moduleName;
this.currentFileIngestTask = taskName;
}
/**
* Queries whether or not cancellation, i.e., a shutdown of the data source
* level and file level ingest pipelines for this job, has been requested.
*
* @return True or false.
*/
boolean isCancelled() {
return this.cancelled;
}
/**
* Gets the reason this job was cancelled.
*
* @return The cancellation reason, may be not cancelled.
*/
IngestJob.CancellationReason getCancellationReason() {
return this.cancellationReason;
}
/**
* Write ingest module errors to the log.
*
* @param errors The errors.
*/
private void logIngestModuleErrors(List<IngestModuleError> errors) {
for (IngestModuleError error : errors) {
DataSourceIngestJob.logger.log(Level.SEVERE, String.format("%s experienced an error analyzing %s (jobId=%d)", error.getModuleDisplayName(), dataSource.getName(), this.id), error.getThrowable()); //NON-NLS
}
}
/**
* Gets a snapshot of this jobs state and performance.
*
* @return An ingest job statistics object.
*/
Snapshot getSnapshot(boolean getIngestTasksSnapshot) {
return new Snapshot(getIngestTasksSnapshot);
}
/**
* Stores basic diagnostic statistics for a data source ingest job.
*/
final class Snapshot {
private final String dataSource;
private final long jobId;
private final long jobStartTime;
private final long snapShotTime;
private final DataSourceIngestPipeline.PipelineModule dataSourceLevelIngestModule;
private boolean fileIngestRunning;
private Date fileIngestStartTime;
private final long processedFiles;
private final long estimatedFilesToProcess;
private final IngestTasksScheduler.IngestJobTasksSnapshot tasksSnapshot;
private final boolean jobCancelled;
private final IngestJob.CancellationReason jobCancellationReason;
private final List<String> cancelledDataSourceModules;
/**
* Constructs an object to store basic diagnostic statistics for a data
* source ingest job.
*/
Snapshot(boolean getIngestTasksSnapshot) {
this.dataSource = DataSourceIngestJob.this.dataSource.getName();
this.jobId = DataSourceIngestJob.this.id;
this.jobStartTime = DataSourceIngestJob.this.createTime;
this.dataSourceLevelIngestModule = DataSourceIngestJob.this.getCurrentDataSourceIngestModule();
/**
* Determine whether file ingest is running at the time of this
* snapshot and determine the earliest file ingest level pipeline
* start time, if file ingest was started at all.
*/
for (FileIngestPipeline pipeline : DataSourceIngestJob.this.fileIngestPipelines) {
if (pipeline.isRunning()) {
this.fileIngestRunning = true;
}
Date pipelineStartTime = pipeline.getStartTime();
if (null != pipelineStartTime && (null == this.fileIngestStartTime || pipelineStartTime.before(this.fileIngestStartTime))) {
this.fileIngestStartTime = pipelineStartTime;
}
}
this.jobCancelled = cancelled;
this.jobCancellationReason = cancellationReason;
this.cancelledDataSourceModules = new ArrayList<>(DataSourceIngestJob.this.cancelledDataSourceIngestModules);
if (getIngestTasksSnapshot) {
synchronized (DataSourceIngestJob.this.fileIngestProgressLock) {
this.processedFiles = DataSourceIngestJob.this.processedFiles;
this.estimatedFilesToProcess = DataSourceIngestJob.this.estimatedFilesToProcess;
this.snapShotTime = new Date().getTime();
}
this.tasksSnapshot = DataSourceIngestJob.taskScheduler.getTasksSnapshotForJob(this.jobId);
} else {
this.processedFiles = 0;
this.estimatedFilesToProcess = 0;
this.snapShotTime = new Date().getTime();
this.tasksSnapshot = null;
}
}
/**
* Gets time these statistics were collected.
*
* @return The statistics collection time as number of milliseconds
* since January 1, 1970, 00:00:00 GMT.
*/
long getSnapshotTime() {
return snapShotTime;
}
/**
* Gets the name of the data source associated with the ingest job that
* is the subject of this snapshot.
*
* @return A data source name string.
*/
String getDataSource() {
return dataSource;
}
/**
* Gets the identifier of the ingest job that is the subject of this
* snapshot.
*
* @return The ingest job id.
*/
long getJobId() {
return this.jobId;
}
/**
* Gets the time the ingest job was started.
*
* @return The start time as number of milliseconds since January 1,
* 1970, 00:00:00 GMT.
*/
long getJobStartTime() {
return jobStartTime;
}
DataSourceIngestPipeline.PipelineModule getDataSourceLevelIngestModule() {
return this.dataSourceLevelIngestModule;
}
boolean fileIngestIsRunning() {
return this.fileIngestRunning;
}
Date fileIngestStartTime() {
return this.fileIngestStartTime;
}
/**
* Gets files per second throughput since the ingest job that is the
* subject of this snapshot started.
*
* @return Files processed per second (approximate).
*/
double getSpeed() {
return (double) processedFiles / ((snapShotTime - jobStartTime) / 1000);
}
/**
* Gets the number of files processed for the job so far.
*
* @return The number of processed files.
*/
long getFilesProcessed() {
return processedFiles;
}
/**
* Gets an estimate of the files that still need to be processed for
* this job.
*
* @return The estimate.
*/
long getFilesEstimated() {
return estimatedFilesToProcess;
}
long getRootQueueSize() {
if (null == this.tasksSnapshot) {
return 0;
}
return this.tasksSnapshot.getRootQueueSize();
}
long getDirQueueSize() {
if (null == this.tasksSnapshot) {
return 0;
}
return this.tasksSnapshot.getDirectoryTasksQueueSize();
}
long getFileQueueSize() {
if (null == this.tasksSnapshot) {
return 0;
}
return this.tasksSnapshot.getFileQueueSize();
}
long getDsQueueSize() {
if (null == this.tasksSnapshot) {
return 0;
}
return this.tasksSnapshot.getDsQueueSize();
}
long getRunningListSize() {
if (null == this.tasksSnapshot) {
return 0;
}
return this.tasksSnapshot.getRunningListSize();
}
boolean isCancelled() {
return this.jobCancelled;
}
/**
* Gets the reason this job was cancelled.
*
* @return The cancellation reason, may be not cancelled.
*/
IngestJob.CancellationReason getCancellationReason() {
return this.jobCancellationReason;
}
/**
* Gets a list of the display names of any canceled data source level
* ingest modules
*
* @return A list of canceled data source level ingest module display
* names, possibly empty.
*/
List<String> getCancelledDataSourceIngestModules() {
return Collections.unmodifiableList(this.cancelledDataSourceModules);
}
}
}