/* * Autopsy Forensic Browser * * Copyright 2011 - 2014 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sleuthkit.autopsy.keywordsearch; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Level; import javax.swing.SwingUtilities; import javax.swing.SwingWorker; import org.netbeans.api.progress.aggregate.AggregateProgressFactory; import org.netbeans.api.progress.aggregate.AggregateProgressHandle; import org.netbeans.api.progress.aggregate.ProgressContributor; import org.openide.util.Cancellable; import org.openide.util.NbBundle; import org.openide.util.NbBundle.Messages; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; import org.sleuthkit.autopsy.coreutils.StopWatch; import org.sleuthkit.autopsy.ingest.IngestMessage; import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.datamodel.BlackboardArtifact; /** * Singleton keyword search manager: Launches search threads for each job and * performs commits, both on timed intervals. */ public final class SearchRunner { private static final Logger logger = Logger.getLogger(SearchRunner.class.getName()); private static SearchRunner instance = null; private IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; private volatile boolean updateTimerRunning = false; private Timer updateTimer; // maps a jobID to the search private Map<Long, SearchJobInfo> jobs = new HashMap<>(); //guarded by "this" SearchRunner() { ingester = Server.getIngester(); updateTimer = new Timer(NbBundle.getMessage(this.getClass(), "SearchRunner.updateTimer.title.text"), true); // run as a daemon } /** * * @return the singleton object */ public static synchronized SearchRunner getInstance() { if (instance == null) { instance = new SearchRunner(); } return instance; } /** * Add a new job. Searches will be periodically performed after this is * called. * * @param jobId Job ID that this is associated with * @param dataSourceId Data source that is being indexed and that * searches should be restricted to. * @param keywordListNames List of keyword lists that will be searched. List * contents will be refreshed each search. */ public synchronized void startJob(long jobId, long dataSourceId, List<String> keywordListNames) { if (jobs.containsKey(jobId) == false) { logger.log(Level.INFO, "Adding job {0}", jobId); //NON-NLS SearchJobInfo jobData = new SearchJobInfo(jobId, dataSourceId, keywordListNames); jobs.put(jobId, jobData); } // keep track of how many threads / module instances from this job have asked for this jobs.get(jobId).incrementModuleReferenceCount(); // start the timer, if needed if ((jobs.size() > 0) && (updateTimerRunning == false)) { final long updateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000; updateTimer.scheduleAtFixedRate(new UpdateTimerTask(), updateIntervalMs, updateIntervalMs); updateTimerRunning = true; } } /** * Perform normal finishing of searching for this job, including one last * commit and search. Blocks until the final search is complete. * * @param jobId */ public void endJob(long jobId) { SearchJobInfo job; boolean readyForFinalSearch = false; synchronized (this) { job = jobs.get(jobId); if (job == null) { return; } // Only do final search if this is the last module/thread in this job to call endJob() if (job.decrementModuleReferenceCount() == 0) { jobs.remove(jobId); readyForFinalSearch = true; } } if (readyForFinalSearch) { commit(); doFinalSearch(job); //this will block until it's done } } /** * Immediate stop and removal of job from SearchRunner. Cancels the * associated search worker if it's still running. * * @param jobId */ public void stopJob(long jobId) { logger.log(Level.INFO, "Stopping job {0}", jobId); //NON-NLS commit(); SearchJobInfo job; synchronized (this) { job = jobs.get(jobId); if (job == null) { return; } //stop currentSearcher SearchRunner.Searcher currentSearcher = job.getCurrentSearcher(); if ((currentSearcher != null) && (!currentSearcher.isDone())) { currentSearcher.cancel(true); } jobs.remove(jobId); } } /** * Add these lists to all of the jobs. Used when user wants to search for a * list while ingest has already started. * * @param keywordListNames */ public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) { for (String listName : keywordListNames) { logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName); //NON-NLS for (SearchJobInfo j : jobs.values()) { j.addKeywordListName(listName); } } } /** * Commits index and notifies listeners of index update */ private void commit() { ingester.commit(); // Signal a potential change in number of text_ingested files try { final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); KeywordSearch.fireNumIndexedFilesChange(null, numIndexedFiles); } catch (NoOpenCoreException | KeywordSearchModuleException ex) { logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex); //NON-NLS } } /** * A final search waits for any still-running workers, and then executes a * new one and waits until that is done. * * @param job */ private void doFinalSearch(SearchJobInfo job) { // Run one last search as there are probably some new files committed logger.log(Level.INFO, "Running final search for jobid {0}", job.getJobId()); //NON-NLS if (!job.getKeywordListNames().isEmpty()) { try { // In case this job still has a worker running, wait for it to finish job.waitForCurrentWorker(); SearchRunner.Searcher finalSearcher = new SearchRunner.Searcher(job, true); job.setCurrentSearcher(finalSearcher); //save the ref finalSearcher.execute(); //start thread // block until the search is complete finalSearcher.get(); } catch (InterruptedException | ExecutionException ex) { logger.log(Level.WARNING, "Job {1} final search thread failed: {2}", new Object[]{job.getJobId(), ex}); //NON-NLS } } } /** * Timer triggered re-search for each job (does a single index commit first) */ private class UpdateTimerTask extends TimerTask { private final Logger logger = Logger.getLogger(SearchRunner.UpdateTimerTask.class.getName()); @Override public void run() { // If no jobs then cancel the task. If more job(s) come along, a new task will start up. if (jobs.isEmpty()) { this.cancel(); //terminate this timer task updateTimerRunning = false; return; } commit(); synchronized (SearchRunner.this) { // Spawn a search thread for each job for (Entry<Long, SearchJobInfo> j : jobs.entrySet()) { SearchJobInfo job = j.getValue(); // If no lists or the worker is already running then skip it if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) { Searcher searcher = new Searcher(job); job.setCurrentSearcher(searcher); //save the ref searcher.execute(); //start thread job.setWorkerRunning(true); } } } } } /** * Data structure to keep track of keyword lists, current results, and * search running status for each jobid */ private class SearchJobInfo { private final long jobId; private final long dataSourceId; // mutable state: private volatile boolean workerRunning; private List<String> keywordListNames; //guarded by SearchJobInfo.this private Map<Keyword, List<Long>> currentResults; //guarded by SearchJobInfo.this private SearchRunner.Searcher currentSearcher; private AtomicLong moduleReferenceCount = new AtomicLong(0); private final Object finalSearchLock = new Object(); //used for a condition wait public SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) { this.jobId = jobId; this.dataSourceId = dataSourceId; this.keywordListNames = new ArrayList<>(keywordListNames); currentResults = new HashMap<>(); workerRunning = false; currentSearcher = null; } public long getJobId() { return jobId; } public long getDataSourceId() { return dataSourceId; } public synchronized List<String> getKeywordListNames() { return new ArrayList<>(keywordListNames); } public synchronized void addKeywordListName(String keywordListName) { if (!keywordListNames.contains(keywordListName)) { keywordListNames.add(keywordListName); } } public synchronized List<Long> currentKeywordResults(Keyword k) { return currentResults.get(k); } public synchronized void addKeywordResults(Keyword k, List<Long> resultsIDs) { currentResults.put(k, resultsIDs); } public boolean isWorkerRunning() { return workerRunning; } public void setWorkerRunning(boolean flag) { workerRunning = flag; } public synchronized SearchRunner.Searcher getCurrentSearcher() { return currentSearcher; } public synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) { currentSearcher = searchRunner; } public void incrementModuleReferenceCount() { moduleReferenceCount.incrementAndGet(); } public long decrementModuleReferenceCount() { return moduleReferenceCount.decrementAndGet(); } /** * In case this job still has a worker running, wait for it to finish * * @throws InterruptedException */ public void waitForCurrentWorker() throws InterruptedException { synchronized (finalSearchLock) { while (workerRunning) { finalSearchLock.wait(); //wait() releases the lock } } } /** * Unset workerRunning and wake up thread(s) waiting on finalSearchLock */ public void searchNotify() { synchronized (finalSearchLock) { workerRunning = false; finalSearchLock.notify(); } } } /** * Searcher responsible for searching the current index and writing results * to blackboard and the inbox. Also, posts results to listeners as Ingest * data events. Searches entire index, and keeps track of only new results * to report and save. Runs as a background thread. */ private final class Searcher extends SwingWorker<Object, Void> { /** * Searcher has private copies/snapshots of the lists and keywords */ private SearchJobInfo job; private List<Keyword> keywords; //keywords to search private List<String> keywordListNames; // lists currently being searched private List<KeywordList> keywordLists; private Map<String, KeywordList> keywordToList; //keyword to list name mapping private AggregateProgressHandle progressGroup; private final Logger logger = Logger.getLogger(SearchRunner.Searcher.class.getName()); private boolean finalRun = false; Searcher(SearchJobInfo job) { this.job = job; keywordListNames = job.getKeywordListNames(); keywords = new ArrayList<>(); keywordToList = new HashMap<>(); keywordLists = new ArrayList<>(); //keywords are populated as searcher runs } Searcher(SearchJobInfo job, boolean finalRun) { this(job); this.finalRun = finalRun; } @Override @Messages("SearchRunner.query.exception.msg=Error performing query:") protected Object doInBackground() throws Exception { final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName") + (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : ""); final String pgDisplayName = displayName + (" (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")"); progressGroup = AggregateProgressFactory.createSystemHandle(pgDisplayName, null, new Cancellable() { @Override public boolean cancel() { logger.log(Level.INFO, "Cancelling the searcher by user."); //NON-NLS if (progressGroup != null) { progressGroup.setDisplayName(displayName + " " + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg")); } return SearchRunner.Searcher.this.cancel(true); } }, null); updateKeywords(); ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()]; int i = 0; for (Keyword keywordQuery : keywords) { subProgresses[i] = AggregateProgressFactory.createProgressContributor(keywordQuery.getSearchTerm()); progressGroup.addContributor(subProgresses[i]); i++; } progressGroup.start(); final StopWatch stopWatch = new StopWatch(); stopWatch.start(); try { progressGroup.setDisplayName(displayName); int keywordsSearched = 0; for (Keyword keywordQuery : keywords) { if (this.isCancelled()) { logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getSearchTerm()); //NON-NLS return null; } final String queryStr = keywordQuery.getSearchTerm(); final KeywordList list = keywordToList.get(queryStr); //new subProgress will be active after the initial query //when we know number of hits to start() with if (keywordsSearched > 0) { subProgresses[keywordsSearched - 1].finish(); } KeywordSearchQuery keywordSearchQuery = null; boolean isRegex = !keywordQuery.searchTermIsLiteral(); if (isRegex) { keywordSearchQuery = new TermsComponentQuery(list, keywordQuery); } else { keywordSearchQuery = new LuceneQuery(list, keywordQuery); keywordSearchQuery.escape(); } // Filtering //limit search to currently ingested data sources //set up a filter with 1 or more image ids OR'ed final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId()); keywordSearchQuery.addFilter(dataSourceFilter); QueryResults queryResults; // Do the actual search try { queryResults = keywordSearchQuery.performQuery(); } catch (KeywordSearchModuleException | NoOpenCoreException ex) { logger.log(Level.SEVERE, "Error performing query: " + keywordQuery.getSearchTerm(), ex); //NON-NLS MessageNotifyUtil.Notify.error(Bundle.SearchRunner_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage()); //no reason to continue with next query if recovery failed //or wait for recovery to kick in and run again later //likely case has closed and threads are being interrupted return null; } catch (CancellationException e) { logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getSearchTerm()); //NON-NLS return null; } // calculate new results by substracting results already obtained in this ingest // this creates a map of each keyword to the list of unique files that have that hit. QueryResults newResults = filterResults(queryResults); if (!newResults.getKeywords().isEmpty()) { // Write results to BB //new artifacts created, to report to listeners Collection<BlackboardArtifact> newArtifacts = new ArrayList<>(); //scale progress bar more more granular, per result sub-progress, within per keyword int totalUnits = newResults.getKeywords().size(); subProgresses[keywordsSearched].start(totalUnits); int unitProgress = 0; String queryDisplayStr = keywordQuery.getSearchTerm(); if (queryDisplayStr.length() > 50) { queryDisplayStr = queryDisplayStr.substring(0, 49) + "..."; } subProgresses[keywordsSearched].progress(list.getName() + ": " + queryDisplayStr, unitProgress); // Create blackboard artifacts newArtifacts = newResults.writeAllHitsToBlackBoard(null, subProgresses[keywordsSearched], this, list.getIngestMessages()); } //if has results //reset the status text before it goes away subProgresses[keywordsSearched].progress(""); ++keywordsSearched; } //for each keyword } //end try block catch (Exception ex) { logger.log(Level.WARNING, "searcher exception occurred", ex); //NON-NLS } finally { try { finalizeSearcher(); stopWatch.stop(); logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs()); //NON-NLS } finally { // In case a thread is waiting on this worker to be done job.searchNotify(); } } return null; } @Override protected void done() { // call get to see if there were any errors try { get(); } catch (InterruptedException | ExecutionException e) { logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage()); //NON-NLS services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "SearchRunner.Searcher.done.err.msg"), e.getMessage())); } // catch and ignore if we were cancelled catch (java.util.concurrent.CancellationException ex) { } } /** * Sync-up the updated keywords from the currently used lists in the XML */ private void updateKeywords() { XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent(); keywords.clear(); keywordToList.clear(); keywordLists.clear(); for (String name : keywordListNames) { KeywordList list = loader.getList(name); keywordLists.add(list); for (Keyword k : list.getKeywords()) { keywords.add(k); keywordToList.put(k.getSearchTerm(), list); } } } /** * Performs the cleanup that needs to be done right AFTER * doInBackground() returns without relying on done() method that is not * guaranteed to run. */ private void finalizeSearcher() { SwingUtilities.invokeLater(new Runnable() { @Override public void run() { progressGroup.finish(); } }); } //calculate new results but substracting results already obtained in this ingest //update currentResults map with the new results private QueryResults filterResults(QueryResults queryResult) { QueryResults newResults = new QueryResults(queryResult.getQuery(), queryResult.getKeywordList()); for (Keyword keyword : queryResult.getKeywords()) { List<KeywordHit> queryTermResults = queryResult.getResults(keyword); //translate to list of IDs that we keep track of List<Long> queryTermResultsIDs = new ArrayList<>(); for (KeywordHit ch : queryTermResults) { queryTermResultsIDs.add(ch.getSolrObjectId()); } List<Long> curTermResults = job.currentKeywordResults(keyword); if (curTermResults == null) { job.addKeywordResults(keyword, queryTermResultsIDs); newResults.addResult(keyword, queryTermResults); } else { //some AbstractFile hits already exist for this keyword for (KeywordHit res : queryTermResults) { if (!curTermResults.contains(res.getSolrObjectId())) { //add to new results List<KeywordHit> newResultsFs = newResults.getResults(keyword); if (newResultsFs == null) { newResultsFs = new ArrayList<>(); newResults.addResult(keyword, newResultsFs); } newResultsFs.add(res); curTermResults.add(res.getSolrObjectId()); } } } } return newResults; } } }