/* * Autopsy Forensic Browser * * Copyright 2011-2015 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sleuthkit.autopsy.keywordsearch; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.TreeSet; import java.util.logging.Level; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrRequest.METHOD; import org.apache.solr.client.solrj.response.QueryResponse; import org.openide.util.NbBundle.Messages; import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; import org.sleuthkit.autopsy.coreutils.Version; import org.sleuthkit.autopsy.datamodel.TextMarkupLookup; import org.sleuthkit.autopsy.keywordsearch.KeywordQueryFilter.FilterType; /** * Highlights hits for a given document. Knows about pages and such for the * content viewer. */ class HighlightedText implements IndexedText, TextMarkupLookup { private static final Logger logger = Logger.getLogger(HighlightedText.class.getName()); private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS private static final String ANCHOR_PREFIX = HighlightedText.class.getName() + "_"; private long objectId; private String keywordHitQuery; private Server solrServer; private int numberPages; private int currentPage; private boolean isRegex = false; private boolean group = true; private boolean hasChunks = false; //stores all pages/chunks that have hits as key, and number of hits as a value, or 0 if yet unknown private LinkedHashMap<Integer, Integer> hitsPages; //stored page num -> current hit number mapping private HashMap<Integer, Integer> pagesToHits; private List<Integer> pages; private QueryResults hits = null; //original hits that may get passed in private String originalQuery = null; //or original query if hits are not available private boolean isPageInfoLoaded = false; private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT); HighlightedText(long objectId, String keywordHitQuery, boolean isRegex) { this.objectId = objectId; this.keywordHitQuery = keywordHitQuery; this.isRegex = isRegex; this.group = true; this.hitsPages = new LinkedHashMap<>(); this.pages = new ArrayList<>(); this.pagesToHits = new HashMap<>(); this.solrServer = KeywordSearch.getServer(); this.numberPages = 0; this.currentPage = 0; //hits are unknown } //when the results are not known and need to requery to get hits HighlightedText(long objectId, String solrQuery, boolean isRegex, String originalQuery) { this(objectId, KeywordSearchUtil.quoteQuery(solrQuery), isRegex); this.originalQuery = originalQuery; } HighlightedText(long objectId, String solrQuery, boolean isRegex, QueryResults hits) { this(objectId, solrQuery, isRegex); this.hits = hits; } HighlightedText(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) { this(objectId, solrQuery, isRegex, hits); this.group = group; } /** * The main goal of this method is to figure out which pages / chunks have * hits. */ @Messages({"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"}) private void loadPageInfo() { if (isPageInfoLoaded) { return; } try { this.numberPages = solrServer.queryNumFileChunks(this.objectId); } catch (KeywordSearchModuleException ex) { logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS return; } catch (NoOpenCoreException ex) { logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS return; } if (this.numberPages == 0) { hasChunks = false; } else { hasChunks = true; } //if has chunks, get pages with hits if (hasChunks) { //extract pages of interest, sorted /* * If this is being called from the artifacts / dir tree, then we * need to perform the search to get the highlights. */ if (hits == null) { String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery); if (isRegex) { //use white-space sep. field to get exact matches only of regex query result queryStr = Server.Schema.CONTENT_WS + ":" + "\"" + queryStr + "\""; } Keyword keywordQuery = new Keyword(queryStr, !isRegex); List<Keyword> keywords = new ArrayList<>(); keywords.add(keywordQuery); KeywordSearchQuery chunksQuery = new LuceneQuery(new KeywordList(keywords), keywordQuery); chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId)); try { hits = chunksQuery.performQuery(); } catch (KeywordSearchModuleException | NoOpenCoreException ex) { logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage()); return; } } //organize the hits by page, filter as needed TreeSet<Integer> pagesSorted = new TreeSet<>(); for (Keyword k : hits.getKeywords()) { for (KeywordHit hit : hits.getResults(k)) { int chunkID = hit.getChunkId(); if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) { pagesSorted.add(chunkID); } } } //set page to first page having highlights if (pagesSorted.isEmpty()) { this.currentPage = 0; } else { this.currentPage = pagesSorted.first(); } for (Integer page : pagesSorted) { hitsPages.put(page, 0); //unknown number of matches in the page pages.add(page); pagesToHits.put(page, 0); //set current hit to 0th } } else { //no chunks this.numberPages = 1; this.currentPage = 1; hitsPages.put(1, 0); pages.add(1); pagesToHits.put(1, 0); } isPageInfoLoaded = true; } //constructor for dummy singleton factory instance for Lookup private HighlightedText() { } long getObjectId() { return this.objectId; } @Override public int getNumberPages() { return this.numberPages; //return number of pages that have hits //return this.hitsPages.keySet().size(); } @Override public int getCurrentPage() { return this.currentPage; } @Override public boolean hasNextPage() { final int numPages = pages.size(); int idx = pages.indexOf(this.currentPage); return idx < numPages - 1; } @Override public boolean hasPreviousPage() { int idx = pages.indexOf(this.currentPage); return idx > 0; } @Override public int nextPage() { if (!hasNextPage()) { throw new IllegalStateException( NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextPage.exception.msg")); } int idx = pages.indexOf(this.currentPage); currentPage = pages.get(idx + 1); return currentPage; } @Override public int previousPage() { if (!hasPreviousPage()) { throw new IllegalStateException( NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousPage.exception.msg")); } int idx = pages.indexOf(this.currentPage); currentPage = pages.get(idx - 1); return currentPage; } @Override public boolean hasNextItem() { if (!this.pagesToHits.containsKey(currentPage)) { return false; } return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage); } @Override public boolean hasPreviousItem() { if (!this.pagesToHits.containsKey(currentPage)) { return false; } return this.pagesToHits.get(currentPage) > 1; } @Override public int nextItem() { if (!hasNextItem()) { throw new IllegalStateException( NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextItem.exception.msg")); } int cur = pagesToHits.get(currentPage) + 1; pagesToHits.put(currentPage, cur); return cur; } @Override public int previousItem() { if (!hasPreviousItem()) { throw new IllegalStateException( NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousItem.exception.msg")); } int cur = pagesToHits.get(currentPage) - 1; pagesToHits.put(currentPage, cur); return cur; } @Override public int currentItem() { if (!this.pagesToHits.containsKey(currentPage)) { return 0; } return pagesToHits.get(currentPage); } @Override public LinkedHashMap<Integer, Integer> getHitsPages() { return this.hitsPages; } @Override public String getText() { loadPageInfo(); //inits once String highLightField = null; if (isRegex) { highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX; } else { highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL; } SolrQuery q = new SolrQuery(); q.setShowDebugInfo(DEBUG); //debug // input query has already been properly constructed and escaped q.setQuery(keywordHitQuery); String contentIdStr = Long.toString(this.objectId); if (hasChunks) { contentIdStr += "_" + Integer.toString(this.currentPage); } final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr); q.addFilterQuery(filterQuery); q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema) //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH //tune the highlighter q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS //docs says makes sense for the original Highlighter only, but not really q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS try { QueryResponse response = solrServer.query(q, METHOD.POST); Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting(); Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr); if (responseHighlightID == null) { return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg"); } List<String> contentHighlights = responseHighlightID.get(highLightField); if (contentHighlights == null) { return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg"); } else { // extracted content (minus highlight tags) is HTML-escaped String highlightedContent = contentHighlights.get(0).trim(); highlightedContent = insertAnchors(highlightedContent); return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS } } catch (Exception ex) { logger.log(Level.WARNING, "Error executing Solr highlighting query: " + keywordHitQuery, ex); //NON-NLS return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg"); } } @Override public String toString() { return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.toString"); } @Override public boolean isSearchable() { return true; } @Override public String getAnchorPrefix() { return ANCHOR_PREFIX; } @Override public int getNumberHits() { if (!this.hitsPages.containsKey(this.currentPage)) { return 0; } return this.hitsPages.get(this.currentPage); } private String insertAnchors(String searchableContent) { int searchOffset = 0; int index = -1; StringBuilder buf = new StringBuilder(searchableContent); final String searchToken = HIGHLIGHT_PRE; final int indexSearchTokLen = searchToken.length(); final String insertPre = "<a name='" + ANCHOR_PREFIX; //NON-NLS final String insertPost = "'></a>"; //NON-NLS int count = 0; while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) { String insertString = insertPre + Integer.toString(count + 1) + insertPost; int insertStringLen = insertString.length(); buf.insert(index, insertString); searchOffset = index + indexSearchTokLen + insertStringLen; //next offset past this anchor ++count; } //store total hits for this page, now that we know it this.hitsPages.put(this.currentPage, count); if (this.currentItem() == 0 && this.hasNextItem()) { this.nextItem(); } return buf.toString(); } //dummy instance for Lookup only private static TextMarkupLookup instance = null; //getter of the singleton dummy instance solely for Lookup purpose //this instance does not actually work with Solr public static synchronized TextMarkupLookup getDefault() { if (instance == null) { instance = new HighlightedText(); } return instance; } @Override // factory method to create an instance of this object public TextMarkupLookup createInstance(long objectId, String keywordHitQuery, boolean isRegex, String originalQuery) { return new HighlightedText(objectId, keywordHitQuery, isRegex, originalQuery); } }