// RawText.java example
//
// Explorer
// autopsy-master
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2016 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import java.util.LinkedHashMap;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskData;

/**
 * A "source" for the extracted content viewer that displays "raw" (not
 * highlighted) indexed text for a file or an artifact.
 *
 * Text is fetched from the keyword search server one chunk ("page") at a
 * time. The current page starts at 0 and is advanced with
 * {@link #nextPage()}; the current page number is passed to the server
 * unchanged as the chunk id when content text is requested.
 */
class RawText implements IndexedText {

    private static final Logger logger = Logger.getLogger(RawText.class.getName());

    // Paging state. numPages stays 0 if the chunk count could not be queried.
    private int numPages = 0;
    private int currentPage = 0;
    private boolean hasChunks = false;

    // Exactly one of content / blackboardArtifact is set by the constructors.
    private final Content content;
    private final BlackboardArtifact blackboardArtifact;
    private final long objectId;

    // Last rendered content chunk, kept so repeated getText() calls for the
    // same page do not query the server again.
    private String cachedString;
    private int cachedChunk;

    /**
     * Construct a new RawText object for the given content and object id. This
     * constructor needs both a content object and an object id because the
     * RawText implementation attempts to provide useful messages in the text
     * content viewer for (a) the case where a file has not been indexed because
     * known files are being skipped and (b) the case where the file content has
     * not yet been indexed.
     *
     * @param content  Used to get access to file names and "known" status.
     * @param objectId Either a file id or an artifact id.
     */
    RawText(Content content, long objectId) {
        this.content = content;
        this.blackboardArtifact = null;
        this.objectId = objectId;
        initialize();
    }

    /**
     * Construct a new RawText object for the indexed text of an artifact.
     *
     * @param bba      The artifact whose indexed text is displayed. It is only
     *                 used to select the artifact code path in getText().
     * @param objectId The id used to look up the indexed text on the server.
     */
    RawText(BlackboardArtifact bba, long objectId) {
        this.content = null;
        this.blackboardArtifact = bba;
        this.objectId = objectId;
        initialize();
    }

    /**
     * Return the ID that this object is associated with -- to help with caching
     *
     * @return the object id passed to the constructor
     */
    public long getObjectId() {
        return this.objectId;
    }

    @Override
    public int getCurrentPage() {
        return this.currentPage;
    }

    @Override
    public boolean hasNextPage() {
        return currentPage < numPages;
    }

    @Override
    public boolean hasPreviousPage() {
        return currentPage > 1;
    }

    /**
     * Advance to the next page.
     *
     * @return the new current page number
     *
     * @throws IllegalStateException if there is no next page
     */
    @Override
    public int nextPage() {
        if (!hasNextPage()) {
            throw new IllegalStateException(
                    NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.nextPage.exception.msg"));
        }
        ++currentPage;
        return currentPage;
    }

    /**
     * Go back to the previous page.
     *
     * @return the new current page number
     *
     * @throws IllegalStateException if there is no previous page
     */
    @Override
    public int previousPage() {
        if (!hasPreviousPage()) {
            throw new IllegalStateException(
                    NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.previousPage.exception.msg"));
        }
        --currentPage;
        return currentPage;
    }

    // Raw text has no search hits ("items"), so item navigation is unsupported.
    @Override
    public boolean hasNextItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.hasNextItem.exception.msg"));
    }

    @Override
    public boolean hasPreviousItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.hasPreviousItem.exception.msg"));
    }

    @Override
    public int nextItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.nextItem.exception.msg"));
    }

    @Override
    public int previousItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.previousItem.exception.msg"));
    }

    @Override
    public int currentItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.currentItem.exception.msg"));
    }

    /**
     * Get the text to display for the current page.
     *
     * @return the HTML for the current content page or for the artifact, or a
     *         generic error message if the text could not be retrieved
     */
    @Override
    public String getText() {
        try {
            if (this.content != null) {
                return getContentText(currentPage, hasChunks);
            } else if (this.blackboardArtifact != null) {
                return getArtifactText();
            }
        } catch (SolrServerException ex) {
            logger.log(Level.SEVERE, "Couldn't get extracted content", ex); //NON-NLS
        }
        return textUnavailableMessage();
    }

    @NbBundle.Messages({
        "RawText.FileText=File Text",
        "RawText.ResultText=Result Text"})
    @Override
    public String toString() {
        if (content == null) {
            return Bundle.RawText_ResultText();
        }
        return Bundle.RawText_FileText();
    }

    @Override
    public boolean isSearchable() {
        return false;
    }

    @Override
    public String getAnchorPrefix() {
        return "";
    }

    @Override
    public int getNumberHits() {
        return 0;
    }

    /**
     * Raw text has no hits, so there is no hit-per-page map.
     *
     * @return always null
     */
    @Override
    public LinkedHashMap<Integer, Integer> getHitsPages() {
        return null;
    }

    @Override
    public int getNumberPages() {
        return numPages;
    }

    /**
     * Set the internal values, such as pages and chunks.
     *
     * Queries the server for the number of chunks stored for this object id. A
     * count of zero is treated as a single page without chunks. If the query
     * fails, a warning is logged and the defaults (no pages, no chunks) are
     * kept.
     */
    private void initialize() {
        final Server solrServer = KeywordSearch.getServer();

        try {
            numPages = solrServer.queryNumFileChunks(this.objectId);
            hasChunks = (numPages != 0);
            if (!hasChunks) {
                // still expose one page so the "no text" message can be shown
                numPages = 1;
            }
        } catch (KeywordSearchModuleException ex) {
            logger.log(Level.WARNING, "Could not get number of chunks: ", ex); //NON-NLS
        } catch (NoOpenCoreException ex) {
            logger.log(Level.WARNING, "Could not get number of chunks: ", ex); //NON-NLS
        }
    }

    /**
     * Get the extracted text of one page of this object's content from Solr.
     *
     * @param currentPage currently used page; used as the chunk id
     * @param hasChunks   true if the content has chunks stored in the index.
     *                    If false, an explanatory message is returned instead
     *                    of text.
     *
     * @return the extracted content as HTML, an explanatory message if there
     *         is no text, or an empty string if no core is open
     *
     * @throws SolrServerException if something goes wrong
     */
    private String getContentText(int currentPage, boolean hasChunks) throws SolrServerException {
        final Server solrServer = KeywordSearch.getServer();

        if (!hasChunks) {
            //if no chunks, it is safe to assume there is no text content
            //because we are storing extracted text in chunks only
            //and the non-chunk stores meta-data only
            String name = content.getName();
            // The name ends up inside an HTML message, so escape it like the
            // chunk text is escaped; file names come from the data source.
            String safeName = (name == null) ? null : EscapeUtil.escapeHtml(name);
            String msg = null;
            if (content instanceof AbstractFile) {
                //we know it's AbstractFile, but do quick check to make sure if we index other objects in future
                boolean isKnown = TskData.FileKnown.KNOWN.equals(((AbstractFile) content).getKnown());
                if (isKnown && KeywordSearchSettings.getSkipKnown()) {
                    msg = NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.getSolrContent.knownFileMsg", safeName);
                }
            }
            if (msg == null) {
                msg = NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.getSolrContent.noTxtYetMsg", safeName);
            }
            return NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.getSolrContent.txtBodyItal", msg);
        }

        final int chunkId = currentPage;

        //check if cached
        if (cachedString != null && cachedChunk == chunkId) {
            return cachedString;
        }

        //not cached
        try {
            String indexedText = solrServer.getSolrContent(this.objectId, chunkId);
            if (indexedText == null) {
                // NOTE(review): assumes the server may return null when no
                // document is found -- confirm against Server.getSolrContent.
                // Do not cache; report the same message getText() uses on failure.
                return textUnavailableMessage();
            }
            cachedString = toPreformattedHtml(indexedText);
            cachedChunk = chunkId;
        } catch (NoOpenCoreException ex) {
            logger.log(Level.SEVERE, "No open core", ex); //NON-NLS
            return "";
        }
        return cachedString;
    }

    /**
     * Get the indexed text of the artifact from Solr. Chunk id 1 is always
     * requested.
     *
     * @return the indexed text as HTML, a generic error message if no text was
     *         returned, or an empty string if no core is open
     *
     * @throws SolrServerException if something goes wrong
     */
    private String getArtifactText() throws SolrServerException {
        try {
            String indexedText = KeywordSearch.getServer().getSolrContent(this.objectId, 1);
            if (indexedText == null) {
                // NOTE(review): see getContentText -- null is assumed to mean
                // "nothing retrievable" rather than an empty document.
                return textUnavailableMessage();
            }
            return toPreformattedHtml(indexedText);
        } catch (NoOpenCoreException ex) {
            logger.log(Level.SEVERE, "No open core", ex); //NON-NLS
            return "";
        }
    }

    /**
     * Message shown when the text could not be retrieved.
     *
     * @return the localized "RawText.getText.error.msg" message
     */
    private String textUnavailableMessage() {
        return NbBundle.getMessage(this.getClass(), "RawText.getText.error.msg");
    }

    /**
     * HTML-escape and trim the given text and wrap it in a pre element.
     *
     * @param indexedText the text to render; must not be null
     *
     * @return the escaped, trimmed text wrapped in pre tags
     */
    private static String toPreformattedHtml(String indexedText) {
        String escaped = EscapeUtil.escapeHtml(indexedText).trim();
        StringBuilder sb = new StringBuilder(escaped.length() + 20);
        sb.append("<pre>").append(escaped).append("</pre>"); //NON-NLS
        return sb.toString();
    }

}