ResultContentProvider.java example

Explorer

eclipse-instasearch-master
- instasearch
  - src
    - it
      - unibz
        instasearch
        InstaSearch.java
        InstaSearchPlugin.java
        InstaSearchStartup.java
        actions
        BuildIndexActionDelegate.java
        CheckUpdatesActionDelegate.java
        CollapseAllActionDelegate.java
        DeleteIndexActionDelegate.java
        ExpandAllActionDelegate.java
        IncrementalSearchActionDelegate.java
        InstaSearchActionDelegate.java
        OpenSearchDialog.java
        PreferencesAction.java
        ShowExceptionAction.java
        ShowInstaSearchAction.java
        VisitHomePageActionDelegate.java
        indexing
        Field.java
        FileAnalyzer.java
        LengthNormSimilarity.java
        QueryAnalyzer.java
        ResourceCollector.java
        SearchQuery.java
        SearchResult.java
        SearchResultDoc.java
        Searcher.java
        StorageIndexer.java
        WorkspaceIndexer.java
        WorkspaceIndexerJDT.java
        querying
        CSVExpander.java
        CurrentProjectSetter.java
        FieldAliasConverter.java
        FileNameSearcher.java
        FilterSetter.java
        FolderSearcher.java
        LastTermQueryPrefixer.java
        LowercaseConverter.java
        ModifiedTimeConverter.java
        PhraseSearcher.java
        QueryFuzzifier.java
        QueryVisitor.java
        UppercaseNameExpander.java
        VisitableQuery.java
        WorkingSetExpander.java
        tokenizers
        CamelCaseTokenizer.java
        DotSplitTokenizer.java
        TermSplitTokenizer.java
        WordSplitTokenizer.java
        standard
        CharStream.java
        FastCharStream.java
        ParseException.java
        StandardTokenizer.java
        StandardTokenizerConstants.java
        StandardTokenizerTokenManager.java
        Token.java
        TokenMgrError.java
        jobs
        CheckUpdatesJob.java
        DeleteIndexJob.java
        IndexUpdateJob.java
        IndexingJob.java
        UpdatePluginJob.java
        prefs
        InstaSearchPreferencePage.java
        PreferenceConstants.java
        PreferenceInitializer.java
        ui
        DropdownMenuProvider.java
        InstaSearchPage.java
        InstaSearchUI.java
        InstaSearchView.java
        MatchHighlightJob.java
        ReportErrorDialog.java
        ResultContentProvider.java
        ResultLabelProvider.java
        SearchContentProposalProvider.java
        SearchJob.java
        SearchViewControl.java
        StyledTextContentAdapter.java
  - test
    - it
      - unibz
        instasearch
        indexing
        SearcherTest.java
        TestSearcher.java
        TestStorage.java

/*
 * Copyright (c) 2009 Andrejs Jermakovics.
 * 
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Andrejs Jermakovics - initial implementation
 */
package it.unibz.instasearch.ui;

import it.unibz.instasearch.InstaSearch;
import it.unibz.instasearch.InstaSearchPlugin;
import it.unibz.instasearch.indexing.Field;
import it.unibz.instasearch.indexing.SearchQuery;
import it.unibz.instasearch.indexing.SearchResult;
import it.unibz.instasearch.indexing.SearchResultDoc;
import it.unibz.instasearch.indexing.Searcher;
import it.unibz.instasearch.indexing.StorageIndexer;
import it.unibz.instasearch.indexing.WorkspaceIndexer;
import it.unibz.instasearch.prefs.PreferenceConstants;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.eclipse.core.resources.IFile;
import org.eclipse.core.resources.IStorage;
import org.eclipse.jface.viewers.ITreeContentProvider;
import org.eclipse.jface.viewers.Viewer;
import org.eclipse.search.ui.text.Match;
import org.eclipse.ui.IEditorInput;

class ResultContentProvider implements ITreeContentProvider {
	
	private static final String[] NO_INDEX_MESSAGE = new String[] {"Index is not built"};
	private static final String[] NO_FILE_MESSAGE = new String[] {"File missing"};
	private static final String NO_RESULTS_MESSAGE = "0 matches";
	
	public static final double MAX_LINE_SIMILARITY = 0.9; // if above that, only one of the similar lines is kept
	public static final int MAX_LINES_TO_PROCESS = 5000; // max nr of lines to read from files (prevent slowdown on huge files)
	
	private WorkspaceIndexer indexer;
	private Searcher searcher;
	
	/**
	 * Map of search terms to their boost score
	 */
	private SearchQuery currentSearchQuery;
	private Map<String, Float> searchTerms;
	
	private Object[] cachedResults;
	private SearchResultDoc cachedResultDoc;
	private Object[] cachedChildren;
	private int resultCount;
	
	
	public ResultContentProvider() {
		InstaSearch instaSearch = InstaSearchPlugin.getInstaSearch();
		
		this.indexer = instaSearch.getIndexer();
		this.searcher = instaSearch.getSearcher();
	}
	
	public void inputChanged(Viewer v, Object oldInput, Object newSearch) {
		if( newSearch==null || !(newSearch instanceof SearchQuery) ) {
			currentSearchQuery = null; // clear cache
			cachedResults = null;
		}	
	}
	
	public void dispose() {	
	}
	
	public Object[] getElements(Object searchQueryObj) 
	{	
		if( searchQueryObj==null || !(searchQueryObj instanceof SearchQuery) )
			return Collections.EMPTY_LIST.toArray(); // not searching
		
		SearchQuery searchQuery = (SearchQuery) searchQueryObj;
		String searchString = searchQuery.getSearchString();
		
		if( searchString==null || searchString.toString().length() < Searcher.MIN_QUERY_LENGTH )
			return Collections.EMPTY_LIST.toArray();
		
		try
		{
			if( ! indexer.isIndexed() )
				return NO_INDEX_MESSAGE;
		} catch(Exception e)
		{
			InstaSearchPlugin.log(e);
			return new Exception[]{e};
		}
		
		if( searchQuery.equals(currentSearchQuery) && cachedResults != null ) // same query
			return cachedResults;
		
		SearchResult result = null;
		cachedResults = null;
		resultCount = 0;
		Object[] resultArray = null;
		
		if(searchString != null) 
		{
			try {
				result = searcher.search(searchQuery); // do the search
				currentSearchQuery = searchQuery;
				
				if( result == null ) {
					if( !searchQuery.isFuzzy() ) {
						SearchQuery newQuery = new SearchQuery(searchQuery);
						newQuery.setExact(false);
						newQuery.setFuzzy(true);
						resultArray = new Object[]{NO_RESULTS_MESSAGE, newQuery}; // add fuzzy query
					} else {
						resultArray = new Object[]{NO_RESULTS_MESSAGE};
					}
					
					cachedResults = resultArray;
					
					return resultArray;
				}
				
				searchTerms = result.getSearchTerms();
				
			} catch (Exception e) {
				InstaSearchPlugin.log(e);
				return new Exception[]{e};
			}
		}
		
		List<SearchResultDoc> resultDocs = result.getResultDocs();
		this.resultCount = resultDocs.size();
		
		boolean addMoreResults = false, addFindSimilar = false;
		
		if( searchQuery.isLimited() && result.isFull() ) { // if only showing limited number of matches
			addMoreResults = true;
		}
		else
		{
			if( searchQuery.isExact() && !searchQuery.isFuzzy() ) // if query is exact, can try search for individual tokens
				addFindSimilar = true;
		}
		
		if( addMoreResults || addFindSimilar )
			resultArray = new Object[resultCount + 1];  // +1 because we append "More..." element (a SearchQuery object)
		else
			resultArray = new Object[resultCount];
		
		resultDocs.toArray(resultArray);
		
		if( addMoreResults ) { // if more results, create More result entry (return all elements)
			SearchQuery moreResultsQuery = new SearchQuery(currentSearchQuery);
			moreResultsQuery.setMaxResults(SearchQuery.UNLIMITED_RESULTS); // all results
			resultArray[resultCount] = moreResultsQuery;
		}
		else if( addFindSimilar )
		{
			SearchQuery findSimilarQuery = new SearchQuery(currentSearchQuery);
			findSimilarQuery.setExact(false);
			resultArray[resultCount] = findSimilarQuery;
		}
		
		cachedResults = resultArray;
		
		return resultArray;
	}
	
	/**
	 *  Count of search result docs from last search
	 * @return result count
	 *  
	 */
	public int getResultCount()
	{
		return resultCount; 
	}
	
	/**
	 * Returns last search result elements.
	 * Includes search result docs and additional action entries (eg More results)
	 * 
	 * @return  array of last search result elements
	 */
	public Object[] getElements() {
		return cachedResults;
	}
	
	public Object[] getChildren(Object parent) 
	{
		if( parent instanceof SearchResultDoc ) {
			
			SearchResultDoc doc = (SearchResultDoc) parent;
			
			if( cachedResultDoc != null && doc.equals(cachedResultDoc) )
				return cachedChildren; // cache results
			
			List<MatchLine> matches = null;
			Object[] children = null;
			
			try {
				matches = getMatchLines(doc, true, null);
				if( matches != null )
					children = matches.toArray();
			} catch (Exception e) {
				InstaSearchPlugin.log(e);
			}
			
			if( matches == null )
				children = NO_FILE_MESSAGE;
			
			cachedResultDoc = doc;
			cachedChildren = children;
			
			return children;
		}
		else if( parent instanceof Exception ) {
			Exception e = (Exception) parent;
			return e.getStackTrace();
		}
		
		return Collections.EMPTY_LIST.toArray();
	}
	
	public interface MatchFindCallback
	{
		void matchFound(MatchLine line);
		boolean isCanceled();
	}
	
	/**
	 * Returns matched lines
	 * @param doc
	 * @param limit
	 * @return
	 * @throws Exception
	 */
	List<MatchLine> getMatchLines(SearchResultDoc doc, boolean limit, MatchFindCallback callback) throws Exception {
		
		if( searchTerms == null || currentSearchQuery == null )
			return Collections.emptyList();
		
		int maxMatches = InstaSearchPlugin.getIntPref(PreferenceConstants.P_SHOWN_LINES_COUNT);
		List<MatchLine> matchedLines = new ArrayList<MatchLine>();
		int matchCount = doc.getMatchCount();
		String searchString = currentSearchQuery.getSearchString().toLowerCase(Locale.ENGLISH);
		
		IStorage f = getStorage(doc);
		if( f == null ) {
			// index might be outdated (disabled updating)
			//TODO: remove file from index (update index)
			return null;
		}
		
		InputStream fileInputStream = null;
		
		if( f instanceof IFile ) {
			IFile file = (IFile) f;
			if( !file.exists() )
				return null;
			fileInputStream = file.getContents(true);
		} else {
			fileInputStream = f.getContents();
		}
		
		LineNumberReader lineReader = new LineNumberReader(new InputStreamReader(fileInputStream)); // is a buffered reader
		
		String line;
		
		// Read through file one line at a time 
		while ( (line = lineReader.readLine()) != null ) {
			
			if( callback != null && callback.isCanceled() ) break;
			//if( currentSearchQuery.isCanceled() ) break;
			
			if( "".equals(line) ) continue;
			
			Map<String, List<Integer>> lineTerms = StorageIndexer.extractTextTerms(line);
			if( lineTerms.isEmpty() ) continue;
			
			HashSet<String> matchedTerms = new HashSet<String>(searchTerms.keySet()); // search terms that appear on this line
			matchedTerms.retainAll(lineTerms.keySet());
			
			if( matchedTerms.isEmpty() && matchCount != 0 && limit ) // if have matches in general, but not on this line, then skip
				continue;
			
			float[] lineTermScoreVector = doc.getTermScoreVector(lineTerms.keySet());
			float[] matchedTermScoreVector = doc.getTermScoreVector(matchedTerms);
			
			MatchLine matchLine = new MatchLine(doc, line, lineReader.getLineNumber(), matchedTerms, lineTermScoreVector, matchedTermScoreVector);
			matchedLines.add(matchLine);
			
			addMatches(matchLine, lineTerms, matchedTerms, searchString);
			if( callback != null )
				callback.matchFound(matchLine);
			
			if( lineReader.getLineNumber() > MAX_LINES_TO_PROCESS )
				break;
			
			//TODO: break if all current matches have high score (eg >0.9)
		}
		
		lineReader.close();
		
		if(limit && matchedLines.size() > maxMatches) {	
			matchedLines = getTopMatchLines(maxMatches, matchedLines); // return TOP N lines
			return matchedLines;
			
		} else
			return matchedLines;
	}

	private List<MatchLine> getTopMatchLines(int maxMatchLines, List<MatchLine> matchedLines) {
		Collections.sort(matchedLines); // sort by match count, score, line
		removeSimilarLines(matchedLines, maxMatchLines);
		matchedLines = matchedLines.subList(0, maxMatchLines); // top N results
		
		Collections.sort(matchedLines, new Comparator<MatchLine>() { // sort by line number for display
			public int compare(MatchLine l1, MatchLine l2) {
				return l1.getLineNumber() - l2.getLineNumber();
			}
		});
		
		return matchedLines;
	}

	/**
	 * Find matches on the line
	 * 
	 * @param matchLine
	 * @param terms
	 * @param matchedTerms
	 * @param searchString
	 * @return
	 */
	private float addMatches(MatchLine matchLine, Map<String, List<Integer>> terms, 
			Set<String> matchedTerms, String searchString) {
		
		String lcaseLine = matchLine.getLine().toLowerCase(Locale.ENGLISH);
		
		if( !matchedTerms.contains(searchString) && !currentSearchQuery.isFuzzy() ) { // check for exact match on the line
			
			int pos = lcaseLine.indexOf(searchString);
			
			while( pos != -1 ) {
				Match m = new Match(searchString, pos, searchString.length());
				matchLine.add(m, true);
				pos = lcaseLine.indexOf(searchString, pos + searchString.length() - 1);
			}
		}
		
		float matchedTermBoost = 0;
		
		for(String term: matchedTerms) {							
			List<Integer> offsets = terms.get(term);
			
			for(int offset: offsets) {
				int pos = lcaseLine.indexOf(term, offset);
				if( pos == -1 ) continue;
				Match m = new Match(term, pos, term.length());
				matchLine.add(m);
			}
			
			float boost = searchTerms.get(term);
			
			matchedTermBoost += boost;
		}
		
		matchLine.setMatchedTermBoost(matchedTermBoost);
		
		return matchedTermBoost;
	}
	
	/**
	 * Removes similar lines from line matches.
	 * Even if they are high scored, we don not want to see the same lines again
	 * Line similarity is based on Cosine between their corresponding term vectors
	 * 
	 * @param matchedLines
	 * @param maxMatches
	 */
	private void removeSimilarLines(List<MatchLine> matchedLines, int maxMatches)
	{
		MatchLine curMatchLine = null;
		
		int lineNr = 0;
		for (Iterator<MatchLine> iterator = matchedLines.iterator(); 
					iterator.hasNext() && matchedLines.size()>maxMatches; )
		{
			MatchLine matchLine = iterator.next();
			
			if( curMatchLine == null ) {
				curMatchLine = matchLine;
				lineNr++;
				continue;
			}
			
			double similarity = getLineSimilarity(curMatchLine, matchLine);
			
			if( similarity > MAX_LINE_SIMILARITY )
				iterator.remove(); // since lines are sorted by score, lowest score line will be removed
			else {
				curMatchLine = matchLine;
				lineNr++;	
			}
			
			if( lineNr == maxMatches )
				break;
		}
	}

	/**
	 * Calculates similarity based on the Cosine angle between score vectors of each line.
	 * 
	 * @param lineMatches1
	 * @param lineMatches2
	 * @return
	 */
	private double getLineSimilarity(MatchLine lineMatches1, MatchLine lineMatches2) 
	{
		float[] vect1 = lineMatches1.getScoreVector();
		float[] vect2 = lineMatches2.getScoreVector();
		
		double dotProduct = 0.0;
		double magnitude1 = 0.0;
		double magnitude2 = 0.0;
		for (int i = 0; i < vect1.length ; i++) {
		    double val1 = vect1[i];
		    double val2 = vect2[i];
		    magnitude1 += val1 * val1;
		    magnitude2 += val2 * val2;
		    dotProduct += val1 * val2;
		}
		magnitude1 = Math.sqrt(magnitude1);
		magnitude2 = Math.sqrt(magnitude2);
		return (magnitude1 == 0 || magnitude2 == 0)
		    ? 0
		    : dotProduct / (magnitude1 * magnitude2);
		
	}

	public Object getParent(Object element) {
		return null;
	}
	
	public boolean hasChildren(Object element) {
		return ( element instanceof SearchResultDoc ) || ( element instanceof SearchQuery );
	}
	
	public Collection<String> getSearchTerms() {
		return searchTerms.keySet();
	}

	public IEditorInput getEditorInput(SearchResultDoc doc) throws Exception {
		return indexer.getEditorInput(doc);
	}
	
	public IStorage getStorage(SearchResultDoc doc) throws Exception {
		return indexer.getStorage(doc);
	}
	
	public List<String> getProposals(String prefix, Field field) throws IOException
	{
		List<String> ucaseProposals = searcher.getProposals(prefix.toUpperCase(), field);
		
		if( prefix.toUpperCase().equals(prefix.toLowerCase(Locale.ENGLISH)))
			return ucaseProposals;
		
		List<String> lcaseProposals = searcher.getProposals(prefix.toLowerCase(Locale.ENGLISH), field);
		
		ucaseProposals.addAll(lcaseProposals);
		Collections.sort(ucaseProposals, String.CASE_INSENSITIVE_ORDER);
		
		return ucaseProposals;
	}
	
	/**
	 * A class representing a line in a document and containing some keyword matches
	 */
	class MatchLine implements Comparable<MatchLine> {
		
		private String lineText;
		private List<Match> matches = new LinkedList<Match>();
		private SearchResultDoc doc;
		private int lineNumber;
	
		private double termScore;
		private double matchedTermScore;
		private float[] scoreVector;
		private float matchedTermBoost;
		private int exactMatches;
		private int matchedTermCount;
		
		private MatchLine(SearchResultDoc doc, String lineText, int lineNumber, Set<String> matchedTerms, float[] termScoreVector, float[] matchedTermScoreVector) throws IOException {
			this.doc = doc;
			this.lineText = lineText;
			this.lineNumber = lineNumber;
			this.scoreVector = termScoreVector;
			
			termScore = getMagnitude(termScoreVector);
			matchedTermScore = getMagnitude(matchedTermScoreVector);
			matchedTermCount = matchedTerms.size();
		}

		public void setMatchedTermBoost(float matchedTermBoost)
		{
			this.matchedTermBoost = matchedTermBoost;
		}

		public float getMatchedTermBoost()
		{
			return matchedTermBoost;
		}
		
		public void add(Match m) {
			matches.add(m);
		}
		
		public void add(Match m, boolean isExactMatch) {
			matches.add(m);
			if( isExactMatch )
				this.exactMatches++;
		}
		
		public List<Match> getMatches() {
			return matches;
		}

		public String getLine() {
			return lineText;
		}

		public int getLineNumber() {
			return lineNumber;
		}
				
		public double getTermScore()
		{
			return termScore;
		}
		
		public float[] getScoreVector()
		{
			return scoreVector;
		}
		
		public double getMatchedTermScore()
		{
			return matchedTermScore;
		}
		
		public int compareTo(MatchLine lineMatches) { // to sort by match count and then by line number
			int diff = lineMatches.exactMatches - exactMatches;
			if( diff == 0 )
				diff = lineMatches.matchedTermCount - matchedTermCount;
			if( diff == 0 )
				Double.compare(lineMatches.matchedTermBoost, matchedTermBoost);
			if(diff == 0)
				diff = Double.compare(lineMatches.termScore, termScore);
			if(diff == 0)
				return getLineNumber() - lineMatches.lineNumber; // smaller to bigger
			return diff;
		}
		
		public SearchResultDoc getResultDoc()
		{
			return doc;
		}
	
		/**
		 * Vector magnitude 
		 * 
		 * @param vect
		 * @return
		 */
		private double getMagnitude(float[] vect)
		{
			double magnitude = 0;
			
			for(float value: vect)
				magnitude+=value*value;
			
			magnitude = Math.sqrt(magnitude);
			
			return magnitude;
		}

		@Override
		public String toString()
		{
			return "Line " + lineNumber + ": (" + matchedTermCount + ")" + lineText;
		}
	}
}