/******************************************************************************* * Copyright (c) 2014, 2016 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package org.eclipse.orion.internal.server.search; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.apache.commons.io.DirectoryWalker; import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.LineIterator; import org.eclipse.core.filesystem.EFS; import org.eclipse.core.filesystem.IFileStore; import org.eclipse.core.runtime.CoreException; import org.eclipse.orion.server.core.metastore.ProjectInfo; import org.eclipse.orion.server.core.metastore.WorkspaceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A grep style search that walks the directories looking for files that contain occurrences of a search string. * * @author Aidan Redpath * @author Anthony Hunter */ public class FileGrepper extends DirectoryWalker<SearchResult> { /** * The project currently being searched by the directory walker. */ private ProjectInfo currentProject; /** * The workspace currently being searched by the directory walker. */ private WorkspaceInfo currentWorkspace; private Logger logger = LoggerFactory.getLogger("org.eclipse.orion.server.config"); //$NON-NLS-1$ private Matcher matcher; private SearchOptions options; private Pattern pattern; /** * The constructor for FileGrepper * @param options the search options * @throws SearchException If there was a syntax error with the search term. */ public FileGrepper(SearchOptions options) throws SearchException { super(); this.options = options; if (options.isFileContentsSearch()) { pattern = buildSearchPattern(); matcher = pattern.matcher(""); } else { // remove the Lucene escaped characters, see bugzilla 458450 options.setFilenamePattern(undoLuceneEscape(options.getFilenamePattern())); } } /** * Check if the file path is acceptable. * @param filename The file path string. * @return True is the file passes any of the filename patterns (with wildcards) */ private boolean acceptFilename(String filename) { if (options.getFilenamePattern() == null) { return true; } String[] filenamePatternArray = options.getFilenamePattern().split("/"); for(String filenamePattern : filenamePatternArray ) { if (options.isFilenamePatternCaseSensitive()) { if(FilenameUtils.wildcardMatch(filename, filenamePattern)) return true; } else { if(FilenameUtils.wildcardMatch(filename.toLowerCase(), filenamePattern.toLowerCase())) return true; } } return false; } /** * Build a search pattern based on the search options. * @return A new pattern of the search term. * @throws SearchException If there was a syntax error with the search term. */ private Pattern buildSearchPattern() throws SearchException { int flags = 0; String searchTerm = options.getSearchTerm(); if (!options.isRegEx()) { if (searchTerm.startsWith("\"")) { // remove the double quotes from the start and end of the search pattern searchTerm = searchTerm.substring(1, searchTerm.length() - 1); } // remove the Lucene escaped characters searchTerm = undoLuceneEscape(searchTerm); // change ? and * to regular expression wildcards if (searchTerm.contains("?") || searchTerm.contains("*")) { if (searchTerm.startsWith("*")) { searchTerm = searchTerm.substring(1); } if (searchTerm.contains("?")) { searchTerm = searchTerm.replace('?', '.'); } if (searchTerm.contains("*")) { searchTerm = searchTerm.replace("*", ".*"); } } else { searchTerm = Pattern.quote(searchTerm); } } if (!options.isSearchTermCaseSensitive()) { flags |= Pattern.CASE_INSENSITIVE; } if (options.isSearchWholeWord()){ searchTerm = "\\b" + searchTerm + "\\b"; } /* Possible flags * UNIX_LINES CASE_INSENSITIVE COMMENTS MULTILINE LITERAL DOTALL UNICODE_CASE CANON_E UNICODE_CHARACTER_CLASS*/ try { return Pattern.compile(searchTerm, flags); } catch (PatternSyntaxException e) { throw new SearchException(e); } } @Override protected boolean handleDirectory(File directory, int depth, Collection<SearchResult> results) { if(options.isExcluded(directory.getName())) { return false; } if (results.size() >= options.getRows()) { // stop if we already have the max number of results to return return false; } else if (directory.getName().startsWith(".")) { // ignore directories starting with a dot like '.git' return false; } else { return true; } } @Override protected void handleFile(File file, int depth, Collection<SearchResult> results) { if(options.isExcluded(file.getName())) { return; } if (results.size() >= options.getRows()) { // stop if we already have the max number of results to return return; } // Check if the path is acceptable if (!acceptFilename(file.getName())) return; // Add if it is a filename search or search the file contents. if (!options.isFileContentsSearch() || searchFile(file)) { IFileStore fileStore; try { fileStore = EFS.getStore(file.toURI()); } catch (CoreException e) { logger.error("FileGrepper.handleFile: " + e.getLocalizedMessage(), e); return; } results.add(new SearchResult(fileStore, currentWorkspace, currentProject)); } } /** * Performs the search from the HTTP request * @return A list of files which contain the search term, and pass the filename patterns. * @throws SearchException If there is a problem accessing any of the files. */ public List<SearchResult> search(SearchOptions options) throws SearchException { List<SearchResult> files = new LinkedList<SearchResult>(); if(!options.isFileContentsSearch() && options.getFilenamePattern() == null) { return files; } try { for (SearchScope scope : options.getScopes()) { currentWorkspace = scope.getWorkspace(); currentProject = scope.getProject(); File file = scope.getFile(); if (!file.isDirectory()) { file = file.getParentFile(); } super.walk(file, files); } } catch (IOException e) { throw (new SearchException(e)); } return files; } /** * Searches the contents of a file * @param file The file to search * @return returns whether the search was successful * @throws IOException thrown if there is an error reading the file */ private boolean searchFile(File file) { LineIterator lineIterator = null; try { lineIterator = FileUtils.lineIterator(file); } catch (IOException e) { logger.error("FileGrepper.searchFile: " + e.getLocalizedMessage()); return false; } try { while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); if (line.contains("\0")) { // file contains binary content return false; } matcher.reset(line); if (matcher.find()) { return true; } } } finally { if (lineIterator != null) lineIterator.close(); } return false; } /** * The Orion file client performs an operation that escapes all characters in the string that require escaping * in a Lucene queries. We need to undo since we are not Lucene. * @param searchTerm The search term with escaped characters * @return the correct search term. */ private String undoLuceneEscape(String searchTerm) { if(searchTerm != null) { String specialChars = "+-&|!(){}[]^\"~:\\"; for (int i = 0; i < specialChars.length(); i++) { String character = specialChars.substring(i, i + 1); String escaped = "\\" + character; searchTerm = searchTerm.replaceAll(Pattern.quote(escaped), character); } } return searchTerm; } }