package org.jabref.logic.util.io;

import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Finds files belonging to BibTeX entries by expanding a user supplied search expression
 * (see {@link #findFile(BibEntry, Path, String, String)} for the syntax) and matching it
 * against file names below a set of directories.
 */
class RegExpBasedFileFinder implements FileFinder {

    private static final Log LOGGER = LogFactory.getLog(RegExpBasedFileFinder.class);

    private static final String EXT_MARKER = "__EXTENSION__";

    /**
     * A single backslash that has a non-backslash character on both sides. Lookarounds are used so
     * that the neighbouring characters are not consumed; otherwise consecutive separators around
     * one-character names (as in {@code a\b\c}) would only be converted every other time.
     */
    private static final Pattern ESCAPE_PATTERN = Pattern.compile("(?<=[^\\\\])\\\\(?=[^\\\\])");

    private static final Pattern SQUARE_BRACKETS_PATTERN = Pattern.compile("\\[.*?\\]");

    /** Matches a path part consisting of a single character followed by a colon, e.g. {@code C:}. */
    private static final Pattern DRIVE_LETTER_PATTERN = Pattern.compile("^.:$");

    private final String regExp;
    private final Character keywordDelimiter;

    /**
     * @param regExp           The expression deciding which names are acceptable.
     * @param keywordDelimiter The delimiter handed to the key pattern utilities when expanding bracketed expressions.
     */
    RegExpBasedFileFinder(String regExp, Character keywordDelimiter) {
        this.regExp = regExp;
        this.keywordDelimiter = keywordDelimiter;
    }

    /**
     * Takes a string that contains bracketed expressions and expands each of these using
     * {@link #getFieldAndFormat(String, BibEntry, BibDatabase, Character)}.
     * <p>
     * Unknown bracket expressions are silently dropped.
     *
     * @param bracketString    the string possibly containing {@code [...]} expressions
     * @param entry            the entry supplying the values
     * @param database         the database used to resolve field values; callers in this class pass {@code null}
     * @param keywordDelimiter the delimiter handed to the key pattern utilities
     * @return the input with every bracketed expression replaced by its expansion
     */
    public static String expandBrackets(String bracketString, BibEntry entry, BibDatabase database,
            Character keywordDelimiter) {
        Matcher m = SQUARE_BRACKETS_PATTERN.matcher(bracketString);
        StringBuffer s = new StringBuffer();
        while (m.find()) {
            String replacement = getFieldAndFormat(m.group(), entry, database, keywordDelimiter);
            // The expansion is data, not a replacement template: without quoting, a '$' or '\' in a
            // field value would be interpreted as a group reference / escape by appendReplacement.
            m.appendReplacement(s, Matcher.quoteReplacement(replacement));
        }
        m.appendTail(s);

        return s.toString();
    }

    /**
     * Accepts a string like [author:lower] or [title:abbr] or [auth], whereas the first part signifies the bibtex-field
     * to get, or the key generator field marker to use, while the others are the modifiers that will be applied.
     *
     * @return the expanded value, or the empty string if the expression is empty or yields no value
     */
    public static String getFieldAndFormat(String fieldAndFormat, BibEntry entry, BibDatabase database,
            Character keywordDelimiter) {
        String strippedFieldAndFormat = StringUtil.stripBrackets(fieldAndFormat);

        int colon = strippedFieldAndFormat.indexOf(':');

        final String fieldName;
        final String modifiers;
        if (colon == -1) {
            fieldName = strippedFieldAndFormat.trim();
            modifiers = null;
        } else {
            fieldName = strippedFieldAndFormat.substring(0, colon).trim();
            modifiers = strippedFieldAndFormat.substring(colon + 1);
        }

        if (fieldName.isEmpty()) {
            return "";
        }

        // If no field value was found, try to interpret it as a key generator field marker.
        // orElseGet defers the key generation until it is actually needed.
        String fieldValue = entry.getResolvedFieldOrAlias(fieldName, database)
                .orElseGet(() -> BibtexKeyPatternUtil.makeLabel(entry, fieldName, keywordDelimiter, database));

        if (fieldValue == null) {
            return "";
        }

        if ((modifiers == null) || modifiers.isEmpty()) {
            return fieldValue;
        }

        List<String> parts = Arrays.asList(modifiers.split(":"));
        return BibtexKeyPatternUtil.applyModifiers(fieldValue, parts, 0);
    }

    @Override
    public Map<BibEntry, List<Path>> findAssociatedFiles(List<BibEntry> entries, List<Path> directories,
            List<String> extensions) {
        Map<BibEntry, List<Path>> res = new HashMap<>();
        for (BibEntry entry : entries) {
            res.put(entry, findFiles(entry, extensions, directories));
        }
        return res;
    }

    /**
     * Method for searching for files using regexp. A list of extensions and directories can be
     * given.
     *
     * @param entry       The entry to search for.
     * @param extensions  The extensions that are acceptable.
     * @param directories The root directories to search.
     * @return A list of files paths matching the given criteria.
     */
    private List<Path> findFiles(BibEntry entry, List<String> extensions, List<Path> directories) {
        // The extensions are joined into one regular expression alternation, e.g. "(pdf|ps)".
        String extensionRegExp = '(' + String.join("|", extensions) + ')';
        return findFile(entry, directories, extensionRegExp);
    }

    /**
     * Searches the given directories with the configured filename pattern for files belonging to the
     * BibTeX entry.
     *
     * Used to fix:
     *
     * http://sourceforge.net/tracker/index.php?func=detail&aid=1503410&group_id=92314&atid=600309
     *
     * Requirements:
     * - Be able to find the associated PDF in a set of given directories.
     * - Be fast.
     * - Allow for flexible naming schemes in the PDFs.
     *
     * Syntax scheme for file:
     * <ul>
     * <li>* Any subDir</li>
     * <li>** Any subDir (recursive)</li>
     * <li>[key] Key from BibTeX file and database</li>
     * <li>.* Anything else is taken to be a Regular expression.</li>
     * </ul>
     *
     * @param entry           non-null
     * @param dirs            The root directories to start the search from.
     * @param extensionRegExp The regular expression substituted for the {@code [extension]} marker.
     * @return All files found to match the given criteria over all directories; empty if none was found.
     */
    private List<Path> findFile(BibEntry entry, List<Path> dirs, String extensionRegExp) {
        List<Path> res = new ArrayList<>();
        for (Path directory : dirs) {
            res.addAll(findFile(entry, directory, regExp, extensionRegExp));
        }
        return res;
    }

    /**
     * The actual work-horse. Will find filepaths starting from the
     * given directory using the given regular expression string for search.
     *
     * @param entry           the entry whose values are used to expand bracketed expressions
     * @param directory       the directory to start from (ignored if {@code file} starts with {@code /})
     * @param file            the search expression; parts are separated by {@code /} or a single backslash
     * @param extensionRegExp the regular expression substituted for the {@code [extension]} marker
     * @return the matching paths; never {@code null}
     */
    private List<Path> findFile(BibEntry entry, Path directory, String file, String extensionRegExp) {
        List<Path> res = new ArrayList<>();

        String fileName = file;
        Path actualDirectory;
        if (fileName.startsWith("/")) {
            actualDirectory = Paths.get(".");
            fileName = fileName.substring(1);
        } else {
            actualDirectory = directory;
        }

        // Escape handling: a lone backslash is a path separator, a doubled one is a regex escape
        // (the doubled form is collapsed to a single backslash right before compiling the pattern).
        fileName = ESCAPE_PATTERN.matcher(fileName).replaceAll("/");

        String[] fileParts = fileName.split("/");
        if (fileParts.length == 0) {
            return res;
        }

        // NOTE(review): a part that is none of the markers handled below (for instance a plain
        // directory name) leaves actualDirectory unchanged - confirm that this is intended.
        for (int i = 0; i < (fileParts.length - 1); i++) {
            String dirToProcess = expandBrackets(fileParts[i], entry, null, keywordDelimiter);

            if (DRIVE_LETTER_PATTERN.matcher(dirToProcess).matches()) { // Windows Drive Letter
                actualDirectory = Paths.get(dirToProcess + '/');
                continue;
            }
            if (".".equals(dirToProcess)) { // Stay in current directory
                continue;
            }
            if ("..".equals(dirToProcess)) {
                actualDirectory = parentOf(actualDirectory);
                continue;
            }
            if ("*".equals(dirToProcess)) { // Do for all direct subdirs
                File[] subDirs = actualDirectory.toFile().listFiles();
                if (subDirs != null) {
                    String restOfFileString = StringUtil.join(fileParts, "/", i + 1, fileParts.length);
                    for (File subDir : subDirs) {
                        if (subDir.isDirectory()) {
                            res.addAll(findFile(entry, subDir.toPath(), restOfFileString, extensionRegExp));
                        }
                    }
                }
            }
            // Do for all direct and indirect subdirs
            if ("**".equals(dirToProcess)) {
                String restOfFileString = StringUtil.join(fileParts, "/", i + 1, fileParts.length);
                final Path walkRoot = actualDirectory;
                List<Path> subDirectories = new ArrayList<>();

                // The stream returned by Files.walk holds open directory handles and has to be closed.
                try (Stream<Path> walk = Files.walk(walkRoot)) {
                    // We only want to traverse directories (and not the current one; this is already done below)
                    walk.filter(subElement -> !walkRoot.equals(subElement) && Files.isDirectory(subElement))
                            .forEach(subDirectories::add);
                } catch (IOException | UncheckedIOException e) {
                    // Errors hit while iterating surface as UncheckedIOException
                    LOGGER.debug("Could not traverse directory " + walkRoot, e);
                }

                for (Path subDirectory : subDirectories) {
                    res.addAll(findFile(entry, subDirectory, restOfFileString, extensionRegExp));
                }
            }
        } // End process directory information

        // Last step: check if the given file can be found below this directory
        String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER);
        // Literal replace: the extension expression must not be read as a replacement template
        String filenameToLookFor = expandBrackets(filePart, entry, null, keywordDelimiter)
                .replace(EXT_MARKER, extensionRegExp);
        final Pattern toMatch = Pattern.compile('^' + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + '$',
                Pattern.CASE_INSENSITIVE);

        // NOTE(review): the search depth is unlimited, so files in nested directories match as well and
        // may be reported more than once in combination with * and ** - confirm that this is intended.
        try (Stream<Path> found = Files.find(actualDirectory, Integer.MAX_VALUE,
                (path, attributes) -> matchesFileName(toMatch, path))) {
            res.addAll(found.collect(Collectors.toList()));
        } catch (IOException | UncheckedIOException e) {
            LOGGER.debug("Could not search directory " + actualDirectory, e);
        }
        return res;
    }

    /**
     * Checks the last name element of the path against the pattern. A path without a name element
     * (for instance a file system root, which Files.find visits as the start path) never matches.
     */
    private static boolean matchesFileName(Pattern toMatch, Path path) {
        Path name = path.getFileName();
        return (name != null) && toMatch.matcher(name.toString()).matches();
    }

    /**
     * Returns the parent of the directory. {@link Path#getParent()} yields {@code null} for
     * single-element relative paths such as {@code .}; in that case the parent of the absolute,
     * normalized form is used. A directory without any parent is returned unchanged.
     */
    private static Path parentOf(Path directory) {
        Path parent = directory.getParent();
        if (parent == null) {
            parent = directory.toAbsolutePath().normalize().getParent();
        }
        return (parent == null) ? directory : parent;
    }
}