package org.jabref.logic.bibtexkeypattern; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.Optional; import java.util.Scanner; import java.util.StringJoiner; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jabref.logic.formatter.Formatters; import org.jabref.logic.formatter.casechanger.Word; import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; import org.jabref.model.bibtexkeypattern.AbstractBibtexKeyPattern; import org.jabref.model.cleanup.Formatter; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.AuthorList; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.FieldName; import org.jabref.model.entry.Keyword; import org.jabref.model.entry.KeywordList; import org.jabref.model.strings.StringUtil; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * This is the utility class of the LabelPattern package. 
*/ public class BibtexKeyPatternUtil { private static final Log LOGGER = LogFactory.getLog(BibtexKeyPatternUtil.class); private static final String STARTING_CAPITAL_PATTERN = "[^A-Z]"; // All single characters that we can use for extending a key to make it unique: private static final String CHARS = "abcdefghijklmnopqrstuvwxyz"; private static final Pattern REGEX_PATTERN = Pattern.compile(".*\\(\\{([A-Z]+)\\}\\).*"); private static final int CHARS_OF_FIRST = 5; private BibtexKeyPatternUtil() { } private static String normalize(String content) { List<String> tokens = new ArrayList<>(); int b = 0; StringBuilder and = new StringBuilder(); StringBuilder token = new StringBuilder(); for (int p = 0; p < content.length(); p++) { if (b == 0) { String andString = and.toString(); // Avoid lots of calls if (((andString.isEmpty()) && (content.charAt(p) == ' ')) || (" ".equals(andString) && (content.charAt(p) == 'a')) || (" a".equals(andString) && (content.charAt(p) == 'n')) || (" an".equals(andString) && (content.charAt(p) == 'd'))) { and.append(content.charAt(p)); } else if (" and".equals(and.toString()) && (content.charAt(p) == ' ')) { and = new StringBuilder(); tokens.add(token.toString().trim()); token = new StringBuilder(); } else { if (content.charAt(p) == '{') { b++; } if (content.charAt(p) == '}') { b--; } token.append(and); and = new StringBuilder(); token.append(content.charAt(p)); } } else { token.append(content.charAt(p)); } } tokens.add(token.toString()); StringBuilder normalized = new StringBuilder(""); for (int i = 0; i < tokens.size(); i++) { if (i > 0) { normalized.append(" and "); } normalized.append(isInstitution(tokens.get(i)) ? generateInstitutionKey(tokens.get(i)) : removeDiacritics( tokens.get(i))); } return normalized.toString(); } /** * Will remove diacritics from the content. * * Replaces umlaut: \"x with xe, e.g. \"o -> oe, \"u -> ue, etc. * Removes all other diacritics: \?x -> x, e.g. \'a -> a, etc. * * @param content The content. 
* @return The content without diacritics. */ private static String removeDiacritics(String content) { if (content.isEmpty()) { return content; } String result = content; // Replace umlaut with '?e' result = result.replaceAll("\\{\\\\\"([a-zA-Z])\\}", "$1e"); result = result.replaceAll("\\\\\"\\{([a-zA-Z])\\}", "$1e"); result = result.replaceAll("\\\\\"([a-zA-Z])", "$1e"); // Remove diacritics result = result.replaceAll("\\{\\\\.([a-zA-Z])\\}", "$1"); result = result.replaceAll("\\\\.\\{([a-zA-Z])\\}", "$1"); result = result.replaceAll("\\\\.([a-zA-Z])", "$1"); return result; } /** * Unifies umlauts. * * Replaces: $\ddot{\mathrm{X}}$ (an alternative umlaut) with: {\"X} * Replaces: \?{X} and \?X with {\?X}, where ? is a diacritic symbol * * @param content The content. * @return The content with unified diacritics. */ private static String unifyDiacritics(String content) { return content.replaceAll( "\\$\\\\ddot\\{\\\\mathrm\\{([^\\}])\\}\\}\\$", "{\\\"$1}").replaceAll( "(\\\\[^\\-a-zA-Z])\\{?([a-zA-Z])\\}?", "{$1$2}"); } /** * Check if a value is institution. * * This is usable for distinguishing between persons and institutions in * the author or editor fields. * * A person: * - "John Doe" * - "Doe, John" * * An institution: * - "{The Big Company or Institution Inc.}" * - "{The Big Company or Institution Inc. (BCI)}" * * @param author Author or editor. * @return True if the author or editor is an institution. */ private static boolean isInstitution(String author) { return StringUtil.isInCurlyBrackets(author); } /** * <p> * An author or editor may be and institution not a person. In that case the * key generator builds very long keys, e.g.: for “The Attributed * Graph Grammar System (AGG)” -> * “TheAttributedGraphGrammarSystemAGG”. * </p> * * <p> * An institution name should be inside <code>{}</code> brackets. If the * institution name also includes its abbreviation this abbreviation should * be also in <code>{}</code> brackets. 
For the previous example the value
     * should look like:
     * <code>{The Attributed Graph Grammar System ({AGG})}</code>.
     * </p>
     *
     * <p>
     * If an institution includes its abbreviation, i.e. "...({XYZ})", the first
     * such abbreviation should be used as the key value part of such author.
     * </p>
     *
     * <p>
     * If an institution does not include its abbreviation the key should be
     * generated from its name in the following way:
     * </p>
     *
     * <p>
     * The institution value can contain: institution name, part of the
     * institution, address, etc. These pieces of information should be separated
     * by commas. Name of the institution and possible part of the institution
     * should be at the beginning, while address and secondary information
     * should be at the end.
     * </p>
     *
     * Each part is examined separately:
     * <ol>
     * <li>We remove all tokens of a part which are one of the defined ignore
     * words (the, press), which end with a dot (ltd., co., ...) or whose first
     * character is lowercase (of, on, di, ...).</li>
     * <li>We detect a type of the part: university, technology institute,
     * department, school, rest
     * <ul>
     * <li>University: <code>"Uni[NameOfTheUniversity]"</code></li>
     * <li>Department: will be an abbreviation of all words beginning with an
     * uppercase letter except for the words: <code>d[ei]p.*</code>, school,
     * faculty</li>
     * <li>School: same as department</li>
     * <li>Rest: If there are less than 3 tokens in such a part then the result
     * is built by concatenating those tokens, otherwise the result is built
     * from the first letters of words starting with an uppercase letter.</li>
     * </ul>
     * </li>
     * </ol>
     *
     * Parts are concatenated together in the following way:
     * <ul>
     * <li>If there is a university part use it, otherwise use the rest part.</li>
     * <li>If there is a school part append it.</li>
     * <li>If there is a department part and it is not the same as the school part
     * append it.</li>
     * </ul>
     *
     * The rest part is only the first part which does not match any other type. All
     * other parts (address, ...) are ignored.
* * @param content the institution to generate a Bibtex key for * @return <ul> * <li>the institution key</li> * <li>"" in the case of a failure</li> * <li>null if content is null</li> * </ul> */ private static String generateInstitutionKey(String content) { if (content.isEmpty()) { return content; } String result = content; result = unifyDiacritics(result); result = result.replaceAll("^\\{", "").replaceAll("\\}$", ""); Matcher matcher = REGEX_PATTERN.matcher(result); if (matcher.matches()) { return matcher.group(1); } result = removeDiacritics(result); String[] parts = result.split(","); // Key parts String university = null; String department = null; String school = null; String rest = null; List<String> ignore = Arrays.asList("press", "the"); for (int index = 0; index < parts.length; index++) { List<String> part = new ArrayList<>(); // Cleanup: remove unnecessary words. for (String k : parts[index].replaceAll("\\{[A-Z]+\\}", "").split("[ \\-_]")) { if ((!(k.isEmpty()) // remove empty && !ignore.contains(k.toLowerCase(Locale.ENGLISH)) // remove ignored words && (k.charAt(k.length() - 1) != '.') && (String.valueOf(k.charAt(0))).matches("[A-Z]")) || ((k.length() >= 3) && "uni".equalsIgnoreCase(k.substring(0, 2)))) { part.add(k); } } boolean isUniversity = false; // university boolean isTechnology = false; // technology institute boolean isDepartment = false; // departments boolean isSchool = false; // schools // Deciding about a part type... 
for (String k : part) { if (k.matches("^[Uu][Nn][Ii].*")) { // Starts with "uni" case and locale independent isUniversity = true; } if (k.matches("^[Tt][Ee][Cc][Hh].*")) { // Starts with "tech" case and locale independent isTechnology = true; } if (FieldName.SCHOOL.equalsIgnoreCase(k)) { isSchool = true; } if (k.matches("^[Dd][EeIi][Pp].*") || k.matches("^[Ll][Aa][Bb].*")) { // Starts with "dep"/"dip"/"lab", case and locale independent isDepartment = true; } } if (isTechnology) { isUniversity = false; // technology institute isn't university :-) } // University part looks like: Uni[NameOfTheUniversity] // // If university is detected than the previous part is suggested // as department if (isUniversity) { StringBuilder universitySB = new StringBuilder(); universitySB.append("Uni"); for (String k : part) { if (!k.matches("^[Uu][Nn][Ii].*")) { universitySB.append(k); } } university = universitySB.toString(); if ((index > 0) && (department == null)) { department = parts[index - 1]; } // School is an abbreviation of all the words beginning with a // capital letter excluding: department, school and faculty words. // // Explicitly defined department part is build the same way as // school } else if (isSchool || isDepartment) { StringBuilder schoolSB = new StringBuilder(); StringBuilder departmentSB = new StringBuilder(); for (String k : part) { if (!k.matches("^[Dd][EeIi][Pp].*") && !FieldName.SCHOOL.equalsIgnoreCase(k) && !"faculty".equalsIgnoreCase(k) && !(k.replaceAll(STARTING_CAPITAL_PATTERN, "").isEmpty())) { if (isSchool) { schoolSB.append(k.replaceAll(STARTING_CAPITAL_PATTERN, "")); } if (isDepartment) { departmentSB.append(k.replaceAll(STARTING_CAPITAL_PATTERN, "")); } } } if (isSchool) { school = schoolSB.toString(); } if (isDepartment) { department = departmentSB.toString(); } // A part not matching university, department nor school. 
} else if (rest == null) { StringBuilder restSB = new StringBuilder(); // Less than 3 parts -> concatenate those if (part.size() < 3) { for (String k : part) { restSB.append(k); // More than 3 parts -> use 1st letter abbreviation } } else { for (String k : part) { k = k.replaceAll(STARTING_CAPITAL_PATTERN, ""); if (!(k.isEmpty())) { restSB.append(k); } } } rest = restSB.toString(); } } // Putting parts together. return (university == null ? rest : university) + (school == null ? "" : school) + ((department == null) || ((school != null) && department.equals(school)) ? "" : department); } /** * Generates a BibTeX label according to the pattern for a given entry type, and saves the unique label in the * <code>Bibtexentry</code>. * * The given database is used to avoid duplicate keys. * * @param citeKeyPattern * @param database a <code>BibDatabase</code> * @param entry a <code>BibEntry</code> * @return modified BibEntry */ public static void makeAndSetLabel(AbstractBibtexKeyPattern citeKeyPattern, BibDatabase database, BibEntry entry, BibtexKeyPatternPreferences bibtexKeyPatternPreferences) { String newKey = makeLabel(citeKeyPattern, database, entry, bibtexKeyPatternPreferences); entry.setCiteKey(newKey); } private static String makeLabel(AbstractBibtexKeyPattern citeKeyPattern, BibDatabase database, BibEntry entry, BibtexKeyPatternPreferences bibtexKeyPatternPreferences) { String key; StringBuilder stringBuilder = new StringBuilder(); try { // get the type of entry String entryType = entry.getType(); // Get the arrayList corresponding to the type List<String> typeList = new ArrayList<>(citeKeyPattern.getValue(entryType)); if (!typeList.isEmpty()) { typeList.remove(0); } boolean field = false; for (String typeListEntry : typeList) { if ("[".equals(typeListEntry)) { field = true; } else if ("]".equals(typeListEntry)) { field = false; } else if (field) { // check whether there is a modifier on the end such as // ":lower" List<String> parts = 
parseFieldMarker(typeListEntry); String label = makeLabel(entry, parts.get(0), bibtexKeyPatternPreferences.getKeywordDelimiter(), database); // apply modifier if present if (parts.size() > 1) { label = applyModifiers(label, parts, 1); } stringBuilder.append(label); } else { stringBuilder.append(typeListEntry); } } } catch (Exception e) { LOGGER.warn("Cannot make label", e); } // Remove all illegal characters from the key. key = checkLegalKey(stringBuilder.toString(), bibtexKeyPatternPreferences.isEnforceLegalKey()); // Remove Regular Expressions while generating Keys String regex = bibtexKeyPatternPreferences.getKeyPatternRegex(); if ((regex != null) && !regex.trim().isEmpty()) { String replacement = bibtexKeyPatternPreferences.getKeyPatternReplacement(); key = key.replaceAll(regex, replacement); } String oldKey = entry.getCiteKeyOptional().orElse(null); int occurrences = database.getDuplicationChecker().getNumberOfKeyOccurrences(key); if (Objects.equals(oldKey, key)) { occurrences--; // No change, so we can accept one dupe. } boolean alwaysAddLetter = bibtexKeyPatternPreferences.isAlwaysAddLetter(); boolean firstLetterA = bibtexKeyPatternPreferences.isFirstLetterA(); String newKey; if (!alwaysAddLetter && (occurrences == 0)) { newKey = key; } else { // The key is already in use, so we must modify it. int number = !alwaysAddLetter && !firstLetterA ? 1 : 0; String moddedKey; do { moddedKey = key + getAddition(number); number++; occurrences = database.getDuplicationChecker().getNumberOfKeyOccurrences(moddedKey); // only happens if #getAddition() is buggy if (Objects.equals(oldKey, moddedKey)) { occurrences--; } } while (occurrences > 0); newKey = moddedKey; } return newKey; } /** * Applies modifiers to a label generated based on a field marker. * @param label The generated label. * @param parts String array containing the modifiers. * @param offset The number of initial items in the modifiers array to skip. * @return The modified label. 
*/ public static String applyModifiers(final String label, final List<String> parts, final int offset) { String resultingLabel = label; if (parts.size() > offset) { for (int j = offset; j < parts.size(); j++) { String modifier = parts.get(j); if ("abbr".equals(modifier)) { // Abbreviate - that is, StringBuilder abbreviateSB = new StringBuilder(); String[] words = resultingLabel.replaceAll("[\\{\\}']", "") .split("[\\(\\) \r\n\"]"); for (String word : words) { if (!word.isEmpty()) { abbreviateSB.append(word.charAt(0)); } } resultingLabel = abbreviateSB.toString(); } else { Optional<Formatter> formatter = Formatters.getFormatterForModifier(modifier); if (formatter.isPresent()) { resultingLabel = formatter.get().format(label); } else if (!modifier.isEmpty() && (modifier.length() >= 2) && (modifier.charAt(0) == '(') && modifier.endsWith(")")) { // Alternate text modifier in parentheses. Should be inserted if // the label is empty: if (label.isEmpty() && (modifier.length() > 2)) { resultingLabel = modifier.substring(1, modifier.length() - 1); } else { resultingLabel = label; } } else { LOGGER.info("Key generator warning: unknown modifier '" + modifier + "'."); resultingLabel = label; } } } } return resultingLabel; } public static String makeLabel(BibEntry entry, String value, Character keywordDelimiter, BibDatabase database) { String val = value; try { if (val.startsWith("auth") || val.startsWith("pureauth")) { /* * For label code "auth...": if there is no author, but there * are editor(s) (e.g. for an Edited Book), use the editor(s) * instead. (saw27@mrao.cam.ac.uk). This is what most people * want, but in case somebody really needs a field which expands * to nothing if there is no author (e.g. someone who uses both * "auth" and "ed" in the same label), we provide an alternative * form "pureauth..." which does not do this fallback * substitution of editor. 
*/ String authString; if (database != null) { authString = entry.getField(FieldName.AUTHOR) .map(authorString -> normalize(database.resolveForStrings(authorString))).orElse(""); } else { authString = entry.getField(FieldName.AUTHOR).orElse(""); } if (val.startsWith("pure")) { // remove the "pure" prefix so the remaining // code in this section functions correctly val = val.substring(4); } if (authString.isEmpty()) { if (database != null) { authString = entry.getField(FieldName.EDITOR) .map(authorString -> normalize(database.resolveForStrings(authorString))).orElse(""); } else { authString = entry.getField(FieldName.EDITOR).orElse(""); } } // Gather all author-related checks, so we don't // have to check all the time. if ("auth".equals(val)) { return firstAuthor(authString); } else if ("authForeIni".equals(val)) { return firstAuthorForenameInitials(authString); } else if ("authFirstFull".equals(val)) { return firstAuthorVonAndLast(authString); } else if ("authors".equals(val)) { return allAuthors(authString); } else if ("authorsAlpha".equals(val)) { return authorsAlpha(authString); } // Last author's last name else if ("authorLast".equals(val)) { return lastAuthor(authString); } else if ("authorLastForeIni".equals(val)) { return lastAuthorForenameInitials(authString); } else if ("authorIni".equals(val)) { return oneAuthorPlusIni(authString); } else if (val.matches("authIni[\\d]+")) { int num = Integer.parseInt(val.substring(7)); return authIniN(authString, num); } else if ("auth.auth.ea".equals(val)) { return authAuthEa(authString); } else if ("auth.etal".equals(val)) { return authEtal(authString, ".", ".etal"); } else if ("authEtAl".equals(val)) { return authEtal(authString, "", "EtAl"); } else if ("authshort".equals(val)) { return authshort(authString); } else if (val.matches("auth[\\d]+_[\\d]+")) { String[] nums = val.substring(4).split("_"); return authNofMth(authString, Integer.parseInt(nums[0]), Integer.parseInt(nums[1])); } else if (val.matches("auth\\d+")) { 
// authN. First N chars of the first author's last // name. String fa = firstAuthor(authString); int num = Integer.parseInt(val.substring(4)); if (num > fa.length()) { num = fa.length(); } return fa.substring(0, num); } else if (val.matches("authors\\d+")) { return nAuthors(authString, Integer.parseInt(val.substring(7))); } else { // This "auth" business was a dead end, so just // use it literally: return entry.getFieldOrAlias(val).orElse(""); } } else if (val.startsWith("ed")) { // Gather all markers starting with "ed" here, so we // don't have to check all the time. if ("edtr".equals(val)) { return firstAuthor(entry.getField(FieldName.EDITOR).orElse("")); } else if ("edtrForeIni".equals(val)) { return firstAuthorForenameInitials(entry.getField(FieldName.EDITOR).orElse("")); } else if ("editors".equals(val)) { return allAuthors(entry.getField(FieldName.EDITOR).orElse("")); // Last author's last name } else if ("editorLast".equals(val)) { return lastAuthor(entry.getField(FieldName.EDITOR).orElse("")); } else if ("editorLastForeIni".equals(val)) { return lastAuthorForenameInitials(entry.getField(FieldName.EDITOR).orElse("")); } else if ("editorIni".equals(val)) { return oneAuthorPlusIni(entry.getField(FieldName.EDITOR).orElse("")); } else if (val.matches("edtrIni[\\d]+")) { int num = Integer.parseInt(val.substring(7)); return authIniN(entry.getField(FieldName.EDITOR).orElse(""), num); } else if (val.matches("edtr[\\d]+_[\\d]+")) { String[] nums = val.substring(4).split("_"); return authNofMth(entry.getField(FieldName.EDITOR).orElse(""), Integer.parseInt(nums[0]), Integer.parseInt(nums[1]) - 1); } else if ("edtr.edtr.ea".equals(val)) { return authAuthEa(entry.getField(FieldName.EDITOR).orElse("")); } else if ("edtrshort".equals(val)) { return authshort(entry.getField(FieldName.EDITOR).orElse("")); } // authN. First N chars of the first author's last // name. 
else if (val.matches("edtr\\d+")) { String fa = firstAuthor(entry.getField(FieldName.EDITOR).orElse("")); int num = Integer.parseInt(val.substring(4)); if (num > fa.length()) { num = fa.length(); } return fa.substring(0, num); } else { // This "ed" business was a dead end, so just // use it literally: return entry.getFieldOrAlias(val).orElse(""); } } else if ("firstpage".equals(val)) { return firstPage(entry.getField(FieldName.PAGES).orElse("")); } else if ("lastpage".equals(val)) { return lastPage(entry.getField(FieldName.PAGES).orElse("")); } else if ("title".equals(val)) { return camelizeSignificantWordsInTitle(entry.getField(FieldName.TITLE).orElse("")); } else if ("shorttitle".equals(val)) { return getTitleWords(3, entry.getField(FieldName.TITLE).orElse("")); } else if ("shorttitleINI".equals(val)) { return keepLettersAndDigitsOnly( applyModifiers(getTitleWordsWithSpaces(3, entry.getField(FieldName.TITLE).orElse("")), Collections.singletonList("abbr"), 0)); } else if ("veryshorttitle".equals(val)) { return getTitleWords(1, removeSmallWords(entry.getField(FieldName.TITLE).orElse(""))); } else if ("camel".equals(val)) { return getCamelizedTitle(entry.getField(FieldName.TITLE).orElse("")); } else if ("shortyear".equals(val)) { String yearString = entry.getFieldOrAlias(FieldName.YEAR).orElse(""); if (yearString.isEmpty()) { return yearString; // In press/in preparation/submitted } else if (yearString.startsWith("in") || yearString.startsWith("sub")) { return "IP"; } else if (yearString.length() > 2) { return yearString.substring(yearString.length() - 2); } else { return yearString; } } else if (val.matches("keyword\\d+")) { // according to LabelPattern.php, it returns keyword number n int num = Integer.parseInt(val.substring(7)); KeywordList separatedKeywords = entry.getKeywords(keywordDelimiter); if (separatedKeywords.size() < num) { // not enough keywords return ""; } else { // num counts from 1 to n, but index in arrayList count from 0 to n-1 return 
separatedKeywords.get(num - 1).toString(); } } else if (val.matches("keywords\\d*")) { // return all keywords, not separated int num; if (val.length() > 8) { num = Integer.parseInt(val.substring(8)); } else { num = Integer.MAX_VALUE; } KeywordList separatedKeywords = entry.getKeywords(keywordDelimiter); StringBuilder sb = new StringBuilder(); int i = 0; for (Keyword keyword : separatedKeywords) { // remove all spaces sb.append(keyword.toString().replaceAll("\\s+", "")); i++; if (i >= num) { break; } } return sb.toString(); } else { // we haven't seen any special demands return entry.getFieldOrAlias(val).orElse(""); } } catch (NullPointerException ex) { LOGGER.debug("Problem making label", ex); return ""; } } /** * Computes an appendix to a BibTeX key that could make it unique. We use * a-z for numbers 0-25, and then aa-az, ba-bz, etc. * * @param number * The appendix number. * @return The String to append. */ private static String getAddition(int number) { if (number >= CHARS.length()) { int lastChar = number % CHARS.length(); return getAddition((number / CHARS.length()) - 1) + CHARS.substring(lastChar, lastChar + 1); } else { return CHARS.substring(number, number + 1); } } /** * Determines "number" words out of the "title" field in the given BibTeX entry */ public static String getTitleWords(int number, String title) { return keepLettersAndDigitsOnly(getTitleWordsWithSpaces(number, title)); } /** * Removes any '-', unnecessary whitespace and latex commands formatting */ private static String formatTitle(String title) { String ss = new RemoveLatexCommandsFormatter().format(title); StringBuilder stringBuilder = new StringBuilder(); StringBuilder current; int piv = 0; while (piv < ss.length()) { current = new StringBuilder(); // Get the next word: while ((piv < ss.length()) && !Character.isWhitespace(ss.charAt(piv)) && (ss.charAt(piv) != '-')) { current.append(ss.charAt(piv)); piv++; } piv++; // Check if it is ok: String word = current.toString().trim(); if 
(word.isEmpty()) { continue; } // If we get here, the word was accepted. if (stringBuilder.length() > 0) { stringBuilder.append(' '); } stringBuilder.append(word); } return stringBuilder.toString(); } /** * Capitalises and concatenates the words out of the "title" field in the given BibTeX entry */ public static String getCamelizedTitle(String title) { return keepLettersAndDigitsOnly(camelizeTitle(title)); } private static String camelizeTitle(String title) { StringBuilder stringBuilder = new StringBuilder(); String formattedTitle = formatTitle(title); try (Scanner titleScanner = new Scanner(formattedTitle)) { while (titleScanner.hasNext()) { String word = titleScanner.next(); // Camelize the word word = word.substring(0, 1).toUpperCase(Locale.ROOT) + word.substring(1); if (stringBuilder.length() > 0) { stringBuilder.append(' '); } stringBuilder.append(word); } } return stringBuilder.toString(); } /** * Capitalises the significant words of the "title" field in the given BibTeX entry */ public static String camelizeSignificantWordsInTitle(String title) { StringJoiner stringJoiner = new StringJoiner(" "); String formattedTitle = formatTitle(title); Boolean camelize; try (Scanner titleScanner = new Scanner(formattedTitle)) { while (titleScanner.hasNext()) { String word = titleScanner.next(); camelize = true; // Camelize the word if it is significant for (String smallWord : Word.SMALLER_WORDS) { if (word.equalsIgnoreCase(smallWord)) { camelize = false; continue; } } // We want to capitalize significant words and the first word of the title if (camelize || (stringJoiner.length() == 0)) { word = word.substring(0, 1).toUpperCase(Locale.ROOT) + word.substring(1); } else { word = word.substring(0, 1).toLowerCase(Locale.ROOT) + word.substring(1); } stringJoiner.add(word); } } return stringJoiner.toString(); } public static String removeSmallWords(String title) { StringJoiner stringJoiner = new StringJoiner(" "); String formattedTitle = formatTitle(title); try (Scanner 
titleScanner = new Scanner(formattedTitle)) { mainl: while (titleScanner.hasNext()) { String word = titleScanner.next(); for (String smallWord : Word.SMALLER_WORDS) { if (word.equalsIgnoreCase(smallWord)) { continue mainl; } } stringJoiner.add(word); } } return stringJoiner.toString(); } private static String getTitleWordsWithSpaces(int number, String title) { StringJoiner stringJoiner = new StringJoiner(" "); String formattedTitle = formatTitle(title); int words = 0; try (Scanner titleScanner = new Scanner(formattedTitle)) { while (titleScanner.hasNext() && (words < number)) { String word = titleScanner.next(); stringJoiner.add(word); words++; } } return stringJoiner.toString(); } private static String keepLettersAndDigitsOnly(String in) { StringBuilder stringBuilder = new StringBuilder(); for (int i = 0; i < in.length(); i++) { if (Character.isLetterOrDigit(in.charAt(i))) { stringBuilder.append(in.charAt(i)); } } return stringBuilder.toString(); } /** * Gets the last name of the first author/editor * * @param authorField * a <code>String</code> * @return the surname of an author/editor or "" if no author was found * This method is guaranteed to never return null. * * @throws NullPointerException * if authorField == null */ public static String firstAuthor(String authorField) { AuthorList authorList = AuthorList.parse(authorField); if (authorList.isEmpty()) { return ""; } return authorList.getAuthor(0).getLast().orElse(""); } /** * Gets the first name initials of the first author/editor * * @param authorField * a <code>String</code> * @return the first name initial of an author/editor or "" if no author was found * This method is guaranteed to never return null. 
* * @throws NullPointerException * if authorField == null */ public static String firstAuthorForenameInitials(String authorField) { AuthorList authorList = AuthorList.parse(authorField); if (authorList.isEmpty()) { return ""; } return authorList.getAuthor(0).getFirstAbbr().map(s -> s.substring(0, 1)).orElse(""); } /** * Gets the von part and the last name of the first author/editor * No spaces are returned * * @param authorField * a <code>String</code> * @return the von part and surname of an author/editor or "" if no author was found. * This method is guaranteed to never return null. * * @throws NullPointerException * if authorField == null */ public static String firstAuthorVonAndLast(String authorField) { AuthorList authorList = AuthorList.parse(authorField); if (authorList.isEmpty()) { return ""; } StringBuilder stringBuilder = new StringBuilder(); authorList.getAuthor(0).getVon().ifPresent(vonAuthor -> stringBuilder.append(vonAuthor.replaceAll(" ", ""))); authorList.getAuthor(0).getLast().ifPresent(stringBuilder::append); return stringBuilder.toString(); } /** * Gets the last name of the last author/editor * @param authorField a <code>String</code> * @return the surname of an author/editor */ public static String lastAuthor(String authorField) { String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\s+\\band\\b\\s+"); if (tokens.length > 0) { String[] lastAuthor = tokens[tokens.length - 1].split(","); return lastAuthor[0]; } else { // if author is empty return ""; } } /** * Gets the forename initials of the last author/editor * * @param authorField * a <code>String</code> * @return the forename initial of an author/editor or "" if no author was found * This method is guaranteed to never return null. 
* * @throws NullPointerException * if authorField == null */ public static String lastAuthorForenameInitials(String authorField) { AuthorList authorList = AuthorList.parse(authorField); if (authorList.isEmpty()) { return ""; } return authorList.getAuthor(authorList.getNumberOfAuthors() - 1).getFirstAbbr().map(s -> s.substring(0, 1)) .orElse(""); } /** * Gets the last name of all authors/editors * @param authorField a <code>String</code> * @return the sur name of all authors/editors */ public static String allAuthors(String authorField) { // Quick hack to use NAuthors to avoid code duplication return nAuthors(authorField, Integer.MAX_VALUE); } /** * Returns the authors according to the BibTeX-alpha-Style * @param authorField string containing the value of the author field * @return the initials of all authornames */ public static String authorsAlpha(String authorField) { String authors = ""; String fixedAuthors = AuthorList.fixAuthorLastNameOnlyCommas(authorField, false); // drop the "and" before the last author // -> makes processing easier fixedAuthors = fixedAuthors.replace(" and ", ", "); String[] tokens = fixedAuthors.split(","); int max = tokens.length > 4 ? 3 : tokens.length; if (max == 1) { String[] firstAuthor = tokens[0].replaceAll("\\s+", " ").trim().split(" "); // take first letter of any "prefixes" (e.g. 
van der Aalst -> vd) for (int j = 0; j < (firstAuthor.length - 1); j++) { authors = authors.concat(firstAuthor[j].substring(0, 1)); } // append last part of last name completely authors = authors.concat(firstAuthor[firstAuthor.length - 1].substring(0, Math.min(3, firstAuthor[firstAuthor.length - 1].length()))); } else { for (int i = 0; i < max; i++) { // replace all whitespaces by " " // split the lastname at " " String[] curAuthor = tokens[i].replaceAll("\\s+", " ").trim().split(" "); for (String aCurAuthor : curAuthor) { // use first character of each part of lastname authors = authors.concat(aCurAuthor.substring(0, 1)); } } if (tokens.length > 4) { authors = authors.concat("+"); } } return authors; } /** * Gets the surnames of the first N authors and appends EtAl if there are more than N authors * @param authorField a <code>String</code> * @param n the number of desired authors * @return Gets the surnames of the first N authors and appends EtAl if there are more than N authors */ public static String nAuthors(String authorField, int n) { String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\s+\\band\\b\\s+"); int i = 0; StringBuilder authorSB = new StringBuilder(); while ((tokens.length > i) && (i < n)) { String lastName = tokens[i].replaceAll(",\\s+.*", ""); authorSB.append(lastName); i++; } if (tokens.length > n) { authorSB.append("EtAl"); } return authorSB.toString(); } /** * Gets the first part of the last name of the first * author/editor, and appends the last name initial of the * remaining authors/editors. 
* Maximum 5 characters * @param authorField a <code>String</code> * @return the surname of all authors/editors */ public static String oneAuthorPlusIni(String authorField) { String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+"); if (tokens.length == 0) { return ""; } String firstAuthor = tokens[0].split(",")[0]; StringBuilder authorSB = new StringBuilder(); authorSB.append(firstAuthor.substring(0, Math.min(CHARS_OF_FIRST, firstAuthor.length()))); int i = 1; while (tokens.length > i) { // convert lastname, firstname to firstname lastname authorSB.append(tokens[i].charAt(0)); i++; } return authorSB.toString(); } /** * auth.auth.ea format: * Isaac Newton and James Maxwell and Albert Einstein (1960) * Isaac Newton and James Maxwell (1960) * give: * Newton.Maxwell.ea * Newton.Maxwell */ public static String authAuthEa(String authorField) { String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+"); if (tokens.length == 0) { return ""; } StringBuilder author = new StringBuilder(); // append first author author.append((tokens[0].split(","))[0]); if (tokens.length >= 2) { // append second author author.append('.').append((tokens[1].split(","))[0]); } if (tokens.length > 2) { // append ".ea" if more than 2 authors author.append(".ea"); } return author.toString(); } /** * auth.etal, authEtAl, ... 
format: * Isaac Newton and James Maxwell and Albert Einstein (1960) * Isaac Newton and James Maxwell (1960) * * auth.etal give (delim=".", append=".etal"): * Newton.etal * Newton.Maxwell * * authEtAl give (delim="", append="EtAl"): * NewtonEtAl * NewtonMaxwell * * Note that [authEtAl] equals [authors2] */ public static String authEtal(String authorField, String delim, String append) { String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); String[] tokens = fixedAuthorField.split("\\s*\\band\\b\\s*"); if (tokens.length == 0) { return ""; } StringBuilder author = new StringBuilder(); author.append((tokens[0].split(","))[0]); if (tokens.length == 2) { author.append(delim).append((tokens[1].split(","))[0]); } else if (tokens.length > 2) { author.append(append); } return author.toString(); } /** * The first N characters of the Mth author/editor. * M starts counting from 1 */ public static String authNofMth(String authorField, int n, int m) { // have m counting from 0 int mminusone = m - 1; String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+"); if ((tokens.length <= mminusone) || (n < 0) || (mminusone < 0)) { return ""; } String lastName = (tokens[mminusone].split(","))[0]; if (lastName.length() <= n) { return lastName; } else { return lastName.substring(0, n); } } /** * authshort format: * added by Kolja Brix, kbx@users.sourceforge.net * * given author names * * Isaac Newton and James Maxwell and Albert Einstein and N. 
Bohr * * Isaac Newton and James Maxwell and Albert Einstein * * Isaac Newton and James Maxwell * * Isaac Newton * * yield * * NME+ * * NME * * NM * * Newton */ public static String authshort(String authorField) { String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); StringBuilder author = new StringBuilder(); String[] tokens = fixedAuthorField.split("\\band\\b"); int i = 0; if (tokens.length == 1) { author.append(authNofMth(fixedAuthorField, fixedAuthorField.length(), 1)); } else if (tokens.length >= 2) { while ((tokens.length > i) && (i < 3)) { author.append(authNofMth(fixedAuthorField, 1, i + 1)); i++; } if (tokens.length > 3) { author.append('+'); } } return author.toString(); } /** * authIniN format: * * Each author gets (N div #authors) chars, the remaining (N mod #authors) * chars are equally distributed to the authors first in the row. * * If (N < #authors), only the first N authors get mentioned. * * For example if * * a) I. Newton and J. Maxwell and A. Einstein and N. Bohr (..) * * b) I. Newton and J. Maxwell and A. Einstein * * c) I. Newton and J. Maxwell * * d) I. Newton * * authIni4 gives: a) NMEB, b) NeME, c) NeMa, d) Newt * * @param authorField * The authors to format. * * @param n * The maximum number of characters this string will be long. A * negative number or zero will lead to "" be returned. 
* * @throws NullPointerException * if authorField is null and n > 0 */ public static String authIniN(String authorField, int n) { if (n <= 0) { return ""; } String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField); StringBuilder author = new StringBuilder(); String[] tokens = fixedAuthorField.split("\\band\\b"); if (tokens.length == 0) { return author.toString(); } int i = 0; int charsAll = n / tokens.length; while (tokens.length > i) { if (i < (n % tokens.length)) { author.append(authNofMth(fixedAuthorField, charsAll + 1, i + 1)); } else { author.append(authNofMth(fixedAuthorField, charsAll, i + 1)); } i++; } if (author.length() <= n) { return author.toString(); } else { return author.toString().substring(0, n); } } /** * Split the pages field into separate numbers and return the lowest * * @param pages * (may not be null) a pages string such as 42--111 or * 7,41,73--97 or 43+ * * @return the first page number or "" if no number is found in the string * * @throws NullPointerException * if pages is null */ public static String firstPage(String pages) { final String[] splitPages = pages.split("\\D+"); int result = Integer.MAX_VALUE; for (String n : splitPages) { if (n.matches("\\d+")) { result = Math.min(Integer.parseInt(n), result); } } if (result == Integer.MAX_VALUE) { return ""; } else { return String.valueOf(result); } } /** * Split the pages field into separate numbers and return the highest * * @param pages * a pages string such as 42--111 or 7,41,73--97 or 43+ * * @return the first page number or "" if no number is found in the string * * @throws NullPointerException * if pages is null. 
*/
    public static String lastPage(String pages) {
        // BigInteger is referenced by its qualified name because this file does not import it.
        // It avoids the NumberFormatException Integer.parseInt throws for long digit runs
        // (e.g. article numbers) and makes a sentinel value such as Integer.MIN_VALUE unnecessary.
        java.math.BigInteger highest = null;
        for (String digits : pages.split("\\D+")) {
            // Splitting at non-digits leaves only digit runs, or "" when the field starts with a non-digit
            if (digits.isEmpty()) {
                continue;
            }
            java.math.BigInteger page = new java.math.BigInteger(digits);
            if ((highest == null) || (page.compareTo(highest) > 0)) {
                highest = page;
            }
        }
        return (highest == null) ? "" : highest.toString();
    }

    /**
     * Parse a field marker with modifiers, possibly containing a parenthesised modifier,
     * as well as escaped colons and parentheses.
     * <p>
     * The marker is split at every colon that is neither escaped by a backslash nor enclosed in
     * parentheses. A backslash makes the following character literal and is itself dropped; a doubled
     * backslash yields a single backslash.
     *
     * @param arg The argument string.
     * @return A list of strings representing the parts of the marker
     */
    private static List<String> parseFieldMarker(String arg) {
        List<String> parts = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean escaped = false;
        int inParenthesis = 0;
        for (int i = 0; i < arg.length(); i++) {
            char currentChar = arg.charAt(i);
            if ((currentChar == ':') && !escaped && (inParenthesis == 0)) {
                // unescaped top-level colon: the current part is complete
                parts.add(current.toString());
                current = new StringBuilder();
            } else if ((currentChar == '(') && !escaped) {
                inParenthesis++;
                current.append(currentChar);
            } else if ((currentChar == ')') && !escaped && (inParenthesis > 0)) {
                inParenthesis--;
                current.append(currentChar);
            } else if (currentChar == '\\') {
                if (escaped) {
                    // escaped backslash: keep one literal backslash
                    escaped = false;
                    current.append(currentChar);
                } else {
                    // start of an escape sequence: remember it, but do not copy the backslash
                    escaped = true;
                }
            } else {
                // ordinary character, or a character made literal by a preceding backslash
                current.append(currentChar);
                escaped = false;
            }
        }
        parts.add(current.toString());
        return parts;
    }

    /**
     * This method returns a String similar to the one passed in, except that it is molded into a form that is
     * acceptable for bibtex.
     * <p>
     * Watch-out that the returned string might be of length 0 afterwards.
     *
     * @param key mayBeNull
     * @param enforceLegalKey make sure that the key is legal in all respects
     * @return the cleaned key, or null if key is null
     */
    public static String checkLegalKey(String key, boolean enforceLegalKey) {
        if (key == null) {
            return null;
        }

        // Even if the user doesn't want us to enforce legal characters, we must still look
        // for whitespace and some characters such as commas, since these would
        // interfere with parsing. Enforcing legal keys removes a few more characters.
        String forbiddenChars = enforceLegalKey ? "{}(),\\\"#~^'" : "{}(),\\\"";

        StringBuilder newKey = new StringBuilder(key.length());
        for (int i = 0; i < key.length(); i++) {
            char c = key.charAt(i);
            if (!Character.isWhitespace(c) && (forbiddenChars.indexOf(c) == -1)) {
                newKey.append(c);
            }
        }

        if (!enforceLegalKey) {
            return newKey.toString();
        }

        // Replace non-English characters like umlauts etc. with a sensible
        // letter or letter combination that bibtex can accept.
        return StringUtil.replaceSpecialCharacters(newKey.toString());
    }

    /**
     * Generates the label for the given entry, using the cite key pattern obtained from the metadata of
     * the given database context.
     *
     * @param bibDatabaseContext the context providing the metadata and the database
     * @param entry the entry to generate the label for
     * @param bibtexKeyPatternPreferences the preferences; their key pattern is passed to the metadata
     *            lookup of the cite key pattern
     * @return the label produced by the delegate makeLabel method
     */
    public static String makeLabel(BibDatabaseContext bibDatabaseContext,
            BibEntry entry,
            BibtexKeyPatternPreferences bibtexKeyPatternPreferences) {
        AbstractBibtexKeyPattern citeKeyPattern = bibDatabaseContext.getMetaData()
                .getCiteKeyPattern(bibtexKeyPatternPreferences.getKeyPattern());
        return makeLabel(citeKeyPattern, bibDatabaseContext.getDatabase(), entry, bibtexKeyPatternPreferences);
    }
}