package org.jabref.logic.bibtexkeypattern;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Scanner;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jabref.logic.formatter.Formatters;
import org.jabref.logic.formatter.casechanger.Word;
import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter;
import org.jabref.model.bibtexkeypattern.AbstractBibtexKeyPattern;
import org.jabref.model.cleanup.Formatter;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.Keyword;
import org.jabref.model.entry.KeywordList;
import org.jabref.model.strings.StringUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* This is the utility class of the LabelPattern package.
*/
public class BibtexKeyPatternUtil {
private static final Log LOGGER = LogFactory.getLog(BibtexKeyPatternUtil.class);
private static final String STARTING_CAPITAL_PATTERN = "[^A-Z]";
// All single characters that we can use for extending a key to make it unique:
private static final String CHARS = "abcdefghijklmnopqrstuvwxyz";
private static final Pattern REGEX_PATTERN = Pattern.compile(".*\\(\\{([A-Z]+)\\}\\).*");
private static final int CHARS_OF_FIRST = 5;
private BibtexKeyPatternUtil() {
}
private static String normalize(String content) {
List<String> tokens = new ArrayList<>();
int b = 0;
StringBuilder and = new StringBuilder();
StringBuilder token = new StringBuilder();
for (int p = 0; p < content.length(); p++) {
if (b == 0) {
String andString = and.toString(); // Avoid lots of calls
if (((andString.isEmpty()) && (content.charAt(p) == ' '))
|| (" ".equals(andString) && (content.charAt(p) == 'a'))
|| (" a".equals(andString) && (content.charAt(p) == 'n'))
|| (" an".equals(andString) && (content.charAt(p) == 'd'))) {
and.append(content.charAt(p));
} else if (" and".equals(and.toString()) && (content.charAt(p) == ' ')) {
and = new StringBuilder();
tokens.add(token.toString().trim());
token = new StringBuilder();
} else {
if (content.charAt(p) == '{') {
b++;
}
if (content.charAt(p) == '}') {
b--;
}
token.append(and);
and = new StringBuilder();
token.append(content.charAt(p));
}
} else {
token.append(content.charAt(p));
}
}
tokens.add(token.toString());
StringBuilder normalized = new StringBuilder("");
for (int i = 0; i < tokens.size(); i++) {
if (i > 0) {
normalized.append(" and ");
}
normalized.append(isInstitution(tokens.get(i)) ? generateInstitutionKey(tokens.get(i)) : removeDiacritics(
tokens.get(i)));
}
return normalized.toString();
}
/**
* Will remove diacritics from the content.
*
* Replaces umlaut: \"x with xe, e.g. \"o -> oe, \"u -> ue, etc.
* Removes all other diacritics: \?x -> x, e.g. \'a -> a, etc.
*
* @param content The content.
* @return The content without diacritics.
*/
private static String removeDiacritics(String content) {
if (content.isEmpty()) {
return content;
}
String result = content;
// Replace umlaut with '?e'
result = result.replaceAll("\\{\\\\\"([a-zA-Z])\\}", "$1e");
result = result.replaceAll("\\\\\"\\{([a-zA-Z])\\}", "$1e");
result = result.replaceAll("\\\\\"([a-zA-Z])", "$1e");
// Remove diacritics
result = result.replaceAll("\\{\\\\.([a-zA-Z])\\}", "$1");
result = result.replaceAll("\\\\.\\{([a-zA-Z])\\}", "$1");
result = result.replaceAll("\\\\.([a-zA-Z])", "$1");
return result;
}
/**
* Unifies umlauts.
*
* Replaces: $\ddot{\mathrm{X}}$ (an alternative umlaut) with: {\"X}
* Replaces: \?{X} and \?X with {\?X}, where ? is a diacritic symbol
*
* @param content The content.
* @return The content with unified diacritics.
*/
private static String unifyDiacritics(String content) {
return content.replaceAll(
"\\$\\\\ddot\\{\\\\mathrm\\{([^\\}])\\}\\}\\$",
"{\\\"$1}").replaceAll(
"(\\\\[^\\-a-zA-Z])\\{?([a-zA-Z])\\}?",
"{$1$2}");
}
/**
* Check if a value is institution.
*
* This is usable for distinguishing between persons and institutions in
* the author or editor fields.
*
* A person:
* - "John Doe"
* - "Doe, John"
*
* An institution:
* - "{The Big Company or Institution Inc.}"
* - "{The Big Company or Institution Inc. (BCI)}"
*
* @param author Author or editor.
* @return True if the author or editor is an institution.
*/
private static boolean isInstitution(String author) {
return StringUtil.isInCurlyBrackets(author);
}
/**
* <p>
* An author or editor may be and institution not a person. In that case the
* key generator builds very long keys, e.g.: for “The Attributed
* Graph Grammar System (AGG)” ->
* “TheAttributedGraphGrammarSystemAGG”.
* </p>
*
* <p>
* An institution name should be inside <code>{}</code> brackets. If the
* institution name also includes its abbreviation this abbreviation should
* be also in <code>{}</code> brackets. For the previous example the value
* should look like:
* <code>{The Attributed Graph Grammar System ({AGG})}</code>.
* </p>
*
* <p>
* If an institution includes its abbreviation, i.e. "...({XYZ})", first
* such abbreviation should be used as the key value part of such author.
* </p>
*
* <p>
* If an institution does not include its abbreviation the key should be
* generated form its name in the following way:
* </p>
*
* <p>
* The institution value can contain: institution name, part of the
* institution, address, etc. Those information should be separated by
* comma. Name of the institution and possible part of the institution
* should be on the beginning, while address and secondary information
* should be on the end.
* </p>
*
* Each part is examined separately:
* <ol>
* <li>We remove all tokens of a part which are one of the defined ignore
* words (the, press), which end with a dot (ltd., co., ...) and which first
* character is lowercase (of, on, di, ...).</li>
* <li>We detect a type of the part: university, technology institute,
* department, school, rest
* <ul>
* <li>University: <code>"Uni[NameOfTheUniversity]"</code></li>
* <li>Department: will be an abbreviation of all words beginning with the
* uppercase letter except of words: <code>d[ei]p.*</code>, school,
* faculty</li>
* <li>School: same as department</li>
* <li>Rest: If there are less than 3 tokens in such part than the result
* will be by concatenating those tokens, otherwise the result will be build
* from the first letters of words starting with and uppercase letter.</li>
* </ul>
* </ol>
*
* Parts are concatenated together in the following way:
* <ul>
* <li>If there is a university part use it otherwise use the rest part.</li>
* <li>If there is a school part append it.</li>
* <li>If there is a department part and it is not same as school part
* append it.</li>
* </ul>
*
* Rest part is only the first part which do not match any other type. All
* other parts (address, ...) are ignored.
*
* @param content the institution to generate a Bibtex key for
* @return <ul>
* <li>the institution key</li>
* <li>"" in the case of a failure</li>
* <li>null if content is null</li>
* </ul>
*/
private static String generateInstitutionKey(String content) {
if (content.isEmpty()) {
return content;
}
String result = content;
result = unifyDiacritics(result);
result = result.replaceAll("^\\{", "").replaceAll("\\}$", "");
Matcher matcher = REGEX_PATTERN.matcher(result);
if (matcher.matches()) {
return matcher.group(1);
}
result = removeDiacritics(result);
String[] parts = result.split(",");
// Key parts
String university = null;
String department = null;
String school = null;
String rest = null;
List<String> ignore = Arrays.asList("press", "the");
for (int index = 0; index < parts.length; index++) {
List<String> part = new ArrayList<>();
// Cleanup: remove unnecessary words.
for (String k : parts[index].replaceAll("\\{[A-Z]+\\}", "").split("[ \\-_]")) {
if ((!(k.isEmpty()) // remove empty
&& !ignore.contains(k.toLowerCase(Locale.ENGLISH)) // remove ignored words
&& (k.charAt(k.length() - 1) != '.')
&& (String.valueOf(k.charAt(0))).matches("[A-Z]"))
|| ((k.length() >= 3) && "uni".equalsIgnoreCase(k.substring(0, 2)))) {
part.add(k);
}
}
boolean isUniversity = false; // university
boolean isTechnology = false; // technology institute
boolean isDepartment = false; // departments
boolean isSchool = false; // schools
// Deciding about a part type...
for (String k : part) {
if (k.matches("^[Uu][Nn][Ii].*")) { // Starts with "uni" case and locale independent
isUniversity = true;
}
if (k.matches("^[Tt][Ee][Cc][Hh].*")) { // Starts with "tech" case and locale independent
isTechnology = true;
}
if (FieldName.SCHOOL.equalsIgnoreCase(k)) {
isSchool = true;
}
if (k.matches("^[Dd][EeIi][Pp].*") || k.matches("^[Ll][Aa][Bb].*")) { // Starts with "dep"/"dip"/"lab", case and locale independent
isDepartment = true;
}
}
if (isTechnology) {
isUniversity = false; // technology institute isn't university :-)
}
// University part looks like: Uni[NameOfTheUniversity]
//
// If university is detected than the previous part is suggested
// as department
if (isUniversity) {
StringBuilder universitySB = new StringBuilder();
universitySB.append("Uni");
for (String k : part) {
if (!k.matches("^[Uu][Nn][Ii].*")) {
universitySB.append(k);
}
}
university = universitySB.toString();
if ((index > 0) && (department == null)) {
department = parts[index - 1];
}
// School is an abbreviation of all the words beginning with a
// capital letter excluding: department, school and faculty words.
//
// Explicitly defined department part is build the same way as
// school
} else if (isSchool || isDepartment) {
StringBuilder schoolSB = new StringBuilder();
StringBuilder departmentSB = new StringBuilder();
for (String k : part) {
if (!k.matches("^[Dd][EeIi][Pp].*") && !FieldName.SCHOOL.equalsIgnoreCase(k)
&& !"faculty".equalsIgnoreCase(k)
&& !(k.replaceAll(STARTING_CAPITAL_PATTERN, "").isEmpty())) {
if (isSchool) {
schoolSB.append(k.replaceAll(STARTING_CAPITAL_PATTERN, ""));
}
if (isDepartment) {
departmentSB.append(k.replaceAll(STARTING_CAPITAL_PATTERN, ""));
}
}
}
if (isSchool) {
school = schoolSB.toString();
}
if (isDepartment) {
department = departmentSB.toString();
}
// A part not matching university, department nor school.
} else if (rest == null) {
StringBuilder restSB = new StringBuilder();
// Less than 3 parts -> concatenate those
if (part.size() < 3) {
for (String k : part) {
restSB.append(k);
// More than 3 parts -> use 1st letter abbreviation
}
} else {
for (String k : part) {
k = k.replaceAll(STARTING_CAPITAL_PATTERN, "");
if (!(k.isEmpty())) {
restSB.append(k);
}
}
}
rest = restSB.toString();
}
}
// Putting parts together.
return (university == null ? rest : university)
+ (school == null ? "" : school)
+ ((department == null)
|| ((school != null) && department.equals(school)) ?
"" : department);
}
/**
* Generates a BibTeX label according to the pattern for a given entry type, and saves the unique label in the
* <code>Bibtexentry</code>.
*
* The given database is used to avoid duplicate keys.
*
* @param citeKeyPattern
* @param database a <code>BibDatabase</code>
* @param entry a <code>BibEntry</code>
* @return modified BibEntry
*/
public static void makeAndSetLabel(AbstractBibtexKeyPattern citeKeyPattern, BibDatabase database, BibEntry entry,
BibtexKeyPatternPreferences bibtexKeyPatternPreferences) {
String newKey = makeLabel(citeKeyPattern, database, entry, bibtexKeyPatternPreferences);
entry.setCiteKey(newKey);
}
private static String makeLabel(AbstractBibtexKeyPattern citeKeyPattern, BibDatabase database, BibEntry entry, BibtexKeyPatternPreferences bibtexKeyPatternPreferences) {
String key;
StringBuilder stringBuilder = new StringBuilder();
try {
// get the type of entry
String entryType = entry.getType();
// Get the arrayList corresponding to the type
List<String> typeList = new ArrayList<>(citeKeyPattern.getValue(entryType));
if (!typeList.isEmpty()) {
typeList.remove(0);
}
boolean field = false;
for (String typeListEntry : typeList) {
if ("[".equals(typeListEntry)) {
field = true;
} else if ("]".equals(typeListEntry)) {
field = false;
} else if (field) {
// check whether there is a modifier on the end such as
// ":lower"
List<String> parts = parseFieldMarker(typeListEntry);
String label = makeLabel(entry, parts.get(0), bibtexKeyPatternPreferences.getKeywordDelimiter(), database);
// apply modifier if present
if (parts.size() > 1) {
label = applyModifiers(label, parts, 1);
}
stringBuilder.append(label);
} else {
stringBuilder.append(typeListEntry);
}
}
} catch (Exception e) {
LOGGER.warn("Cannot make label", e);
}
// Remove all illegal characters from the key.
key = checkLegalKey(stringBuilder.toString(), bibtexKeyPatternPreferences.isEnforceLegalKey());
// Remove Regular Expressions while generating Keys
String regex = bibtexKeyPatternPreferences.getKeyPatternRegex();
if ((regex != null) && !regex.trim().isEmpty()) {
String replacement = bibtexKeyPatternPreferences.getKeyPatternReplacement();
key = key.replaceAll(regex, replacement);
}
String oldKey = entry.getCiteKeyOptional().orElse(null);
int occurrences = database.getDuplicationChecker().getNumberOfKeyOccurrences(key);
if (Objects.equals(oldKey, key)) {
occurrences--; // No change, so we can accept one dupe.
}
boolean alwaysAddLetter = bibtexKeyPatternPreferences.isAlwaysAddLetter();
boolean firstLetterA = bibtexKeyPatternPreferences.isFirstLetterA();
String newKey;
if (!alwaysAddLetter && (occurrences == 0)) {
newKey = key;
} else {
// The key is already in use, so we must modify it.
int number = !alwaysAddLetter && !firstLetterA ? 1 : 0;
String moddedKey;
do {
moddedKey = key + getAddition(number);
number++;
occurrences = database.getDuplicationChecker().getNumberOfKeyOccurrences(moddedKey);
// only happens if #getAddition() is buggy
if (Objects.equals(oldKey, moddedKey)) {
occurrences--;
}
} while (occurrences > 0);
newKey = moddedKey;
}
return newKey;
}
/**
* Applies modifiers to a label generated based on a field marker.
* @param label The generated label.
* @param parts String array containing the modifiers.
* @param offset The number of initial items in the modifiers array to skip.
* @return The modified label.
*/
public static String applyModifiers(final String label, final List<String> parts, final int offset) {
String resultingLabel = label;
if (parts.size() > offset) {
for (int j = offset; j < parts.size(); j++) {
String modifier = parts.get(j);
if ("abbr".equals(modifier)) {
// Abbreviate - that is,
StringBuilder abbreviateSB = new StringBuilder();
String[] words = resultingLabel.replaceAll("[\\{\\}']", "")
.split("[\\(\\) \r\n\"]");
for (String word : words) {
if (!word.isEmpty()) {
abbreviateSB.append(word.charAt(0));
}
}
resultingLabel = abbreviateSB.toString();
} else {
Optional<Formatter> formatter = Formatters.getFormatterForModifier(modifier);
if (formatter.isPresent()) {
resultingLabel = formatter.get().format(label);
} else if (!modifier.isEmpty() && (modifier.length() >= 2) && (modifier.charAt(0) == '(') && modifier.endsWith(")")) {
// Alternate text modifier in parentheses. Should be inserted if
// the label is empty:
if (label.isEmpty() && (modifier.length() > 2)) {
resultingLabel = modifier.substring(1, modifier.length() - 1);
} else {
resultingLabel = label;
}
} else {
LOGGER.info("Key generator warning: unknown modifier '"
+ modifier + "'.");
resultingLabel = label;
}
}
}
}
return resultingLabel;
}
public static String makeLabel(BibEntry entry, String value, Character keywordDelimiter, BibDatabase database) {
String val = value;
try {
if (val.startsWith("auth") || val.startsWith("pureauth")) {
/*
* For label code "auth...": if there is no author, but there
* are editor(s) (e.g. for an Edited Book), use the editor(s)
* instead. (saw27@mrao.cam.ac.uk). This is what most people
* want, but in case somebody really needs a field which expands
* to nothing if there is no author (e.g. someone who uses both
* "auth" and "ed" in the same label), we provide an alternative
* form "pureauth..." which does not do this fallback
* substitution of editor.
*/
String authString;
if (database != null) {
authString = entry.getField(FieldName.AUTHOR)
.map(authorString -> normalize(database.resolveForStrings(authorString))).orElse("");
} else {
authString = entry.getField(FieldName.AUTHOR).orElse("");
}
if (val.startsWith("pure")) {
// remove the "pure" prefix so the remaining
// code in this section functions correctly
val = val.substring(4);
}
if (authString.isEmpty()) {
if (database != null) {
authString = entry.getField(FieldName.EDITOR)
.map(authorString -> normalize(database.resolveForStrings(authorString))).orElse("");
} else {
authString = entry.getField(FieldName.EDITOR).orElse("");
}
}
// Gather all author-related checks, so we don't
// have to check all the time.
if ("auth".equals(val)) {
return firstAuthor(authString);
} else if ("authForeIni".equals(val)) {
return firstAuthorForenameInitials(authString);
} else if ("authFirstFull".equals(val)) {
return firstAuthorVonAndLast(authString);
} else if ("authors".equals(val)) {
return allAuthors(authString);
} else if ("authorsAlpha".equals(val)) {
return authorsAlpha(authString);
}
// Last author's last name
else if ("authorLast".equals(val)) {
return lastAuthor(authString);
} else if ("authorLastForeIni".equals(val)) {
return lastAuthorForenameInitials(authString);
} else if ("authorIni".equals(val)) {
return oneAuthorPlusIni(authString);
} else if (val.matches("authIni[\\d]+")) {
int num = Integer.parseInt(val.substring(7));
return authIniN(authString, num);
} else if ("auth.auth.ea".equals(val)) {
return authAuthEa(authString);
} else if ("auth.etal".equals(val)) {
return authEtal(authString, ".", ".etal");
} else if ("authEtAl".equals(val)) {
return authEtal(authString, "", "EtAl");
} else if ("authshort".equals(val)) {
return authshort(authString);
} else if (val.matches("auth[\\d]+_[\\d]+")) {
String[] nums = val.substring(4).split("_");
return authNofMth(authString, Integer.parseInt(nums[0]),
Integer.parseInt(nums[1]));
} else if (val.matches("auth\\d+")) {
// authN. First N chars of the first author's last
// name.
String fa = firstAuthor(authString);
int num = Integer.parseInt(val.substring(4));
if (num > fa.length()) {
num = fa.length();
}
return fa.substring(0, num);
} else if (val.matches("authors\\d+")) {
return nAuthors(authString, Integer.parseInt(val.substring(7)));
} else {
// This "auth" business was a dead end, so just
// use it literally:
return entry.getFieldOrAlias(val).orElse("");
}
} else if (val.startsWith("ed")) {
// Gather all markers starting with "ed" here, so we
// don't have to check all the time.
if ("edtr".equals(val)) {
return firstAuthor(entry.getField(FieldName.EDITOR).orElse(""));
} else if ("edtrForeIni".equals(val)) {
return firstAuthorForenameInitials(entry.getField(FieldName.EDITOR).orElse(""));
} else if ("editors".equals(val)) {
return allAuthors(entry.getField(FieldName.EDITOR).orElse(""));
// Last author's last name
} else if ("editorLast".equals(val)) {
return lastAuthor(entry.getField(FieldName.EDITOR).orElse(""));
} else if ("editorLastForeIni".equals(val)) {
return lastAuthorForenameInitials(entry.getField(FieldName.EDITOR).orElse(""));
} else if ("editorIni".equals(val)) {
return oneAuthorPlusIni(entry.getField(FieldName.EDITOR).orElse(""));
} else if (val.matches("edtrIni[\\d]+")) {
int num = Integer.parseInt(val.substring(7));
return authIniN(entry.getField(FieldName.EDITOR).orElse(""), num);
} else if (val.matches("edtr[\\d]+_[\\d]+")) {
String[] nums = val.substring(4).split("_");
return authNofMth(entry.getField(FieldName.EDITOR).orElse(""),
Integer.parseInt(nums[0]),
Integer.parseInt(nums[1]) - 1);
} else if ("edtr.edtr.ea".equals(val)) {
return authAuthEa(entry.getField(FieldName.EDITOR).orElse(""));
} else if ("edtrshort".equals(val)) {
return authshort(entry.getField(FieldName.EDITOR).orElse(""));
}
// authN. First N chars of the first author's last
// name.
else if (val.matches("edtr\\d+")) {
String fa = firstAuthor(entry.getField(FieldName.EDITOR).orElse(""));
int num = Integer.parseInt(val.substring(4));
if (num > fa.length()) {
num = fa.length();
}
return fa.substring(0, num);
} else {
// This "ed" business was a dead end, so just
// use it literally:
return entry.getFieldOrAlias(val).orElse("");
}
} else if ("firstpage".equals(val)) {
return firstPage(entry.getField(FieldName.PAGES).orElse(""));
} else if ("lastpage".equals(val)) {
return lastPage(entry.getField(FieldName.PAGES).orElse(""));
} else if ("title".equals(val)) {
return camelizeSignificantWordsInTitle(entry.getField(FieldName.TITLE).orElse(""));
} else if ("shorttitle".equals(val)) {
return getTitleWords(3, entry.getField(FieldName.TITLE).orElse(""));
} else if ("shorttitleINI".equals(val)) {
return keepLettersAndDigitsOnly(
applyModifiers(getTitleWordsWithSpaces(3, entry.getField(FieldName.TITLE).orElse("")),
Collections.singletonList("abbr"), 0));
} else if ("veryshorttitle".equals(val)) {
return getTitleWords(1,
removeSmallWords(entry.getField(FieldName.TITLE).orElse("")));
} else if ("camel".equals(val)) {
return getCamelizedTitle(entry.getField(FieldName.TITLE).orElse(""));
} else if ("shortyear".equals(val)) {
String yearString = entry.getFieldOrAlias(FieldName.YEAR).orElse("");
if (yearString.isEmpty()) {
return yearString;
// In press/in preparation/submitted
} else if (yearString.startsWith("in") || yearString.startsWith("sub")) {
return "IP";
} else if (yearString.length() > 2) {
return yearString.substring(yearString.length() - 2);
} else {
return yearString;
}
} else if (val.matches("keyword\\d+")) {
// according to LabelPattern.php, it returns keyword number n
int num = Integer.parseInt(val.substring(7));
KeywordList separatedKeywords = entry.getKeywords(keywordDelimiter);
if (separatedKeywords.size() < num) {
// not enough keywords
return "";
} else {
// num counts from 1 to n, but index in arrayList count from 0 to n-1
return separatedKeywords.get(num - 1).toString();
}
} else if (val.matches("keywords\\d*")) {
// return all keywords, not separated
int num;
if (val.length() > 8) {
num = Integer.parseInt(val.substring(8));
} else {
num = Integer.MAX_VALUE;
}
KeywordList separatedKeywords = entry.getKeywords(keywordDelimiter);
StringBuilder sb = new StringBuilder();
int i = 0;
for (Keyword keyword : separatedKeywords) {
// remove all spaces
sb.append(keyword.toString().replaceAll("\\s+", ""));
i++;
if (i >= num) {
break;
}
}
return sb.toString();
} else {
// we haven't seen any special demands
return entry.getFieldOrAlias(val).orElse("");
}
} catch (NullPointerException ex) {
LOGGER.debug("Problem making label", ex);
return "";
}
}
/**
* Computes an appendix to a BibTeX key that could make it unique. We use
* a-z for numbers 0-25, and then aa-az, ba-bz, etc.
*
* @param number
* The appendix number.
* @return The String to append.
*/
private static String getAddition(int number) {
if (number >= CHARS.length()) {
int lastChar = number % CHARS.length();
return getAddition((number / CHARS.length()) - 1) + CHARS.substring(lastChar, lastChar + 1);
} else {
return CHARS.substring(number, number + 1);
}
}
/**
* Determines "number" words out of the "title" field in the given BibTeX entry
*/
public static String getTitleWords(int number, String title) {
return keepLettersAndDigitsOnly(getTitleWordsWithSpaces(number, title));
}
/**
* Removes any '-', unnecessary whitespace and latex commands formatting
*/
private static String formatTitle(String title) {
String ss = new RemoveLatexCommandsFormatter().format(title);
StringBuilder stringBuilder = new StringBuilder();
StringBuilder current;
int piv = 0;
while (piv < ss.length()) {
current = new StringBuilder();
// Get the next word:
while ((piv < ss.length()) && !Character.isWhitespace(ss.charAt(piv))
&& (ss.charAt(piv) != '-')) {
current.append(ss.charAt(piv));
piv++;
}
piv++;
// Check if it is ok:
String word = current.toString().trim();
if (word.isEmpty()) {
continue;
}
// If we get here, the word was accepted.
if (stringBuilder.length() > 0) {
stringBuilder.append(' ');
}
stringBuilder.append(word);
}
return stringBuilder.toString();
}
/**
* Capitalises and concatenates the words out of the "title" field in the given BibTeX entry
*/
public static String getCamelizedTitle(String title) {
return keepLettersAndDigitsOnly(camelizeTitle(title));
}
private static String camelizeTitle(String title) {
StringBuilder stringBuilder = new StringBuilder();
String formattedTitle = formatTitle(title);
try (Scanner titleScanner = new Scanner(formattedTitle)) {
while (titleScanner.hasNext()) {
String word = titleScanner.next();
// Camelize the word
word = word.substring(0, 1).toUpperCase(Locale.ROOT) + word.substring(1);
if (stringBuilder.length() > 0) {
stringBuilder.append(' ');
}
stringBuilder.append(word);
}
}
return stringBuilder.toString();
}
/**
* Capitalises the significant words of the "title" field in the given BibTeX entry
*/
public static String camelizeSignificantWordsInTitle(String title) {
StringJoiner stringJoiner = new StringJoiner(" ");
String formattedTitle = formatTitle(title);
Boolean camelize;
try (Scanner titleScanner = new Scanner(formattedTitle)) {
while (titleScanner.hasNext()) {
String word = titleScanner.next();
camelize = true;
// Camelize the word if it is significant
for (String smallWord : Word.SMALLER_WORDS) {
if (word.equalsIgnoreCase(smallWord)) {
camelize = false;
continue;
}
}
// We want to capitalize significant words and the first word of the title
if (camelize || (stringJoiner.length() == 0)) {
word = word.substring(0, 1).toUpperCase(Locale.ROOT) + word.substring(1);
} else {
word = word.substring(0, 1).toLowerCase(Locale.ROOT) + word.substring(1);
}
stringJoiner.add(word);
}
}
return stringJoiner.toString();
}
public static String removeSmallWords(String title) {
StringJoiner stringJoiner = new StringJoiner(" ");
String formattedTitle = formatTitle(title);
try (Scanner titleScanner = new Scanner(formattedTitle)) {
mainl: while (titleScanner.hasNext()) {
String word = titleScanner.next();
for (String smallWord : Word.SMALLER_WORDS) {
if (word.equalsIgnoreCase(smallWord)) {
continue mainl;
}
}
stringJoiner.add(word);
}
}
return stringJoiner.toString();
}
private static String getTitleWordsWithSpaces(int number, String title) {
StringJoiner stringJoiner = new StringJoiner(" ");
String formattedTitle = formatTitle(title);
int words = 0;
try (Scanner titleScanner = new Scanner(formattedTitle)) {
while (titleScanner.hasNext() && (words < number)) {
String word = titleScanner.next();
stringJoiner.add(word);
words++;
}
}
return stringJoiner.toString();
}
private static String keepLettersAndDigitsOnly(String in) {
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < in.length(); i++) {
if (Character.isLetterOrDigit(in.charAt(i))) {
stringBuilder.append(in.charAt(i));
}
}
return stringBuilder.toString();
}
/**
* Gets the last name of the first author/editor
*
* @param authorField
* a <code>String</code>
* @return the surname of an author/editor or "" if no author was found
* This method is guaranteed to never return null.
*
* @throws NullPointerException
* if authorField == null
*/
public static String firstAuthor(String authorField) {
AuthorList authorList = AuthorList.parse(authorField);
if (authorList.isEmpty()) {
return "";
}
return authorList.getAuthor(0).getLast().orElse("");
}
/**
* Gets the first name initials of the first author/editor
*
* @param authorField
* a <code>String</code>
* @return the first name initial of an author/editor or "" if no author was found
* This method is guaranteed to never return null.
*
* @throws NullPointerException
* if authorField == null
*/
public static String firstAuthorForenameInitials(String authorField) {
AuthorList authorList = AuthorList.parse(authorField);
if (authorList.isEmpty()) {
return "";
}
return authorList.getAuthor(0).getFirstAbbr().map(s -> s.substring(0, 1)).orElse("");
}
/**
* Gets the von part and the last name of the first author/editor
* No spaces are returned
*
* @param authorField
* a <code>String</code>
* @return the von part and surname of an author/editor or "" if no author was found.
* This method is guaranteed to never return null.
*
* @throws NullPointerException
* if authorField == null
*/
public static String firstAuthorVonAndLast(String authorField) {
AuthorList authorList = AuthorList.parse(authorField);
if (authorList.isEmpty()) {
return "";
}
StringBuilder stringBuilder = new StringBuilder();
authorList.getAuthor(0).getVon().ifPresent(vonAuthor -> stringBuilder.append(vonAuthor.replaceAll(" ", "")));
authorList.getAuthor(0).getLast().ifPresent(stringBuilder::append);
return stringBuilder.toString();
}
/**
* Gets the last name of the last author/editor
* @param authorField a <code>String</code>
* @return the surname of an author/editor
*/
public static String lastAuthor(String authorField) {
String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\s+\\band\\b\\s+");
if (tokens.length > 0) {
String[] lastAuthor = tokens[tokens.length - 1].split(",");
return lastAuthor[0];
} else {
// if author is empty
return "";
}
}
/**
* Gets the forename initials of the last author/editor
*
* @param authorField
* a <code>String</code>
* @return the forename initial of an author/editor or "" if no author was found
* This method is guaranteed to never return null.
*
* @throws NullPointerException
* if authorField == null
*/
public static String lastAuthorForenameInitials(String authorField) {
AuthorList authorList = AuthorList.parse(authorField);
if (authorList.isEmpty()) {
return "";
}
return authorList.getAuthor(authorList.getNumberOfAuthors() - 1).getFirstAbbr().map(s -> s.substring(0, 1))
.orElse("");
}
/**
* Gets the last name of all authors/editors
* @param authorField a <code>String</code>
* @return the sur name of all authors/editors
*/
public static String allAuthors(String authorField) {
// Quick hack to use NAuthors to avoid code duplication
return nAuthors(authorField, Integer.MAX_VALUE);
}
/**
* Returns the authors according to the BibTeX-alpha-Style
* @param authorField string containing the value of the author field
* @return the initials of all authornames
*/
public static String authorsAlpha(String authorField) {
String authors = "";
String fixedAuthors = AuthorList.fixAuthorLastNameOnlyCommas(authorField, false);
// drop the "and" before the last author
// -> makes processing easier
fixedAuthors = fixedAuthors.replace(" and ", ", ");
String[] tokens = fixedAuthors.split(",");
int max = tokens.length > 4 ? 3 : tokens.length;
if (max == 1) {
String[] firstAuthor = tokens[0].replaceAll("\\s+", " ").trim().split(" ");
// take first letter of any "prefixes" (e.g. van der Aalst -> vd)
for (int j = 0; j < (firstAuthor.length - 1); j++) {
authors = authors.concat(firstAuthor[j].substring(0, 1));
}
// append last part of last name completely
authors = authors.concat(firstAuthor[firstAuthor.length - 1].substring(0,
Math.min(3, firstAuthor[firstAuthor.length - 1].length())));
} else {
for (int i = 0; i < max; i++) {
// replace all whitespaces by " "
// split the lastname at " "
String[] curAuthor = tokens[i].replaceAll("\\s+", " ").trim().split(" ");
for (String aCurAuthor : curAuthor) {
// use first character of each part of lastname
authors = authors.concat(aCurAuthor.substring(0, 1));
}
}
if (tokens.length > 4) {
authors = authors.concat("+");
}
}
return authors;
}
/**
* Gets the surnames of the first N authors and appends EtAl if there are more than N authors
* @param authorField a <code>String</code>
* @param n the number of desired authors
* @return Gets the surnames of the first N authors and appends EtAl if there are more than N authors
*/
public static String nAuthors(String authorField, int n) {
String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\s+\\band\\b\\s+");
int i = 0;
StringBuilder authorSB = new StringBuilder();
while ((tokens.length > i) && (i < n)) {
String lastName = tokens[i].replaceAll(",\\s+.*", "");
authorSB.append(lastName);
i++;
}
if (tokens.length > n) {
authorSB.append("EtAl");
}
return authorSB.toString();
}
/**
* Gets the first part of the last name of the first
* author/editor, and appends the last name initial of the
* remaining authors/editors.
* Maximum 5 characters
* @param authorField a <code>String</code>
* @return the surname of all authors/editors
*/
public static String oneAuthorPlusIni(String authorField) {
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+");
if (tokens.length == 0) {
return "";
}
String firstAuthor = tokens[0].split(",")[0];
StringBuilder authorSB = new StringBuilder();
authorSB.append(firstAuthor.substring(0, Math.min(CHARS_OF_FIRST, firstAuthor.length())));
int i = 1;
while (tokens.length > i) {
// convert lastname, firstname to firstname lastname
authorSB.append(tokens[i].charAt(0));
i++;
}
return authorSB.toString();
}
/**
* auth.auth.ea format:
* Isaac Newton and James Maxwell and Albert Einstein (1960)
* Isaac Newton and James Maxwell (1960)
* give:
* Newton.Maxwell.ea
* Newton.Maxwell
*/
public static String authAuthEa(String authorField) {
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+");
if (tokens.length == 0) {
return "";
}
StringBuilder author = new StringBuilder();
// append first author
author.append((tokens[0].split(","))[0]);
if (tokens.length >= 2) {
// append second author
author.append('.').append((tokens[1].split(","))[0]);
}
if (tokens.length > 2) {
// append ".ea" if more than 2 authors
author.append(".ea");
}
return author.toString();
}
/**
* auth.etal, authEtAl, ... format:
* Isaac Newton and James Maxwell and Albert Einstein (1960)
* Isaac Newton and James Maxwell (1960)
*
* auth.etal give (delim=".", append=".etal"):
* Newton.etal
* Newton.Maxwell
*
* authEtAl give (delim="", append="EtAl"):
* NewtonEtAl
* NewtonMaxwell
*
* Note that [authEtAl] equals [authors2]
*/
public static String authEtal(String authorField, String delim,
String append) {
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
String[] tokens = fixedAuthorField.split("\\s*\\band\\b\\s*");
if (tokens.length == 0) {
return "";
}
StringBuilder author = new StringBuilder();
author.append((tokens[0].split(","))[0]);
if (tokens.length == 2) {
author.append(delim).append((tokens[1].split(","))[0]);
} else if (tokens.length > 2) {
author.append(append);
}
return author.toString();
}
/**
* The first N characters of the Mth author/editor.
* M starts counting from 1
*/
public static String authNofMth(String authorField, int n, int m) {
// have m counting from 0
int mminusone = m - 1;
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
String[] tokens = fixedAuthorField.split("\\s+\\band\\b\\s+");
if ((tokens.length <= mminusone) || (n < 0) || (mminusone < 0)) {
return "";
}
String lastName = (tokens[mminusone].split(","))[0];
if (lastName.length() <= n) {
return lastName;
} else {
return lastName.substring(0, n);
}
}
/**
* authshort format:
* added by Kolja Brix, kbx@users.sourceforge.net
*
* given author names
*
* Isaac Newton and James Maxwell and Albert Einstein and N. Bohr
*
* Isaac Newton and James Maxwell and Albert Einstein
*
* Isaac Newton and James Maxwell
*
* Isaac Newton
*
* yield
*
* NME+
*
* NME
*
* NM
*
* Newton
*/
public static String authshort(String authorField) {
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
StringBuilder author = new StringBuilder();
String[] tokens = fixedAuthorField.split("\\band\\b");
int i = 0;
if (tokens.length == 1) {
author.append(authNofMth(fixedAuthorField, fixedAuthorField.length(), 1));
} else if (tokens.length >= 2) {
while ((tokens.length > i) && (i < 3)) {
author.append(authNofMth(fixedAuthorField, 1, i + 1));
i++;
}
if (tokens.length > 3) {
author.append('+');
}
}
return author.toString();
}
/**
* authIniN format:
*
* Each author gets (N div #authors) chars, the remaining (N mod #authors)
* chars are equally distributed to the authors first in the row.
*
* If (N < #authors), only the first N authors get mentioned.
*
* For example if
*
* a) I. Newton and J. Maxwell and A. Einstein and N. Bohr (..)
*
* b) I. Newton and J. Maxwell and A. Einstein
*
* c) I. Newton and J. Maxwell
*
* d) I. Newton
*
* authIni4 gives: a) NMEB, b) NeME, c) NeMa, d) Newt
*
* @param authorField
* The authors to format.
*
* @param n
* The maximum number of characters this string will be long. A
* negative number or zero will lead to "" be returned.
*
* @throws NullPointerException
* if authorField is null and n > 0
*/
public static String authIniN(String authorField, int n) {
if (n <= 0) {
return "";
}
String fixedAuthorField = AuthorList.fixAuthorForAlphabetization(authorField);
StringBuilder author = new StringBuilder();
String[] tokens = fixedAuthorField.split("\\band\\b");
if (tokens.length == 0) {
return author.toString();
}
int i = 0;
int charsAll = n / tokens.length;
while (tokens.length > i) {
if (i < (n % tokens.length)) {
author.append(authNofMth(fixedAuthorField, charsAll + 1, i + 1));
} else {
author.append(authNofMth(fixedAuthorField, charsAll, i + 1));
}
i++;
}
if (author.length() <= n) {
return author.toString();
} else {
return author.toString().substring(0, n);
}
}
/**
* Split the pages field into separate numbers and return the lowest
*
* @param pages
* (may not be null) a pages string such as 42--111 or
* 7,41,73--97 or 43+
*
* @return the first page number or "" if no number is found in the string
*
* @throws NullPointerException
* if pages is null
*/
public static String firstPage(String pages) {
final String[] splitPages = pages.split("\\D+");
int result = Integer.MAX_VALUE;
for (String n : splitPages) {
if (n.matches("\\d+")) {
result = Math.min(Integer.parseInt(n), result);
}
}
if (result == Integer.MAX_VALUE) {
return "";
} else {
return String.valueOf(result);
}
}
/**
* Split the pages field into separate numbers and return the highest
*
* @param pages
* a pages string such as 42--111 or 7,41,73--97 or 43+
*
* @return the first page number or "" if no number is found in the string
*
* @throws NullPointerException
* if pages is null.
*/
public static String lastPage(String pages) {
final String[] splitPages = pages.split("\\D+");
int result = Integer.MIN_VALUE;
for (String n : splitPages) {
if (n.matches("\\d+")) {
result = Math.max(Integer.parseInt(n), result);
}
}
if (result == Integer.MIN_VALUE) {
return "";
} else {
return String.valueOf(result);
}
}
/**
* Parse a field marker with modifiers, possibly containing a parenthesised modifier,
* as well as escaped colons and parentheses.
* @param arg The argument string.
* @return An array of strings representing the parts of the marker
*/
private static List<String> parseFieldMarker(String arg) {
List<String> parts = new ArrayList<>();
StringBuilder current = new StringBuilder();
boolean escaped = false;
int inParenthesis = 0;
for (int i = 0; i < arg.length(); i++) {
char currentChar = arg.charAt(i);
if ((currentChar == ':') && !escaped && (inParenthesis == 0)) {
parts.add(current.toString());
current = new StringBuilder();
} else if ((currentChar == '(') && !escaped) {
inParenthesis++;
current.append(currentChar);
} else if ((currentChar == ')') && !escaped && (inParenthesis > 0)) {
inParenthesis--;
current.append(currentChar);
} else if (currentChar == '\\') {
if (escaped) {
escaped = false;
current.append(currentChar);
} else {
escaped = true;
}
} else if (escaped) {
current.append(currentChar);
escaped = false;
} else {
current.append(currentChar);
}
}
parts.add(current.toString());
return parts;
}
/**
* This method returns a String similar to the one passed in, except that it is molded into a form that is
* acceptable for bibtex.
* <p>
* Watch-out that the returned string might be of length 0 afterwards.
*
* @param key mayBeNull
* @param enforceLegalKey make sure that the key is legal in all respects
*/
public static String checkLegalKey(String key, boolean enforceLegalKey) {
if (key == null) {
return null;
}
if (!enforceLegalKey) {
// User doesn't want us to enforce legal characters. We must still look
// for whitespace and some characters such as commas, since these would
// interfere with parsing:
StringBuilder newKey = new StringBuilder();
for (int i = 0; i < key.length(); i++) {
char c = key.charAt(i);
if (!Character.isWhitespace(c) && ("{}(),\\\"".indexOf(c) == -1)) {
newKey.append(c);
}
}
return newKey.toString();
}
StringBuilder newKey = new StringBuilder();
for (int i = 0; i < key.length(); i++) {
char c = key.charAt(i);
if (!Character.isWhitespace(c) && ("{}(),\\\"#~^'".indexOf(c) == -1)) {
newKey.append(c);
}
}
// Replace non-English characters like umlauts etc. with a sensible
// letter or letter combination that bibtex can accept.
return StringUtil.replaceSpecialCharacters(newKey.toString());
}
public static String makeLabel(BibDatabaseContext bibDatabaseContext,
BibEntry entry,
BibtexKeyPatternPreferences bibtexKeyPatternPreferences) {
AbstractBibtexKeyPattern citeKeyPattern = bibDatabaseContext.getMetaData().getCiteKeyPattern(bibtexKeyPatternPreferences.getKeyPattern());
return makeLabel(citeKeyPattern, bibDatabaseContext.getDatabase(), entry, bibtexKeyPatternPreferences);
}
}